From f00790e5391ee1546d9db43a92c8f09e6c361384 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 2 May 2023 01:47:33 -0400 Subject: [PATCH 001/604] informational thermo column titles --- src/REACTION/fix_bond_react.cpp | 8 ++++++++ src/REACTION/fix_bond_react.h | 1 + src/fix.cpp | 1 + src/fix.h | 2 ++ src/thermo.cpp | 3 +++ 5 files changed, 15 insertions(+) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 784c163f70d..04272e7a9db 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -123,6 +123,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : newton_bond = force->newton_bond; + thermo_modify_colname = 1; restart_global = 1; attempted_rxn = 0; force_reneighbor = 1; @@ -4310,6 +4311,13 @@ double FixBondReact::compute_vector(int n) /* ---------------------------------------------------------------------- */ +std::string FixBondReact::get_thermo_colname(int n) +{ + return rxn_name[n-1]; +} + +/* ---------------------------------------------------------------------- */ + void FixBondReact::post_integrate_respa(int ilevel, int /*iloop*/) { if (ilevel == nlevels_respa-1) post_integrate(); diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 66a5e4d6a0f..cef7401df0f 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -46,6 +46,7 @@ class FixBondReact : public Fix { void init_list(int, class NeighList *) override; void post_integrate() override; void post_integrate_respa(int, int) override; + std::string get_thermo_colname(int) override; int pack_forward_comm(int, int *, double *, int, int *) override; void unpack_forward_comm(int, int, double *) override; diff --git a/src/fix.cpp b/src/fix.cpp index f0cc8a20ea9..d661aaf274b 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -58,6 +58,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : box_change = NO_BOX_CHANGE; thermo_energy = 0; thermo_virial = 0; + thermo_modify_colname = 
0; energy_global_flag = energy_peratom_flag = 0; virial_global_flag = virial_peratom_flag = 0; ecouple_flag = 0; diff --git a/src/fix.h b/src/fix.h index 334f61ff2b2..a5b5e982f1e 100644 --- a/src/fix.h +++ b/src/fix.h @@ -53,6 +53,7 @@ class Fix : protected Pointers { int nevery; // how often to call an end_of_step fix int thermo_energy; // 1 if fix_modify energy enabled, 0 if not int thermo_virial; // 1 if fix_modify virial enabled, 0 if not + int thermo_modify_colname; // 1 if fix has custom column names for output int energy_global_flag; // 1 if contributes to global eng int energy_peratom_flag; // 1 if contributes to peratom eng int virial_global_flag; // 1 if contributes to global virial @@ -233,6 +234,7 @@ class Fix : protected Pointers { virtual double compute_scalar() { return 0.0; } virtual double compute_vector(int) { return 0.0; } virtual double compute_array(int, int) { return 0.0; } + virtual std::string get_thermo_colname(int) { return {}; } virtual int dof(int) { return 0; } virtual void deform(int) {} diff --git a/src/thermo.cpp b/src/thermo.cpp index 302bf566d26..05131e244ed 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1044,6 +1044,9 @@ void Thermo::parse_fields(const std::string &str) error->all(FLERR, "Thermo fix array is accessed out-of-range"); } + if (ifix->thermo_modify_colname) + keyword_user[nfield] = ifix->get_thermo_colname(argindex1[nfield]); + field2index[nfield] = add_fix(argi.get_name()); addfield(word.c_str(), &Thermo::compute_fix, FLOAT); From f5b8a8746331266915f5964ce7368e4f120c1f3b Mon Sep 17 00:00:00 2001 From: jrgissing Date: Fri, 5 May 2023 01:04:49 -0400 Subject: [PATCH 002/604] example for NH fixes --- src/REACTION/fix_bond_react.cpp | 2 +- src/REACTION/fix_bond_react.h | 2 +- src/fix_nh.cpp | 158 ++++++++++++++++++++++++++++++++ src/fix_nh.h | 1 + src/thermo.cpp | 2 +- 5 files changed, 162 insertions(+), 3 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 
04272e7a9db..b5a743a78f7 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -4313,7 +4313,7 @@ double FixBondReact::compute_vector(int n) std::string FixBondReact::get_thermo_colname(int n) { - return rxn_name[n-1]; + return rxn_name[n]; } /* ---------------------------------------------------------------------- */ diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index cef7401df0f..c770cae0a01 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -46,13 +46,13 @@ class FixBondReact : public Fix { void init_list(int, class NeighList *) override; void post_integrate() override; void post_integrate_respa(int, int) override; - std::string get_thermo_colname(int) override; int pack_forward_comm(int, int *, double *, int, int *) override; void unpack_forward_comm(int, int, double *) override; int pack_reverse_comm(int, int, double *) override; void unpack_reverse_comm(int, int *, double *) override; double compute_vector(int) override; + std::string get_thermo_colname(int) override; double memory_usage() override; private: diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index 12272c31235..15b05e121a0 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -64,6 +64,7 @@ FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) : restart_global = 1; dynamic_group_allow = 1; + thermo_modify_colname = 1; time_integrate = 1; scalar_flag = 1; vector_flag = 1; @@ -1688,6 +1689,163 @@ double FixNH::compute_vector(int n) /* ---------------------------------------------------------------------- */ +std::string FixNH::get_thermo_colname(int n) +{ + int ilen; + + if (tstat_flag) { + ilen = mtchain; + if (n < ilen) return fmt::format("f_{}:eta[{}]",id,n+1); + n -= ilen; + ilen = mtchain; + if (n < ilen) return fmt::format("f_{}:eta_dot[{}]",id,n+1); + n -= ilen; + } + + if (pstat_flag) { + if (pstyle == ISO) { + ilen = 1; + if (n < ilen) return fmt::format("f_{}:omega[{}]",id,n+1); + n -= ilen; + } else if 
(pstyle == ANISO) { + ilen = 3; + if (n < ilen) return fmt::format("f_{}:omega[{}]",id,n+1); + n -= ilen; + } else { + ilen = 6; + if (n < ilen) return fmt::format("f_{}:omega[{}]",id,n+1); + n -= ilen; + } + + if (pstyle == ISO) { + ilen = 1; + if (n < ilen) return fmt::format("f_{}:omega_dot[{}]",id,n+1); + n -= ilen; + } else if (pstyle == ANISO) { + ilen = 3; + if (n < ilen) return fmt::format("f_{}:omega_dot[{}]",id,n+1); + n -= ilen; + } else { + ilen = 6; + if (n < ilen) return fmt::format("f_{}:omega_dot[{}]",id,n+1); + n -= ilen; + } + + if (mpchain) { + ilen = mpchain; + if (n < ilen) return fmt::format("f_{}:etap[{}]",id,n+1); + n -= ilen; + ilen = mpchain; + if (n < ilen) return fmt::format("f_{}:etap_dot[{}]",id,n+1); + n -= ilen; + } + } + + int ich; + + if (tstat_flag) { + ilen = mtchain; + if (n < ilen) { + ich = n; + if (ich == 0) + return fmt::format("f_{}:PE_eta[{}]",id,n+1); + else + return fmt::format("f_{}:PE_eta[{}]",id,n+1); + } + n -= ilen; + ilen = mtchain; + if (n < ilen) { + ich = n; + if (ich == 0) + return fmt::format("f_{}:KE_eta_dot[{}]",id,n+1); + else + return fmt::format("f_{}:KE_eta_dot[{}]",id,n+1); + } + n -= ilen; + } + + if (pstat_flag) { + if (pstyle == ISO) { + ilen = 1; + if (n < ilen) + return fmt::format("f_{}:PE_omega[{}]",id,n+1); + n -= ilen; + } else if (pstyle == ANISO) { + ilen = 3; + if (n < ilen) { + if (p_flag[n]) + return fmt::format("f_{}:PE_omega[{}]",id,n+1); + else + return fmt::format("f_{}:PE_omega[none]",id); + } + n -= ilen; + } else { + ilen = 6; + if (n < ilen) { + if (n > 2) return fmt::format("f_{}:PE_omega[none]",id); + else if (p_flag[n]) + return fmt::format("f_{}:PE_omega[{}]",id,n+1); + else + return fmt::format("f_{}:PE_omega[none]",id); + } + n -= ilen; + } + + if (pstyle == ISO) { + ilen = 1; + if (n < ilen) + return fmt::format("f_{}:KE_omega_dot[{}]",id,n+1); + n -= ilen; + } else if (pstyle == ANISO) { + ilen = 3; + if (n < ilen) { + if (p_flag[n]) + return 
fmt::format("f_{}:KE_omega_dot[{}]",id,n+1); + else return fmt::format("f_{}:KE_omega_dot[none]",id); + } + n -= ilen; + } else { + ilen = 6; + if (n < ilen) { + if (p_flag[n]) + return fmt::format("f_{}:KE_omega_dot[{}]",id,n+1); + else return fmt::format("f_{}:KE_omega_dot[none]",id); + } + n -= ilen; + } + + if (mpchain) { + ilen = mpchain; + if (n < ilen) { + ich = n; + if (ich == 0) return fmt::format("f_{}:PE_etap[{}]",id,n+1); + else return fmt::format("f_{}:PE_etap[{}]",id,n+1); + } + n -= ilen; + ilen = mpchain; + if (n < ilen) { + ich = n; + if (ich == 0) + return fmt::format("f_{}:KE_etap_dot[{}]",id,n+1); + else + return fmt::format("f_{}:KE_etap_dot[{}]",id,n+1); + } + n -= ilen; + } + + if (deviatoric_flag) { + ilen = 1; + if (n < ilen) + return fmt::format("f_{}:PE_strain[{}]",id,n+1); + n -= ilen; + } + } + + return "none"; +} + +/* ---------------------------------------------------------------------- */ + void FixNH::reset_target(double t_new) { t_target = t_start = t_stop = t_new; diff --git a/src/fix_nh.h b/src/fix_nh.h index 3e406929b21..0bafe6b2f00 100644 --- a/src/fix_nh.h +++ b/src/fix_nh.h @@ -33,6 +33,7 @@ class FixNH : public Fix { void pre_exchange() override; double compute_scalar() override; double compute_vector(int) override; + std::string get_thermo_colname(int) override; void write_restart(FILE *) override; virtual int pack_restart_data(double *); // pack restart data void restart(char *) override; diff --git a/src/thermo.cpp b/src/thermo.cpp index 05131e244ed..33a1ed8990d 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1045,7 +1045,7 @@ void Thermo::parse_fields(const std::string &str) } if (ifix->thermo_modify_colname) - keyword_user[nfield] = ifix->get_thermo_colname(argindex1[nfield]); + keyword_user[nfield] = ifix->get_thermo_colname(argindex1[nfield]-1); field2index[nfield] = add_fix(argi.get_name()); addfield(word.c_str(), &Thermo::compute_fix, FLOAT); From b3022ec7db893f7f8d9a5a6ae4977c10730cf47c Mon Sep 17 00:00:00 
2001 From: jrgissing Date: Mon, 8 May 2023 10:17:32 -0400 Subject: [PATCH 003/604] finish fix_nh implementation include description of scalar output --- src/fix_nh.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index 15b05e121a0..39f8b6175de 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -1691,6 +1691,10 @@ double FixNH::compute_vector(int n) std::string FixNH::get_thermo_colname(int n) { + + // scalar value if n == -1 + if (n == -1) return fmt::format("f_{}:ecouple",id); + int ilen; if (tstat_flag) { From ce1bc2dae47869e3af4b80d2bc2963a0e4b5d1a3 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Mon, 8 May 2023 11:43:23 -0400 Subject: [PATCH 004/604] add legacy_colname option to thermo_modify this currently deletes all previous manually defined custom colnames as well --- src/thermo.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/thermo.cpp b/src/thermo.cpp index 33a1ed8990d..4e6f8d8fc5e 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -651,6 +651,14 @@ void Thermo::modify_params(int narg, char **arg) keyword_user[icol] = arg[iarg + 2]; iarg += 3; } + } else if (strcmp(arg[iarg], "legacy_colname") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "thermo_modify legacy_colname", error); + int legacy_colname = utils::logical(FLERR, arg[iarg + 1], false, lmp); + if (legacy_colname) + for (int i = 0; i < nfield; i++) + keyword_user[i] = {}; + iarg += 2; + } else if (strcmp(arg[iarg], "format") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "thermo_modify format", error); From 3880b5fbc8c62c36ca04c2b58c84d75febafc492 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Mon, 8 May 2023 17:17:23 -0400 Subject: [PATCH 005/604] example for compute_reduce indicates the reduce mode and 'replace' keyword --- src/compute.cpp | 1 + src/compute.h | 2 ++ src/compute_reduce.cpp | 37 +++++++++++++++++++++++++++---------- src/compute_reduce.h | 2 ++ src/thermo.cpp | 3 +++ 5 files changed, 35 
insertions(+), 10 deletions(-) diff --git a/src/compute.cpp b/src/compute.cpp index dcad4954fc8..f989ff1800b 100644 --- a/src/compute.cpp +++ b/src/compute.cpp @@ -60,6 +60,7 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) : // set child class defaults scalar_flag = vector_flag = array_flag = 0; + thermo_modify_colname = 0; peratom_flag = local_flag = pergrid_flag = 0; size_vector_variable = size_array_rows_variable = 0; diff --git a/src/compute.h b/src/compute.h index 72b0075fd6a..18bae8b30e8 100644 --- a/src/compute.h +++ b/src/compute.h @@ -49,6 +49,7 @@ class Compute : protected Pointers { int scalar_flag; // 0/1 if compute_scalar() function exists int vector_flag; // 0/1 if compute_vector() function exists int array_flag; // 0/1 if compute_array() function exists + int thermo_modify_colname; // 1 if compute has custom column names for output int size_vector; // length of global vector int size_array_rows; // rows in global array int size_array_cols; // columns in global array @@ -124,6 +125,7 @@ class Compute : protected Pointers { virtual void compute_local() {} virtual void compute_pergrid() {} virtual void set_arrays(int) {} + virtual std::string get_thermo_colname(int) { return {}; } virtual int pack_forward_comm(int, int *, double *, int, int *) { return 0; } virtual void unpack_forward_comm(int, int, double *) {} diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 6b27498eb70..fda32502a56 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -79,25 +79,27 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : iarg = 4; } - if (strcmp(arg[iarg], "sum") == 0) + modestr = arg[iarg]; + + if (modestr == "sum") mode = SUM; - else if (strcmp(arg[iarg], "sumsq") == 0) + else if (modestr == "sumsq") mode = SUMSQ; - else if (strcmp(arg[iarg], "sumabs") == 0) + else if (modestr == "sumabs") mode = SUMABS; - else if (strcmp(arg[iarg], "min") == 0) + else if (modestr == "min") mode = MINN; - else if (strcmp(arg[iarg],
"max") == 0) + else if (modestr == "max") mode = MAXX; - else if (strcmp(arg[iarg], "ave") == 0) + else if (modestr == "ave") mode = AVE; - else if (strcmp(arg[iarg], "avesq") == 0) + else if (modestr == "avesq") mode = AVESQ; - else if (strcmp(arg[iarg], "aveabs") == 0) + else if (modestr == "aveabs") mode = AVEABS; - else if (strcmp(arg[iarg], "maxabs") == 0) + else if (modestr == "maxabs") mode = MAXABS; - else if (strcmp(arg[iarg], "minabs") == 0) + else if (modestr == "minabs") mode = MINABS; else error->all(FLERR, "Unknown compute {} mode: {}", style, arg[iarg]); @@ -318,6 +320,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : } maxatom = 0; + thermo_modify_colname = 1; varatom = nullptr; } @@ -626,6 +629,20 @@ double ComputeReduce::compute_one(int m, int flag) /* ---------------------------------------------------------------------- */ +std::string ComputeReduce::get_thermo_colname(int m) { + if (replace[m] >= 0) { + auto &val1 = values[m]; + auto &val2 = values[replace[m]]; + return fmt::format("c_{}:c_{}[{}]<-{}(c_{})",id,val1.id,val1.argindex,modestr,val2.id); + } else { + auto &val = values[m]; + return fmt::format("c_{}:{}(c_{})",id,modestr,val.id); + } + return "none"; +} + +/* ---------------------------------------------------------------------- */ + bigint ComputeReduce::count(int m) { auto &val = values[m]; diff --git a/src/compute_reduce.h b/src/compute_reduce.h index f8f73cb17a6..df10f548666 100644 --- a/src/compute_reduce.h +++ b/src/compute_reduce.h @@ -34,10 +34,12 @@ class ComputeReduce : public Compute { void init() override; double compute_scalar() override; void compute_vector() override; + std::string get_thermo_colname(int) override; double memory_usage() override; protected: int mode, nvalues; + std::string modestr; struct value_t { int which; int argindex; diff --git a/src/thermo.cpp b/src/thermo.cpp index 4e6f8d8fc5e..61d9f3023f5 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1026,6 +1026,9 @@ void 
Thermo::parse_fields(const std::string &str) error->all(FLERR, "Thermo compute array is accessed out-of-range"); } + if (icompute->thermo_modify_colname) + keyword_user[nfield] = icompute->get_thermo_colname(argindex1[nfield]-1); + if (argindex1[nfield] == 0) field2index[nfield] = add_compute(argi.get_name(), SCALAR); else if (argindex2[nfield] == 0) From 0446178176fc409988e760125c9d735d59d54ad3 Mon Sep 17 00:00:00 2001 From: jrgissing Date: Mon, 17 Jul 2023 20:41:47 -0400 Subject: [PATCH 006/604] refactor to use 'auto' keyword no auto column names by default --- src/thermo.cpp | 17 +++++++---------- src/thermo.h | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/thermo.cpp b/src/thermo.cpp index 61d9f3023f5..63ab97e55b4 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -111,6 +111,7 @@ Thermo::Thermo(LAMMPS *_lmp, int narg, char **arg) : lostflag = lostbond = Thermo::ERROR; lostbefore = warnbefore = 0; flushflag = 0; + autocolname = 0; // set style and corresponding lineflag // custom style builds its own line of keywords, including wildcard expansion @@ -632,6 +633,10 @@ void Thermo::modify_params(int narg, char **arg) if (strcmp(arg[iarg + 1], "default") == 0) { for (auto &item : keyword_user) item.clear(); iarg += 2; + } else if (strcmp(arg[iarg + 1], "auto") == 0) { + autocolname = 1; + parse_fields(line); + iarg += 2; } else { if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "thermo_modify colname", error); int icol = -1; @@ -651,14 +656,6 @@ void Thermo::modify_params(int narg, char **arg) keyword_user[icol] = arg[iarg + 2]; iarg += 3; } - } else if (strcmp(arg[iarg], "legacy_colname") == 0) { - if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "thermo_modify legacy_colname", error); - int legacy_colname = utils::logical(FLERR, arg[iarg + 1], false, lmp); - if (legacy_colname) - for (int i = 0; i < nfield; i++) - keyword_user[i] = {}; - iarg += 2; - } else if (strcmp(arg[iarg], "format") == 0) { if (iarg + 2 > narg) 
utils::missing_cmd_args(FLERR, "thermo_modify format", error); @@ -1026,7 +1023,7 @@ void Thermo::parse_fields(const std::string &str) error->all(FLERR, "Thermo compute array is accessed out-of-range"); } - if (icompute->thermo_modify_colname) + if (autocolname && icompute->thermo_modify_colname) keyword_user[nfield] = icompute->get_thermo_colname(argindex1[nfield]-1); if (argindex1[nfield] == 0) @@ -1055,7 +1052,7 @@ void Thermo::parse_fields(const std::string &str) error->all(FLERR, "Thermo fix array is accessed out-of-range"); } - if (ifix->thermo_modify_colname) + if (autocolname && ifix->thermo_modify_colname) keyword_user[nfield] = ifix->get_thermo_colname(argindex1[nfield]-1); field2index[nfield] = add_fix(argi.get_name()); diff --git a/src/thermo.h b/src/thermo.h index eaec3eb9f8c..f07afa710fd 100644 --- a/src/thermo.h +++ b/src/thermo.h @@ -59,7 +59,7 @@ class Thermo : protected Pointers { int firststep; int lostbefore, warnbefore; - int flushflag, lineflag; + int flushflag, lineflag, autocolname; double last_tpcpu, last_spcpu; double last_time; From 3465fb26bb2f1f8ea40d6e7a9911e274e8f177f7 Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Wed, 23 Aug 2023 11:07:43 +0200 Subject: [PATCH 007/604] new harm copy paste --- src/fix_wall_harmonic_returned.cpp | 77 ++++++++++++++++++++++++++++++ src/fix_wall_harmonic_returned.h | 37 ++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 src/fix_wall_harmonic_returned.cpp create mode 100644 src/fix_wall_harmonic_returned.h diff --git a/src/fix_wall_harmonic_returned.cpp b/src/fix_wall_harmonic_returned.cpp new file mode 100644 index 00000000000..0e8fa22684c --- /dev/null +++ b/src/fix_wall_harmonic_returned.cpp @@ -0,0 +1,77 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia 
Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_wall_harmonic.h" +#include "atom.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +FixWallHarmonic::FixWallHarmonic(LAMMPS *lmp, int narg, char **arg) : FixWall(lmp, narg, arg) +{ + dynamic_group_allow = 1; +} + +/* ---------------------------------------------------------------------- + interaction of all particles in group with a wall + m = index of wall coeffs + which = xlo,xhi,ylo,yhi,zlo,zhi + error if any particle is on or behind wall +------------------------------------------------------------------------- */ + +void FixWallHarmonic::wall_particle(int m, int which, double coord) +{ + double delta, dr, fwall; + double vn; + + double **x = atom->x; + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + int dim = which / 2; + int side = which % 2; + if (side == 0) side = -1; + + int onflag = 0; + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + if (side < 0) + delta = x[i][dim] - coord; + else + delta = coord - x[i][dim]; + if (delta >= cutoff[m]) continue; + if (delta <= 0.0) { + onflag = 1; + continue; + } + dr = cutoff[m] - delta; + fwall = side * 2.0 * epsilon[m] * dr; + f[i][dim] -= fwall; + ewall[0] += epsilon[m] * dr * dr; + ewall[m + 1] += fwall; + + if (evflag) { + if (side < 0) + vn = -fwall * delta; + else + vn = fwall * delta; + v_tally(dim, i, vn); + } + } + + if (onflag) error->one(FLERR, "Particle on or inside fix wall surface"); +} diff --git a/src/fix_wall_harmonic_returned.h b/src/fix_wall_harmonic_returned.h new file mode 100644 index 
00000000000..34e9a9a2df4 --- /dev/null +++ b/src/fix_wall_harmonic_returned.h @@ -0,0 +1,37 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(wall/harmonic,FixWallHarmonic); +// clang-format on +#else + +#ifndef LMP_FIX_WALL_HARMONIC_H +#define LMP_FIX_WALL_HARMONIC_H + +#include "fix_wall.h" + +namespace LAMMPS_NS { + +class FixWallHarmonic : public FixWall { + public: + FixWallHarmonic(class LAMMPS *, int, char **); + void precompute(int) override {} + void wall_particle(int, int, double) override; +}; + +} // namespace LAMMPS_NS + +#endif +#endif From 5b302c31e1e164ddcb00201e91659b50a8ed0aac Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Wed, 23 Aug 2023 11:18:50 +0200 Subject: [PATCH 008/604] Reversed force, no warnings, comments --- src/fix_wall_harmonic_returned.cpp | 49 ++++++++++++++++-------------- src/fix_wall_harmonic_returned.h | 10 +++--- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/src/fix_wall_harmonic_returned.cpp b/src/fix_wall_harmonic_returned.cpp index 0e8fa22684c..fee814dd33e 100644 --- a/src/fix_wall_harmonic_returned.cpp +++ b/src/fix_wall_harmonic_returned.cpp @@ -11,7 +11,7 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "fix_wall_harmonic.h" +#include "fix_wall_harmonic_returned.h" #include "atom.h" #include "error.h" @@ -19,21 +19,28 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -FixWallHarmonic::FixWallHarmonic(LAMMPS *lmp, int narg, char **arg) : FixWall(lmp, narg, arg) +FixWallHarmonicReturned::FixWallHarmonicReturned(LAMMPS *lmp, int narg, char **arg) : FixWall(lmp, narg, arg) { dynamic_group_allow = 1; } /* ---------------------------------------------------------------------- interaction of all particles in group with a wall + recalling force applied if outside the control volume + and within the interaction cutoff m = index of wall coeffs - which = xlo,xhi,ylo,yhi,zlo,zhi - error if any particle is on or behind wall + which = 0,1,..,5 (xlo,xhi,ylo,yhi,zlo,zhi) + coord = + dim = 0,1,2 (x,y,z) + side = -1,1 (low, high) + if side is the low boundary, + no error if any particle is on or above the wall ------------------------------------------------------------------------- */ -void FixWallHarmonic::wall_particle(int m, int which, double coord) + +void FixWallHarmonicReturned::wall_particle(int m, int which, double coord) { - double delta, dr, fwall; + double dr, fwall; double vn; double **x = atom->x; @@ -45,33 +52,31 @@ void FixWallHarmonic::wall_particle(int m, int which, double coord) int side = which % 2; if (side == 0) side = -1; - int onflag = 0; - + // iterate through the atoms owned by the proc for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { + // calculate the distance (dr) of each atom from the wall if (side < 0) - delta = x[i][dim] - coord; + dr = coord - x[i][dim]; else - delta = coord - x[i][dim]; - if (delta >= cutoff[m]) continue; - if (delta <= 0.0) { - onflag = 1; + dr = x[i][dim] - coord; + if (dr >= cutoff[m]) continue; // no force if above the interaction cutoff + if (dr <= 0.0) { + /* No force if the 
particle is inside the control volume */ continue; } - dr = cutoff[m] - delta; - fwall = side * 2.0 * epsilon[m] * dr; - f[i][dim] -= fwall; - ewall[0] += epsilon[m] * dr * dr; - ewall[m + 1] += fwall; + fwall = side * 2.0 * epsilon[m] * dr; // calculate the simple harmonic force + f[i][dim] -= fwall; // apply the force over the atom in the same dimension as the wall + ewall[0] += epsilon[m] * dr * dr; // sum the energies of the walls for record + ewall[m + 1] += fwall; // sum the forces of the wall for record if (evflag) { if (side < 0) - vn = -fwall * delta; + vn = -fwall * dr; else - vn = fwall * delta; + vn = fwall * dr; v_tally(dim, i, vn); } } - if (onflag) error->one(FLERR, "Particle on or inside fix wall surface"); -} +} \ No newline at end of file diff --git a/src/fix_wall_harmonic_returned.h b/src/fix_wall_harmonic_returned.h index 34e9a9a2df4..8f2d3f0bd60 100644 --- a/src/fix_wall_harmonic_returned.h +++ b/src/fix_wall_harmonic_returned.h @@ -13,20 +13,20 @@ #ifdef FIX_CLASS // clang-format off -FixStyle(wall/harmonic,FixWallHarmonic); +FixStyle(wall/harmonic/returned,FixWallHarmonicReturned); // clang-format on #else -#ifndef LMP_FIX_WALL_HARMONIC_H -#define LMP_FIX_WALL_HARMONIC_H +#ifndef LMP_FIX_WALL_HARMONIC_RETURNED_H +#define LMP_FIX_WALL_HARMONIC_RETURNED_H #include "fix_wall.h" namespace LAMMPS_NS { -class FixWallHarmonic : public FixWall { +class FixWallHarmonicReturned : public FixWall { public: - FixWallHarmonic(class LAMMPS *, int, char **); + FixWallHarmonicReturned(class LAMMPS *, int, char **); void precompute(int) override {} void wall_particle(int, int, double) override; }; From 8b5a2b04e1cc8ea33e4f7e6592374a29174ab4e6 Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Wed, 23 Aug 2023 12:29:15 +0200 Subject: [PATCH 009/604] update the doc --- doc/src/fix_wall.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/src/fix_wall.rst b/doc/src/fix_wall.rst index eb8219e1573..3ed5233a4ad 100644 --- 
a/doc/src/fix_wall.rst +++ b/doc/src/fix_wall.rst @@ -4,6 +4,7 @@ .. index:: fix wall/lj1043 .. index:: fix wall/colloid .. index:: fix wall/harmonic +.. index:: fix wall/harmonic/returned .. index:: fix wall/lepton .. index:: fix wall/morse .. index:: fix wall/table @@ -25,6 +26,9 @@ fix wall/colloid command fix wall/harmonic command ========================= +fix wall/harmonic/returned command +================================== + fix wall/lepton command ========================= @@ -42,7 +46,7 @@ Syntax fix ID group-ID style [tabstyle] [N] face args ... keyword value ... * ID, group-ID are documented in :doc:`fix ` command -* style = *wall/lj93* or *wall/lj126* or *wall/lj1043* or *wall/colloid* or *wall/harmonic* or *wall/lepton* or *wall/morse* or *wall/table* +* style = *wall/lj93* or *wall/lj126* or *wall/lj1043* or *wall/colloid* or *wall/harmonic* or *wall/harmonic/returned* or *wall/lepton* or *wall/morse* or *wall/table* * tabstyle = *linear* or *spline* = method of table interpolation (only applies to *wall/table*) * N = use N values in *linear* or *spline* interpolation (only applies to *wall/table*) * one or more face/arg pairs may be appended @@ -187,6 +191,9 @@ spring potential: E = \epsilon \quad (r - r_c)^2 \qquad r < r_c +For style *wall/harmonic/returned*, the energy E is given by an attractive-only harmonic +spring potential of the same form as *wall/harmonic*. + For style *wall/morse*, the energy E is given by a Morse potential: .. math:: @@ -211,7 +218,7 @@ Optionally, the expression may use "rc" to refer to the cutoff distance for the given wall. Further constants in the expression can be defined in the same string as additional expressions separated by semicolons. The expression "k*(r-rc)^2;k=100.0" represents a repulsive-only harmonic -spring as in fix *wall/harmonic* with a force constant *K* (same as +spring as in fix *wall/harmonic* or *wall/harmonic/returned* with a force constant *K* (same as :math:`\epsilon` above) of 100 energy units.
More details on the Lepton expression strings are given below. @@ -310,7 +317,8 @@ particle and a 3d half-lattice of Lennard-Jones 12/6 particles of size the density of particles in the wall and colloid can be different, as specified by the :math:`\epsilon` prefactor. -For the *wall/harmonic* style, :math:`\epsilon` is effectively the spring +For the *wall/harmonic* and *wall/harmonic/returned* styles, +:math:`\epsilon` is effectively the spring constant K, and has units (energy/distance\^2). The input parameter :math:`\sigma` is ignored. The minimum energy position of the harmonic spring is at the *cutoff*\ . This is a repulsive-only spring since the From be073cdc8c3abba1df6ed0ea681dc23954768890 Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Wed, 23 Aug 2023 12:33:26 +0200 Subject: [PATCH 010/604] corrected white space --- src/fix_wall_harmonic_returned.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fix_wall_harmonic_returned.cpp b/src/fix_wall_harmonic_returned.cpp index fee814dd33e..25d98a110ee 100644 --- a/src/fix_wall_harmonic_returned.cpp +++ b/src/fix_wall_harmonic_returned.cpp @@ -30,7 +30,7 @@ FixWallHarmonicReturned::FixWallHarmonicReturned(LAMMPS *lmp, int narg, char **a and within the interaction cutoff m = index of wall coeffs which = 0,1,..,5 (xlo,xhi,ylo,yhi,zlo,zhi) - coord = + coord = wall coordinate on the dim dim = 0,1,2 (x,y,z) side = -1,1 (low, high) if side is the low boundary, @@ -57,9 +57,9 @@ void FixWallHarmonicReturned::wall_particle(int m, int which, double coord) if (mask[i] & groupbit) { // calculate the distance (dr) of each atom from the wall if (side < 0) - dr = coord - x[i][dim]; + dr = coord - x[i][dim]; else - dr = x[i][dim] - coord; + dr = x[i][dim] - coord; if (dr >= cutoff[m]) continue; // no force if above the interaction cutoff if (dr <= 0.0) { /* No force if the particle is inside the control volume */ @@ -79,4 +79,4 @@ void FixWallHarmonicReturned::wall_particle(int m, int which,
double coord) } } -} \ No newline at end of file +} From e108cf3ec325191ce7b136899d9ba424ee450001 Mon Sep 17 00:00:00 2001 From: "Dan S. Bolintineanu" Date: Wed, 23 Aug 2023 13:00:18 -0600 Subject: [PATCH 011/604] A few bug fixes for fix srd --- src/SRD/fix_srd.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index 86be5bab67b..30ead13fb9c 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -674,7 +674,7 @@ void FixSRD::pre_neighbor() hi = nbin2z - 1; } - for (iz = lo; iz < hi; iz++) + for (iz = lo; iz <= hi; iz++) for (ix = 0; ix < nbin2x; ix++) for (iy = 0; iy < nbin2y; iy++) { ibin = iz * nbin2y * nbin2x + iy * nbin2x + ix; @@ -1423,6 +1423,7 @@ void FixSRD::collisions_multi() tagint *tag = atom->tag; int *mask = atom->mask; int nlocal = atom->nlocal; + Big* bigfirst; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; @@ -1443,8 +1444,8 @@ void FixSRD::collisions_multi() k = binbig[ibin][m]; big = &biglist[k]; j = big->index; - if (j == jlast) continue; type = big->type; + if ((j == jlast) && (type == typefirst)) continue; if (type == SPHERE) inside = inside_sphere(x[i], x[j], big); @@ -1498,6 +1499,7 @@ void FixSRD::collisions_multi() t_first = t_remain; jfirst = j; typefirst = type; + bigfirst = big; xscollfirst[0] = xscoll[0]; xscollfirst[1] = xscoll[1]; xscollfirst[2] = xscoll[2]; @@ -1514,6 +1516,7 @@ void FixSRD::collisions_multi() if (t_first == 0.0) break; j = jlast = jfirst; type = typefirst; + big = bigfirst; xscoll[0] = xscollfirst[0]; xscoll[1] = xscollfirst[1]; xscoll[2] = xscollfirst[2]; From 00e9dcbdd15d620982d1135963d928c86d78e431 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 15 Dec 2023 13:52:26 -0700 Subject: [PATCH 012/604] Limiting references to gm class --- src/GRANULAR/gran_sub_mod_rolling.cpp | 44 +++++---- src/GRANULAR/gran_sub_mod_tangential.cpp | 110 +++++++++++++---------- src/GRANULAR/gran_sub_mod_twisting.cpp | 64 ++++++++----- 
src/GRANULAR/gran_sub_mod_twisting.h | 8 +- src/GRANULAR/granular_model.cpp | 2 +- 5 files changed, 137 insertions(+), 91 deletions(-) diff --git a/src/GRANULAR/gran_sub_mod_rolling.cpp b/src/GRANULAR/gran_sub_mod_rolling.cpp index c1dc53fcc8b..69776c5b9ef 100644 --- a/src/GRANULAR/gran_sub_mod_rolling.cpp +++ b/src/GRANULAR/gran_sub_mod_rolling.cpp @@ -68,23 +68,31 @@ void GranSubModRollingSDS::calculate_forces() double Frcrit, rolldotn, rollmag, prjmag, magfr, hist_temp[3], scalefac, temp_array[3]; double k_inv, magfr_inv; + double *nx = gm->nx; + double *vrl = gm->vrl; + double *fr = gm->fr; + double dt = gm->dt; + double Fncrit = gm->normal_model->Fncrit; + double *history = gm->history; + int history_update = gm->history_update; + rhist0 = history_index; rhist1 = rhist0 + 1; rhist2 = rhist1 + 1; - Frcrit = mu * gm->normal_model->Fncrit; + Frcrit = mu * Fncrit; - if (gm->history_update) { - hist_temp[0] = gm->history[rhist0]; - hist_temp[1] = gm->history[rhist1]; - hist_temp[2] = gm->history[rhist2]; - rolldotn = dot3(hist_temp, gm->nx); + if (history_update) { + hist_temp[0] = history[rhist0]; + hist_temp[1] = history[rhist1]; + hist_temp[2] = history[rhist2]; + rolldotn = dot3(hist_temp, nx); frameupdate = (fabs(rolldotn) * k) > (EPSILON * Frcrit); if (frameupdate) { // rotate into tangential plane rollmag = len3(hist_temp); // projection - scale3(rolldotn, gm->nx, temp_array); + scale3(rolldotn, nx, temp_array); sub3(hist_temp, temp_array, hist_temp); // also rescale to preserve magnitude @@ -93,32 +101,32 @@ void GranSubModRollingSDS::calculate_forces() else scalefac = 0; scale3(scalefac, hist_temp); } - scale3(gm->dt, gm->vrl, temp_array); + scale3(dt, vrl, temp_array); add3(hist_temp, temp_array, hist_temp); } - scaleadd3(-k, hist_temp, -gamma, gm->vrl, gm->fr); + scaleadd3(-k, hist_temp, -gamma, vrl, fr); // rescale frictional displacements and forces if needed - magfr = len3(gm->fr); + magfr = len3(fr); if (magfr > Frcrit) { rollmag = len3(hist_temp); 
if (rollmag != 0.0) { k_inv = 1.0 / k; magfr_inv = 1.0 / magfr; - scale3(-Frcrit * k_inv * magfr_inv, gm->fr, hist_temp); - scale3(-gamma * k_inv, gm->vrl, temp_array); + scale3(-Frcrit * k_inv * magfr_inv, fr, hist_temp); + scale3(-gamma * k_inv, vrl, temp_array); add3(hist_temp, temp_array, hist_temp); - scale3(Frcrit * magfr_inv, gm->fr); + scale3(Frcrit * magfr_inv, fr); } else { - zero3(gm->fr); + zero3(fr); } } - if (gm->history_update) { - gm->history[rhist0] = hist_temp[0]; - gm->history[rhist1] = hist_temp[1]; - gm->history[rhist2] = hist_temp[2]; + if (history_update) { + history[rhist0] = hist_temp[0]; + history[rhist1] = hist_temp[1]; + history[rhist2] = hist_temp[2]; } } diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp index 0fae97ee965..0e8f7c42147 100644 --- a/src/GRANULAR/gran_sub_mod_tangential.cpp +++ b/src/GRANULAR/gran_sub_mod_tangential.cpp @@ -68,14 +68,17 @@ void GranSubModTangentialLinearNoHistory::calculate_forces() // classic pair gran/hooke (no history) damp = xt * gm->damping_model->damp_prefactor; + double vrel = gm->vrel; /* scalar: multiplied, compared and divided below, unlike the arrays vtr/fs */ + double *vtr = gm->vtr; + double *fs = gm->fs; double Fscrit = mu * gm->normal_model->Fncrit; - double fsmag = damp * gm->vrel; + double fsmag = damp * vrel; double Ft; - if (gm->vrel != 0.0) Ft = MIN(Fscrit, fsmag) / gm->vrel; + if (vrel != 0.0) Ft = MIN(Fscrit, fsmag) / vrel; else Ft = 0.0; - scale3(-Ft, gm->vtr, gm->fs); + scale3(-Ft, vtr, fs); } /* ---------------------------------------------------------------------- @@ -112,18 +115,23 @@ void GranSubModTangentialLinearHistory::calculate_forces() double Fscrit = gm->normal_model->Fncrit * mu; double *history = & gm->history[history_index]; + double *nx = gm->nx; + double *vtr = gm->vtr; + double *fs = gm->fs; + double dt = gm->dt; + int history_update = gm->history_update; // rotate and update displacements / force. // see e.g. eq. 17 of Luding, Gran. 
Matter 2008, v10,p235 - if (gm->history_update) { - rsht = dot3(history, gm->nx); + if (history_update) { + rsht = dot3(history, nx); frame_update = (fabs(rsht) * k) > (EPSILON * Fscrit); if (frame_update) { shrmag = len3(history); // projection - scale3(rsht, gm->nx, temp_array); + scale3(rsht, nx, temp_array); sub3(history, temp_array, history); // also rescale to preserve magnitude @@ -135,28 +143,28 @@ void GranSubModTangentialLinearHistory::calculate_forces() // update history, tangential force // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46 - scale3(gm->dt, gm->vtr, temp_array); + scale3(dt, vtr, temp_array); add3(history, temp_array, history); } // tangential forces = history + tangential velocity damping - scale3(-k, history, gm->fs); - scale3(damp, gm->vtr, temp_array); - sub3(gm->fs, temp_array, gm->fs); + scale3(-k, history, fs); + scale3(damp, vtr, temp_array); + sub3(fs, temp_array, fs); // rescale frictional displacements and forces if needed - magfs = len3(gm->fs); + magfs = len3(fs); if (magfs > Fscrit) { shrmag = len3(history); if (shrmag != 0.0) { magfs_inv = 1.0 / magfs; - scale3(Fscrit * magfs_inv, gm->fs, history); - scale3(damp, gm->vtr, temp_array); + scale3(Fscrit * magfs_inv, fs, history); + scale3(damp, vtr, temp_array); add3(history, temp_array, history); scale3(-1.0 / k, history); - scale3(Fscrit * magfs_inv, gm->fs); + scale3(Fscrit * magfs_inv, fs); } else { - zero3(gm->fs); + zero3(fs); } } } @@ -181,40 +189,46 @@ void GranSubModTangentialLinearHistoryClassic::calculate_forces() double Fscrit = gm->normal_model->Fncrit * mu; double *history = & gm->history[history_index]; + double *nx = gm->nx; + double *vtr = gm->vtr; + double *fs = gm->fs; + double dt = gm->dt; + double contact_radius = gm->contact_radius; + int history_update = gm->history_update; // update history - if (gm->history_update) { - scale3(gm->dt, gm->vtr, temp_array); + if (history_update) { + scale3(dt, vtr, temp_array); add3(history, temp_array, 
history); } shrmag = len3(history); // rotate shear displacements - if (gm->history_update) { - rsht = dot3(history, gm->nx); - scale3(rsht, gm->nx, temp_array); + if (history_update) { + rsht = dot3(history, nx); + scale3(rsht, nx, temp_array); sub3(history, temp_array, history); } // tangential forces = history + tangential velocity damping - if (contact_radius_flag) scale3(-k * gm->contact_radius, history, gm->fs); - else scale3(-k, history, gm->fs); - scale3(damp, gm->vtr, temp_array); - sub3(gm->fs, temp_array, gm->fs); + if (contact_radius_flag) scale3(-k * contact_radius, history, fs); + else scale3(-k, history, fs); + scale3(damp, vtr, temp_array); + sub3(fs, temp_array, fs); // rescale frictional displacements and forces if needed - magfs = len3(gm->fs); + magfs = len3(fs); if (magfs > Fscrit) { if (shrmag != 0.0) { magfs_inv = 1.0 / magfs; - scale3(Fscrit * magfs_inv, gm->fs, history); - scale3(damp, gm->vtr, temp_array); + scale3(Fscrit * magfs_inv, fs, history); + scale3(damp, vtr, temp_array); add3(history, temp_array, history); scale3(-1.0 / k, history); - scale3(Fscrit * magfs_inv, gm->fs); + scale3(Fscrit * magfs_inv, fs); } else { - zero3(gm->fs); + zero3(fs); } } } @@ -290,18 +304,24 @@ void GranSubModTangentialMindlin::calculate_forces() double *history = & gm->history[history_index]; double Fscrit = gm->normal_model->Fncrit * mu; + double *nx = gm->nx; + double *vtr = gm->vtr; + double *fs = gm->fs; + double dt = gm->dt; + double contact_radius = gm->contact_radius; + int history_update = gm->history_update; k_scaled = k * gm->contact_radius; // on unloading, rescale the shear displacements/force if (mindlin_rescale) - if (gm->contact_radius < history[3]) - scale3(gm->contact_radius / history[3], history); + if (contact_radius < history[3]) + scale3(contact_radius / history[3], history); // rotate and update displacements / force. // see e.g. eq. 17 of Luding, Gran. 
Matter 2008, v10,p235 - if (gm->history_update) { - rsht = dot3(history, gm->nx); + if (history_update) { + rsht = dot3(history, nx); if (mindlin_force) { frame_update = fabs(rsht) > (EPSILON * Fscrit); } else { @@ -311,7 +331,7 @@ void GranSubModTangentialMindlin::calculate_forces() if (frame_update) { shrmag = len3(history); // projection - scale3(rsht, gm->nx, temp_array); + scale3(rsht, nx, temp_array); sub3(history, temp_array, history); // also rescale to preserve magnitude prjmag = len3(history); @@ -324,41 +344,41 @@ void GranSubModTangentialMindlin::calculate_forces() if (mindlin_force) { // tangential force // see e.g. eq. 18 of Thornton et al, Pow. Tech. 2013, v223,p30-46 - scale3(-k_scaled * gm->dt, gm->vtr, temp_array); + scale3(-k_scaled * dt, vtr, temp_array); } else { - scale3(gm->dt, gm->vtr, temp_array); + scale3(dt, vtr, temp_array); } add3(history, temp_array, history); - if (mindlin_rescale) history[3] = gm->contact_radius; + if (mindlin_rescale) history[3] = contact_radius; } // tangential forces = history + tangential velocity damping - scale3(-damp, gm->vtr, gm->fs); + scale3(-damp, vtr, fs); if (!mindlin_force) { scale3(k_scaled, history, temp_array); - sub3(gm->fs, temp_array, gm->fs); + sub3(fs, temp_array, fs); } else { - add3(gm->fs, history, gm->fs); + add3(fs, history, fs); } // rescale frictional displacements and forces if needed - magfs = len3(gm->fs); + magfs = len3(fs); if (magfs > Fscrit) { shrmag = len3(history); if (shrmag != 0.0) { magfs_inv = 1.0 / magfs; - scale3(Fscrit * magfs_inv, gm->fs, history); - scale3(damp, gm->vtr, temp_array); + scale3(Fscrit * magfs_inv, fs, history); + scale3(damp, vtr, temp_array); add3(history, temp_array, history); if (!mindlin_force) scale3(-1.0 / k_scaled, history); - scale3(Fscrit * magfs_inv, gm->fs); + scale3(Fscrit * magfs_inv, fs); } else { - zero3(gm->fs); + zero3(fs); } } } diff --git a/src/GRANULAR/gran_sub_mod_twisting.cpp b/src/GRANULAR/gran_sub_mod_twisting.cpp index 
c5135a1befc..2514f8985c2 100644 --- a/src/GRANULAR/gran_sub_mod_twisting.cpp +++ b/src/GRANULAR/gran_sub_mod_twisting.cpp @@ -57,29 +57,39 @@ void GranSubModTwistingMarshall::init() /* ---------------------------------------------------------------------- */ -void GranSubModTwistingMarshall::calculate_forces() +double GranSubModTwistingMarshall::calculate_forces() { - double signtwist, Mtcrit; + double signtwist, Mtcrit, magtortwist; + + double magtwist = gm->magtwist; + double Fncrit = gm->normal_model->Fncrit; + double tdamp = gm->tangential_model->damp; + double dt = gm->dt; + double *history = gm->history; + double contact_radius = gm->contact_radius; + int history_update = gm->history_update; // Calculate twist coefficients from tangential model & contact geometry // eq 32 of Marshall paper - double k = 0.5 * k_tang * gm->contact_radius * gm->contact_radius; - double damp = 0.5 * gm->tangential_model->damp * gm->contact_radius * gm->contact_radius; - double mu = TWOTHIRDS * mu_tang * gm->contact_radius; + double k = 0.5 * k_tang * contact_radius * contact_radius; + double damp = 0.5 * tdamp * contact_radius * contact_radius; + double mu = TWOTHIRDS * mu_tang * contact_radius; - if (gm->history_update) { - gm->history[history_index] += gm->magtwist * gm->dt; + if (history_update) { + history[history_index] += magtwist * dt; } // M_t torque (eq 30) - gm->magtortwist = -k * gm->history[history_index] - damp * gm->magtwist; - signtwist = (gm->magtwist > 0) - (gm->magtwist < 0); - Mtcrit = mu * gm->normal_model->Fncrit; // critical torque (eq 44) + magtortwist = -k * history[history_index] - damp * magtwist; + signtwist = (magtwist > 0) - (magtwist < 0); + Mtcrit = mu * Fncrit; // critical torque (eq 44) - if (fabs(gm->magtortwist) > Mtcrit) { - gm->history[history_index] = (Mtcrit * signtwist - damp * gm->magtwist) / k; - gm->magtortwist = -Mtcrit * signtwist; // eq 34 + if (fabs(magtortwist) > Mtcrit) { + history[history_index] = (Mtcrit * signtwist - damp * 
magtwist) / k; + magtortwist = -Mtcrit * signtwist; // eq 34 } + + return magtortwist; } /* ---------------------------------------------------------------------- @@ -106,21 +116,29 @@ void GranSubModTwistingSDS::coeffs_to_local() /* ---------------------------------------------------------------------- */ -void GranSubModTwistingSDS::calculate_forces() +double GranSubModTwistingSDS::calculate_forces() { - double signtwist, Mtcrit; + double signtwist, Mtcrit, magtortwist; - if (gm->history_update) { - gm->history[history_index] += gm->magtwist * gm->dt; + double magtwist = gm->magtwist; + double Fncrit = gm->normal_model->Fncrit; + double dt = gm->dt; + double *history = gm->history; + int history_update = gm->history_update; + + if (history_update) { + history[history_index] += magtwist * dt; } // M_t torque (eq 30) - gm->magtortwist = -k * gm->history[history_index] - damp * gm->magtwist; - signtwist = (gm->magtwist > 0) - (gm->magtwist < 0); - Mtcrit = mu * gm->normal_model->Fncrit; // critical torque (eq 44) + magtortwist = -k * history[history_index] - damp * magtwist; + signtwist = (magtwist > 0) - (magtwist < 0); + Mtcrit = mu * Fncrit; // critical torque (eq 44) - if (fabs(gm->magtortwist) > Mtcrit) { - gm->history[history_index] = (Mtcrit * signtwist - damp * gm->magtwist) / k; - gm->magtortwist = -Mtcrit * signtwist; // eq 34 + if (fabs(magtortwist) > Mtcrit) { + history[history_index] = (Mtcrit * signtwist - damp * magtwist) / k; + magtortwist = -Mtcrit * signtwist; // eq 34 } + + return magtortwist; } diff --git a/src/GRANULAR/gran_sub_mod_twisting.h b/src/GRANULAR/gran_sub_mod_twisting.h index 46f56d37ceb..138c5d59c85 100644 --- a/src/GRANULAR/gran_sub_mod_twisting.h +++ b/src/GRANULAR/gran_sub_mod_twisting.h @@ -39,7 +39,7 @@ class GranSubModTwisting : public GranSubMod { public: GranSubModTwisting(class GranularModel *, class LAMMPS *); virtual ~GranSubModTwisting() {}; - virtual void calculate_forces() = 0; + virtual double calculate_forces() = 0; 
}; /* ---------------------------------------------------------------------- */ @@ -47,7 +47,7 @@ class GranSubModTwisting : public GranSubMod { class GranSubModTwistingNone : public GranSubModTwisting { public: GranSubModTwistingNone(class GranularModel *, class LAMMPS *); - void calculate_forces() {}; + double calculate_forces() { return 0.0; }; /* no twisting sub-model: caller receives a zero twist torque magnitude */ }; /* ---------------------------------------------------------------------- */ @@ -56,7 +56,7 @@ class GranSubModTwistingMarshall : public GranSubModTwisting { public: GranSubModTwistingMarshall(class GranularModel *, class LAMMPS *); void init() override; - void calculate_forces(); + double calculate_forces(); protected: double k_tang, mu_tang; }; @@ -67,7 +67,7 @@ class GranSubModTwistingSDS : public GranSubModTwisting { public: GranSubModTwistingSDS(class GranularModel *, class LAMMPS *); void coeffs_to_local() override; - void calculate_forces(); + double calculate_forces(); protected: double k, mu, damp; }; diff --git a/src/GRANULAR/granular_model.cpp b/src/GRANULAR/granular_model.cpp index b39419187a0..2aca913b08a 100644 --- a/src/GRANULAR/granular_model.cpp +++ b/src/GRANULAR/granular_model.cpp @@ -473,7 +473,7 @@ void GranularModel::calculate_forces() // omega_T (eq 29 of Marshall) magtwist = dot3(relrot, nx); - twisting_model->calculate_forces(); + magtortwist = twisting_model->calculate_forces(); double tortwist[3]; scale3(magtortwist, nx, tortwist); From a74883257a2a02294885f5c86f0357b25a286c3d Mon Sep 17 00:00:00 2001 From: taenzel <66378296+taenzel@users.noreply.github.com> Date: Thu, 8 May 2025 15:35:42 +0200 Subject: [PATCH 013/604] Make fix-srd collision cells follow PBCs and LEBCs. 
--- src/SRD/fix_srd.cpp | 59 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index 6b8ce1e9d60..4a869e05258 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -830,6 +830,8 @@ void FixSRD::reset_velocities() double vsq, tbin, scale; double *vave, *xlamda; double vstream[3]; + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; // if requested, perform a dynamic shift of bin positions @@ -881,6 +883,29 @@ void FixSRD::reset_velocities() iz = MAX(iz, binlo[2]); iz = MIN(iz, binhi[2]); + // link first and last cells for PBCs, shift velocity if velocity remap + if (domain->xperiodic && ix == binhi[0]) { + ix = binlo[0]; + if (domain->deform_vremap) { + v[i][0] -= h_rate[0]; + } + } + if (domain->yperiodic && iy == binhi[1]) { + iy = binlo[1]; + if (domain->deform_vremap) { + v[i][0] -= h_rate[5]; + v[i][1] -= h_rate[1]; + } + } + if (domain->zperiodic && iz == binhi[2]) { + iz = binlo[2]; + if (domain->deform_vremap) { + v[i][0] -= h_rate[4]; + v[i][1] -= h_rate[3]; + v[i][2] -= h_rate[2]; + } + } + ibin = (iz - binlo[2]) * nbiny * nbinx + (iy - binlo[1]) * nbinx + (ix - binlo[0]); binnext[i] = binhead[ibin]; binhead[ibin] = i; @@ -946,8 +971,6 @@ void FixSRD::reset_velocities() srd_bin_count = 0; if (dimension == 2) axis = 2; - double *h_rate = domain->h_rate; - double *h_ratelo = domain->h_ratelo; for (i = 0; i < nbins; i++) { vbin[i].value[0] = 0.0; @@ -1053,6 +1076,38 @@ void FixSRD::reset_velocities() } } } + + // undo velocity remap + if (deformflag && domain->deform_vremap) { + domain->x2lamda(nlocal); + for (i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + ix = static_cast<int>((x[i][0] - corner[0]) * bininv1x); + ix = MAX(ix, binlo[0]); + ix = MIN(ix, binhi[0]); + iy = static_cast<int>((x[i][1] - corner[1]) * bininv1y); + iy = MAX(iy, binlo[1]); + iy = MIN(iy, binhi[1]); + iz = static_cast<int>((x[i][2] - corner[2]) * bininv1z); + iz = 
MAX(iz, binlo[2]); + iz = MIN(iz, binhi[2]); + + if (domain->xperiodic && ix == binhi[0]) { + v[i][0] += h_rate[0]; + } + if (domain->yperiodic && iy == binhi[1]) { + v[i][0] += h_rate[5]; + v[i][1] += h_rate[1]; + } + if (domain->zperiodic && iz == binhi[2]) { + v[i][0] += h_rate[4]; + v[i][1] += h_rate[3]; + v[i][2] += h_rate[2]; + } + } + } + domain->lamda2x(nlocal); + } } /* ---------------------------------------------------------------------- From 695d032b257a31e55a455bcd2ba97c3168c2d55a Mon Sep 17 00:00:00 2001 From: taenzel <66378296+taenzel@users.noreply.github.com> Date: Fri, 9 May 2025 10:51:53 +0200 Subject: [PATCH 014/604] Add profile-unbiased thermostat for fix srd. Update documentation. --- doc/src/fix_srd.rst | 46 +++++++++++++++++++++++++-------------------- src/SRD/fix_srd.cpp | 11 +++++++++-- src/SRD/fix_srd.h | 2 +- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/doc/src/fix_srd.rst b/doc/src/fix_srd.rst index 7e8cf96f6b6..27a6c4388ec 100644 --- a/doc/src/fix_srd.rst +++ b/doc/src/fix_srd.rst @@ -41,6 +41,7 @@ Syntax *possible* = shift depending on mean free path and bin size shiftseed = random # seed (positive integer) *tstat* value = *yes* or *no* = thermostat SRD particles or not + *put* value = *yes* or *no* = use profile-unbiased thermostat or not *rescale* value = *yes* or *no* or *rotate* or *collide* = rescaling of SRD velocities *yes* = rescale during velocity rotation and collisions *no* = no rescaling @@ -52,6 +53,7 @@ Examples .. code-block:: LAMMPS + fix 1 all srd 10 NULL 1.0 1.0 482984 fix 1 srd srd 10 big 1.0 0.25 482984 fix 1 srd srd 10 big 0.5 0.25 482984 collision slip search 0.5 @@ -271,7 +273,7 @@ a vector whose coordinates are chosen randomly in the range [-1/2 bin size, 1/2 bin size]. Note that all particles are shifted by the same vector. The specified random number *shiftseed* is used to generate these vectors. 
This operation sufficiently randomizes which SRD -particles are in the same bin, even if :math:`lambda` is small. +particles are in the same bin, even if :math:`\lambda` is small. If the *shift* flag is set to *no*, then no shifting is performed, but bin data will be communicated if bins overlap processor boundaries. An @@ -281,26 +283,30 @@ only if :math:`\lambda < 0.6` of the SRD bin size. A warning is generated to let you know this is occurring. If the *shift* flag is set to *yes* then shifting is performed regardless of the magnitude of :math:`\lambda`. Note that the *shiftseed* is not used if the *shift* -flag is set to *no*, but must still be specified. - -Note that shifting of SRD coordinates requires extra communication, -hence it should not normally be enabled unless required. +flag is set to *no*, but must still be specified. Note that shifting of SRD +coordinates requires extra communication, hence it should not normally be +enabled unless required. The *tstat* keyword will thermostat the SRD particles to the specified -*Tsrd*\ . This is done every N timesteps, during the velocity rotation -operation, by rescaling the thermal velocity of particles in each SRD -bin to the desired temperature. If there is a streaming velocity -associated with the system, e.g. due to use of the :doc:`fix deform ` command to perform a simulation undergoing -shear, then that is also accounted for. The mean velocity of each bin -of SRD particles is set to the position-dependent streaming velocity, -based on the coordinates of the center of the SRD bin. Note that -collisions of SRD particles with big particles or walls has a -thermostatting effect on the colliding particles, so it may not be -necessary to thermostat the SRD particles on a bin by bin basis in -that case. 
Also note that for streaming simulations, if no -thermostatting is performed (the default), then it may take a long -time for the SRD fluid to come to equilibrium with a velocity profile -that matches the simulation box deformation. +*Tsrd*\ . This is done every N timesteps, during the velocity rotation +operation, by rescaling the thermal velocities of particles in each SRD +bin to the desired temperature. Note that collisions of SRD particles with +big particles or walls have a thermostatting effect on the colliding particles, +so it may not be necessary to thermostat the SRD particles on a bin by bin +basis in that case. + +The *put* keyword controls how the thermostat operates if there is a streaming +velocity associated with the system, e.g. due to use of the +:doc:`fix deform ` command to perform a simulation undergoing +shear. The default case, *no*, is profile-biased: velocities relative to the +mean velocity of the bin are rescaled, and then the mean velocity of each bin +is set to the position-dependent streaming velocity, based on the coordinates +of the center of the SRD bin. This enforces a linear velocity profile. With +*yes*, after rescaling, the mean velocity of the bin is not changed, which +renders the thermostat profile-unbiased. Note that for streaming simulations, +if no thermostatting is performed (the default), it may take a long time for +the SRD fluid to come to equilibrium with a velocity profile that matches the +simulation box deformation. The *rescale* keyword enables rescaling of an SRD particle's velocity if it would travel more than 4 mean-free paths in an SRD timestep. If @@ -397,7 +403,7 @@ Default The option defaults are: *lamda* (:math:`\lambda`) is inferred from *Tsrd*, collision = noslip, overlap = no, inside = error, exact = yes, radius = 1.0, bounce = 0, search = hgrid, cubic = error 0.01, shift = no, tstat = -no, and rescale = yes. +no, put = no, and rescale = yes. 
---------- diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index 4a869e05258..d372c9ac5fc 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -123,6 +123,7 @@ FixSRD::FixSRD(LAMMPS *lmp, int narg, char **arg) : shiftuser = SHIFT_NO; shiftseed = 0; tstat = 0; + putflag = 0; rescale_rotate = rescale_collide = 1; int iarg = 8; @@ -199,6 +200,10 @@ FixSRD::FixSRD(LAMMPS *lmp, int narg, char **arg) : if (iarg + 2 > narg) error->all(FLERR, "Illegal fix srd command"); tstat = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; + } else if (strcmp(arg[iarg], "put") == 0) { + if (iarg + 2 > narg) error->all(FLERR, "Illegal fix srd command: stream"); + putflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; } else if (strcmp(arg[iarg], "rescale") == 0) { if (iarg + 2 > narg) error->all(FLERR, "Illegal fix srd command"); if (strcmp(arg[iarg + 1], "no") == 0) @@ -232,6 +237,7 @@ FixSRD::FixSRD(LAMMPS *lmp, int narg, char **arg) : if (cubictol < 0.0 || cubictol > 1.0) error->all(FLERR, "Illegal fix srd command"); if ((shiftuser == SHIFT_YES || shiftuser == SHIFT_POSSIBLE) && shiftseed <= 0) error->all(FLERR, "Illegal fix srd command"); + if (putflag && !tstat) error->all(FLERR, "PUT requires tstat"); // initialize Marsaglia RNG with processor-unique seed @@ -961,7 +967,7 @@ void FixSRD::reset_velocities() int dof_temp = 1; int dof_tstat; if (tstat) { - if (deformflag) + if (deformflag && !putflag) dof_tstat = dof_temp = 0; else dof_tstat = 1; @@ -1020,7 +1026,7 @@ void FixSRD::reset_velocities() vave = vbin[i].vsum; - if (deformflag) { + if (deformflag && !putflag) { xlamda = vbin[i].xctr; vstream[0] = h_rate[0] * xlamda[0] + h_rate[5] * xlamda[1] + h_rate[4] * xlamda[2] + h_ratelo[0]; @@ -2829,6 +2835,7 @@ void FixSRD::parameterize() mesg += fmt::format(" SRD per actual grid cell = {:.8}\n", srd_per_cell); mesg += fmt::format(" SRD viscosity = {:.8}\n", viscosity); mesg += fmt::format(" big/SRD mass density ratio = {:.8}\n", mdratio); 
+ mesg += fmt::format(" PUT = {}\n", putflag); utils::logmesg(lmp, mesg); } diff --git a/src/SRD/fix_srd.h b/src/SRD/fix_srd.h index 1a06b7cae4f..6c55cfbcd14 100644 --- a/src/SRD/fix_srd.h +++ b/src/SRD/fix_srd.h @@ -43,7 +43,7 @@ class FixSRD : public Fix { int me, nprocs; int bigexist, biggroup, biggroupbit; int collidestyle, lamdaflag, overlap, insideflag, exactflag, maxbounceallow; - int cubicflag, shiftuser, shiftseed, shiftflag, tstat; + int cubicflag, shiftuser, shiftseed, shiftflag, tstat, putflag; int rescale_rotate, rescale_collide; double gridsrd, gridsearch, lamda, radfactor, cubictol; int triclinic, change_size, change_shape, deformflag; From 989f5a39d9d060fe136a9831e66af25a46f9430b Mon Sep 17 00:00:00 2001 From: taenzel <66378296+taenzel@users.noreply.github.com> Date: Fri, 23 May 2025 17:36:13 +0200 Subject: [PATCH 015/604] Make new velocity remapping of fix srd compatible with mpi. --- src/SRD/fix_srd.cpp | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index d372c9ac5fc..e95ea0f3d0b 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -889,23 +889,16 @@ void FixSRD::reset_velocities() iz = MAX(iz, binlo[2]); iz = MIN(iz, binhi[2]); - // link first and last cells for PBCs, shift velocity if velocity remap - if (domain->xperiodic && ix == binhi[0]) { - ix = binlo[0]; - if (domain->deform_vremap) { + if (deformflag) { + // shift velocities in last bins + if (domain->xperiodic && ix == nbin1x) { v[i][0] -= h_rate[0]; } - } - if (domain->yperiodic && iy == binhi[1]) { - iy = binlo[1]; - if (domain->deform_vremap) { + if (domain->yperiodic && iy == nbin1y) { v[i][0] -= h_rate[5]; v[i][1] -= h_rate[1]; } - } - if (domain->zperiodic && iz == binhi[2]) { - iz = binlo[2]; - if (domain->deform_vremap) { + if (domain->zperiodic && iz == nbin1z) { v[i][0] -= h_rate[4]; v[i][1] -= h_rate[3]; v[i][2] -= h_rate[2]; @@ -1083,8 +1076,8 @@ void 
FixSRD::reset_velocities() } } - // undo velocity remap - if (deformflag && domain->deform_vremap) { + // undo velocity remap (only if using PUT or tstat no) + if (deformflag && (putflag || !tstat)) { domain->x2lamda(nlocal); for (i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { @@ -1098,14 +1091,14 @@ void FixSRD::reset_velocities() iz = MAX(iz, binlo[2]); iz = MIN(iz, binhi[2]); - if (domain->xperiodic && ix == binhi[0]) { + if (domain->xperiodic && ix == nbin1x) { v[i][0] += h_rate[0]; } - if (domain->yperiodic && iy == binhi[1]) { + if (domain->yperiodic && iy == nbin1y) { v[i][0] += h_rate[5]; v[i][1] += h_rate[1]; } - if (domain->zperiodic && iz == binhi[2]) { + if (domain->zperiodic && iz == nbin1z) { v[i][0] += h_rate[4]; v[i][1] += h_rate[3]; v[i][2] += h_rate[2]; @@ -2835,7 +2828,7 @@ void FixSRD::parameterize() mesg += fmt::format(" SRD per actual grid cell = {:.8}\n", srd_per_cell); mesg += fmt::format(" SRD viscosity = {:.8}\n", viscosity); mesg += fmt::format(" big/SRD mass density ratio = {:.8}\n", mdratio); - mesg += fmt::format(" PUT = {}\n", putflag); + mesg += fmt::format(" unbiased profile = {}\n", putflag); utils::logmesg(lmp, mesg); } From a22a0a19b2e204900214da4e950de4780ff81035 Mon Sep 17 00:00:00 2001 From: taenzel <66378296+taenzel@users.noreply.github.com> Date: Fri, 23 May 2025 17:37:19 +0200 Subject: [PATCH 016/604] Update keyword for profile-unbiased thermostat --- doc/src/fix_srd.rst | 6 +++--- src/SRD/fix_srd.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/fix_srd.rst b/doc/src/fix_srd.rst index 27a6c4388ec..e38c8f90df0 100644 --- a/doc/src/fix_srd.rst +++ b/doc/src/fix_srd.rst @@ -41,7 +41,7 @@ Syntax *possible* = shift depending on mean free path and bin size shiftseed = random # seed (positive integer) *tstat* value = *yes* or *no* = thermostat SRD particles or not - *put* value = *yes* or *no* = use profile-unbiased thermostat or not + *unbiased* value = *yes* or *no* = use profile-unbiased 
thermostat or not *rescale* value = *yes* or *no* or *rotate* or *collide* = rescaling of SRD velocities *yes* = rescale during velocity rotation and collisions *no* = no rescaling @@ -295,7 +295,7 @@ big particles or walls have a thermostatting effect on the colliding particles, so it may not be necessary to thermostat the SRD particles on a bin by bin basis in that case. -The *put* keyword controls how the thermostat operates if there is a streaming +The *unbiased* keyword controls how the thermostat operates if there is a streaming velocity associated with the system, e.g. due to use of the :doc:`fix deform ` command to perform a simulation undergoing shear. The default case, *no*, is profile-biased: velocities relative to the @@ -403,7 +403,7 @@ Default The option defaults are: *lamda* (:math:`\lambda`) is inferred from *Tsrd*, collision = noslip, overlap = no, inside = error, exact = yes, radius = 1.0, bounce = 0, search = hgrid, cubic = error 0.01, shift = no, tstat = -no, put = no, and rescale = yes. +no, unbiased = no, and rescale = yes. 
---------- diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index e95ea0f3d0b..a17306b8d7d 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -200,7 +200,7 @@ FixSRD::FixSRD(LAMMPS *lmp, int narg, char **arg) : if (iarg + 2 > narg) error->all(FLERR, "Illegal fix srd command"); tstat = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg], "put") == 0) { + } else if (strcmp(arg[iarg], "unbiased") == 0) { if (iarg + 2 > narg) error->all(FLERR, "Illegal fix srd command: stream"); putflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; From 8aab1e79d422bf7db46c4658ed12e908451f040c Mon Sep 17 00:00:00 2001 From: megmcca Date: Tue, 17 Jun 2025 13:01:55 -0600 Subject: [PATCH 017/604] create basic files --- src/KOKKOS/pair_hybrid_scaled_kokkos.cpp | 796 +++++++++++++++++++++++ src/KOKKOS/pair_hybrid_scaled_kokkos.h | 65 ++ 2 files changed, 861 insertions(+) create mode 100644 src/KOKKOS/pair_hybrid_scaled_kokkos.cpp create mode 100644 src/KOKKOS/pair_hybrid_scaled_kokkos.h diff --git a/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp b/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp new file mode 100644 index 00000000000..9878262162f --- /dev/null +++ b/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp @@ -0,0 +1,796 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "pair_hybrid_scaled_kokkos.h" + +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "input.h" +#include "memory.h" +#include "respa.h" +#include "update.h" +#include "variable.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairHybridScaledKokkos::PairHybridScaledKokkos(LAMMPS *lmp) : PairHybridKokkos(lmp), fsum(nullptr), tsum(nullptr), scaleval(nullptr), scaleidx(nullptr), + atomvar(nullptr), atomscale(nullptr) +{ + nmaxfsum = -1; + + // set comm size needed by this Pair (if atomscaleflag) + + comm_forward = 1; + +} + + +PairHybridScaledKokkos::~PairHybridScaledKokkos() +{ + memory->destroy(fsum); + memory->destroy(tsum); + delete[] scaleval; + delete[] scaleidx; + delete[] atomvar; + memory->destroy(atomscale); +} + +/* ---------------------------------------------------------------------- + call each sub-style's compute() or compute_outer() function + accumulate sub-style global/peratom energy/virial in hybrid + for global vflag = VIRIAL_PAIR: + each sub-style computes own virial[6] + sum sub-style virial[6] to hybrid's virial[6] + for global vflag = VIRIAL_FDOTR: + call sub-style with adjusted vflag to prevent it calling + virial_fdotr_compute() + hybrid calls virial_fdotr_compute() on final accumulated f +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::compute(int eflag, int vflag) +{ + int i, j, m, n; + + // update scale values from variables where needed + + const int nvars = scalevars.size(); + int atomscaleflag = 0; + if (nvars > 0) { + auto vals = new double[nvars]; + auto vars = new int[nvars]; + for (int k = 0; k < nvars; ++k) { + int m = input->variable->find(scalevars[k].c_str()); + if (m < 0) + error->all(FLERR, "Variable '{}' not found when updating scale factors", 
scalevars[k]); + + // for equal-style, compute variable, set variable index to -1 + if (input->variable->equalstyle(m)) { + vals[k] = input->variable->compute_equal(m); + vars[k] = -1; + // for atom-style, store variable index, set variable to 0.0, set atomscaleflag + } else if (input->variable->atomstyle(m)) { + vals[k] = 0.0; + vars[k] = m; + atomscaleflag = 1; + } else + error->all(FLERR, "Variable '{}' has incompatible style", scalevars[k]); + } + for (int k = 0; k < nstyles; ++k) { + if (scaleidx[k] >= 0) { + scaleval[k] = vals[scaleidx[k]]; + atomvar[k] = vars[scaleidx[k]]; + } + } + delete[] vals; + delete[] vars; + } + + // check if no_virial_fdotr_compute is set and global component of + // incoming vflag = VIRIAL_FDOTR + // if so, reset vflag as if global component were VIRIAL_PAIR + // necessary since one or more sub-styles cannot compute virial as F dot r + + if (no_virial_fdotr_compute && (vflag & VIRIAL_FDOTR)) + vflag = VIRIAL_PAIR | (vflag & ~VIRIAL_FDOTR); + + ev_init(eflag, vflag); + + // grow fsum array if needed, and copy existing forces (usually 0.0) to it. 
+ + if (atom->nmax > nmaxfsum) { + memory->destroy(fsum); + if (atom->torque_flag) memory->destroy(tsum); + if (atomscaleflag) memory->destroy(atomscale); + nmaxfsum = atom->nmax; + memory->create(fsum, nmaxfsum, 3, "pair:fsum"); + if (atom->torque_flag) memory->create(tsum, nmaxfsum, 3, "pair:tsum"); + if (atomscaleflag) memory->create(atomscale, nmaxfsum, "pair:atomscale"); + } + const int nall = atom->nlocal + atom->nghost; + auto f = atom->f; + auto t = atom->torque; + for (i = 0; i < nall; ++i) { + fsum[i][0] = f[i][0]; + fsum[i][1] = f[i][1]; + fsum[i][2] = f[i][2]; + if (atom->torque_flag) { + tsum[i][0] = t[i][0]; + tsum[i][1] = t[i][1]; + tsum[i][2] = t[i][2]; + } + } + + // check if global component of incoming vflag = VIRIAL_FDOTR + // if so, reset vflag passed to substyle so VIRIAL_FDOTR is turned off + // necessary so substyle will not invoke virial_fdotr_compute() + + int vflag_substyle; + if (vflag & VIRIAL_FDOTR) + vflag_substyle = vflag & ~VIRIAL_FDOTR; + else + vflag_substyle = vflag; + + double *saved_special = save_special(); + + // check if we are running with r-RESPA using the hybrid keyword + + Respa *respa = nullptr; + respaflag = 0; + if (utils::strmatch(update->integrate_style, "^respa")) { + respa = dynamic_cast(update->integrate); + if (respa->nhybrid_styles > 0) respaflag = 1; + } + + for (m = 0; m < nstyles; m++) { + + // clear forces and torques + + memset(&f[0][0], 0, nall * 3 * sizeof(double)); + if (atom->torque_flag) memset(&t[0][0], 0, nall * 3 * sizeof(double)); + + set_special(m); + + if (!respaflag || (respaflag && respa->hybrid_compute[m])) { + + // invoke compute() unless compute flag is turned off or + // outerflag is set and sub-style has a compute_outer() method + + if (styles[m]->compute_flag == 0) continue; + if (outerflag && styles[m]->respa_enable) + styles[m]->compute_outer(eflag, vflag_substyle); + else + styles[m]->compute(eflag, vflag_substyle); + } + + // add scaled forces to global sum + const double scale = 
scaleval[m]; + + // if scale factor is constant or equal-style variable + if (scaleidx[m] < 0 || atomvar[m] < 0) { + for (i = 0; i < nall; ++i) { + fsum[i][0] += scale * f[i][0]; + fsum[i][1] += scale * f[i][1]; + fsum[i][2] += scale * f[i][2]; + if (atom->torque_flag) { + tsum[i][0] += scale * t[i][0]; + tsum[i][1] += scale * t[i][1]; + tsum[i][2] += scale * t[i][2]; + } + } + // if scale factor is atom-style variable + } else { + const int igroupall = 0; + input->variable->compute_atom(atomvar[m], igroupall, atomscale, 1, 0); + comm->forward_comm(this); + for (i = 0; i < nall; ++i) { + const double ascale = atomscale[i]; + fsum[i][0] += ascale * f[i][0]; + fsum[i][1] += ascale * f[i][1]; + fsum[i][2] += ascale * f[i][2]; + if (atom->torque_flag) { + tsum[i][0] += ascale * t[i][0]; + tsum[i][1] += ascale * t[i][1]; + tsum[i][2] += ascale * t[i][2]; + } + } + } + + restore_special(saved_special); + + // jump to next sub-style if r-RESPA does not want global accumulated data + + if (respaflag && !respa->tally_global) continue; + + if (eflag_global) { + eng_vdwl += scale * styles[m]->eng_vdwl; + eng_coul += scale * styles[m]->eng_coul; + } + if (vflag_global) { + for (n = 0; n < 6; n++) virial[n] += scale * styles[m]->virial[n]; + } + if (eflag_atom) { + n = atom->nlocal; + if (force->newton_pair) n += atom->nghost; + double *eatom_substyle = styles[m]->eatom; + for (i = 0; i < n; i++) eatom[i] += scale * eatom_substyle[i]; + } + if (vflag_atom) { + n = atom->nlocal; + if (force->newton_pair) n += atom->nghost; + double **vatom_substyle = styles[m]->vatom; + for (i = 0; i < n; i++) + for (j = 0; j < 6; j++) vatom[i][j] += scale * vatom_substyle[i][j]; + } + + // substyles may be CENTROID_SAME or CENTROID_AVAIL + + if (cvflag_atom) { + n = atom->nlocal; + if (force->newton_pair) n += atom->nghost; + if (styles[m]->centroidstressflag == CENTROID_AVAIL) { + double **cvatom_substyle = styles[m]->cvatom; + for (i = 0; i < n; i++) + for (j = 0; j < 9; j++) cvatom[i][j] += 
scale * cvatom_substyle[i][j]; + } else { + double **vatom_substyle = styles[m]->vatom; + for (i = 0; i < n; i++) { + for (j = 0; j < 6; j++) { cvatom[i][j] += scale * vatom_substyle[i][j]; } + for (j = 6; j < 9; j++) { cvatom[i][j] += scale * vatom_substyle[i][j - 3]; } + } + } + } + } + + // copy accumulated scaled forces to original force array + + for (i = 0; i < nall; ++i) { + f[i][0] = fsum[i][0]; + f[i][1] = fsum[i][1]; + f[i][2] = fsum[i][2]; + if (atom->torque_flag) { + t[i][0] = tsum[i][0]; + t[i][1] = tsum[i][1]; + t[i][2] = tsum[i][2]; + } + } + delete[] saved_special; + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- + create one pair style for each arg in list +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::settings(int narg, char **arg) +{ + if (narg < 1) error->all(FLERR, "Illegal pair_style command"); + + if (lmp->kokkos && !utils::strmatch(force->pair_style, "^hybrid.*/kk$")) + error->all(FLERR, "Must use pair_style {}/kk with Kokkos", force->pair_style); + + if (atom->avec->forceclearflag) + error->all(FLERR, "Atom style is not compatible with pair_style hybrid/scaled"); + + // delete old lists, since cannot just change settings + + if (nstyles > 0) { + for (int m = 0; m < nstyles; m++) { + delete styles[m]; + delete[] keywords[m]; + delete[] special_lj[m]; + delete[] special_coul[m]; + } + delete[] styles; + delete[] cutmax_style; + delete[] keywords; + delete[] multiple; + delete[] special_lj; + delete[] special_coul; + delete[] compute_tally; + delete[] scaleval; + delete[] scaleidx; + delete[] atomvar; + scalevars.clear(); + } + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(cutghost); + memory->destroy(nmap); + memory->destroy(map); + } + allocated = 0; + + // allocate list of sub-styles as big as possibly needed if no extra args + + styles = new Pair *[narg]; + 
cutmax_style = new double[narg]; + memset(cutmax_style, 0.0, narg * sizeof(double)); + keywords = new char *[narg]; + multiple = new int[narg]; + + special_lj = new double *[narg]; + special_coul = new double *[narg]; + compute_tally = new int[narg]; + + scaleval = new double[narg]; + scaleidx = new int[narg]; + atomvar = new int[narg]; + scalevars.reserve(narg); + + // allocate each sub-style + // allocate uses suffix, but don't store suffix version in keywords, + // else syntax in coeff() will not match + // call settings() with set of args that are not pair style names + // use force->pair_map to determine which args these are + + int iarg, jarg, dummy; + + iarg = 0; + nstyles = 0; + while (iarg < narg - 1) { + + // first process scale factor or variable + // scaleidx[k] < 0 indicates constant value, otherwise index in variable name list + // initialize atomvar[k] to -1, indicates not atom-style variable + + double val = 0.0; + int idx = -1; + if (utils::strmatch(arg[iarg], "^v_")) { + for (std::size_t i = 0; i < scalevars.size(); ++i) { + if (scalevars[i] == arg[iarg] + 2) { + idx = i; + break; + } + } + if (idx < 0) { + idx = scalevars.size(); + scalevars.emplace_back(arg[iarg] + 2); + } + } else { + val = utils::numeric(FLERR, arg[iarg], false, lmp); + } + scaleval[nstyles] = val; + scaleidx[nstyles] = idx; + atomvar[nstyles] = -1; + ++iarg; + + if (utils::strmatch(arg[iarg], "^hybrid")) + error->all(FLERR, "Pair style hybrid/scaled cannot have hybrid as an argument"); + if (strcmp(arg[iarg], "none") == 0) + error->all(FLERR, "Pair style hybrid/scaled cannot have none as an argument"); + + styles[nstyles] = force->new_pair(arg[iarg], 1, dummy); + keywords[nstyles] = force->store_style(arg[iarg], 0); + special_lj[nstyles] = special_coul[nstyles] = nullptr; + compute_tally[nstyles] = 1; + + // determine list of arguments for pair style settings + // by looking for the next known pair style name. 
+ + jarg = iarg + 1; + while ((jarg < narg) && !force->pair_map->count(arg[jarg]) && + !lmp->match_style("pair", arg[jarg])) + jarg++; + + // decrement to account for scale factor except when last argument + + if (jarg < narg) --jarg; + + styles[nstyles]->settings(jarg - iarg - 1, arg + iarg + 1); + iarg = jarg; + nstyles++; + } + + // multiple[i] = 1 to M if sub-style used multiple times, else 0 + + for (int i = 0; i < nstyles; i++) { + int count = 0; + for (int j = 0; j < nstyles; j++) { + if (strcmp(keywords[j], keywords[i]) == 0) count++; + if (j == i) multiple[i] = count; + } + if (count == 1) multiple[i] = 0; + } + + // set pair flags from sub-style flags + + flags(); +} + +/* ---------------------------------------------------------------------- + call sub-style to compute single interaction + error if sub-style does not support single() call + since overlay could have multiple sub-styles, sum results explicitly +------------------------------------------------------------------------- */ + +double PairHybridScaledKokkos::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, + double factor_lj, double &fforce) +{ + if (nmap[itype][jtype] == 0) error->one(FLERR, "Invoked pair single on pair style none"); + + // update scale values from variables where needed + + const int nvars = scalevars.size(); + if (nvars > 0) { + auto vals = new double[nvars]; + auto vars = new int[nvars]; + for (int k = 0; k < nvars; ++k) { + int m = input->variable->find(scalevars[k].c_str()); + if (m < 0) + error->all(FLERR, "Variable '{}' not found when updating scale factors", scalevars[k]); + + // for equal-style, compute variable, set variable index to -1 + if (input->variable->equalstyle(m)) { + vals[k] = input->variable->compute_equal(m); + vars[k] = -1; + // for atom-style, store variable index, set variable to 0.0, set atomscaleflag + } else if (input->variable->atomstyle(m)) { + vals[k] = 0.0; + vars[k] = m; + } else + error->all(FLERR, "Variable '{}' has 
incompatible style", scalevars[k]); + } + for (int k = 0; k < nstyles; ++k) { + if (scaleidx[k] >= 0) { + scaleval[k] = vals[scaleidx[k]]; + atomvar[k] = vars[scaleidx[k]]; + } + } + delete[] vals; + delete[] vars; + } + + double fone; + fforce = 0.0; + double esum = 0.0; + + for (int m = 0; m < nmap[itype][jtype]; m++) { + auto pstyle = styles[map[itype][jtype][m]]; + if (rsq < pstyle->cutsq[itype][jtype]) { + if (pstyle->single_enable == 0) + error->one(FLERR, "Pair hybrid sub-style does not support single call"); + + if ((special_lj[map[itype][jtype][m]] != nullptr) || + (special_coul[map[itype][jtype][m]] != nullptr)) + error->one(FLERR, "Pair hybrid single() does not support per sub-style special_bond"); + + double scale = scaleval[map[itype][jtype][m]]; + esum += scale * pstyle->single(i, j, itype, jtype, rsq, factor_coul, factor_lj, fone); + + // if scale factor is constant or equal-style variable + if (scaleidx[m] < 0 || atomvar[m] < 0) { + fforce += scale * fone; + // if scale factor is atom-style variable, average i and j + } else { + const int igroupall = 0; + input->variable->compute_atom(atomvar[m], igroupall, atomscale, 1, 0); + comm->forward_comm(this); + const double ascale = 0.5 * (atomscale[i] + atomscale[j]); + fforce += ascale * fone; + } + } + } + + if (single_extra) copy_svector(itype, jtype); + return esum; +} + + +/* ---------------------------------------------------------------------- + call sub-style to compute born matrix interaction + error if sub-style does not support born_matrix call + since overlay could have multiple sub-styles, sum results explicitly +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::born_matrix(int i, int j, int itype, int jtype, double rsq, + double factor_coul, double factor_lj, double &dupair, + double &du2pair) +{ + if (nmap[itype][jtype] == 0) error->one(FLERR, "Invoked pair born_matrix on pair style none"); + + // update scale values from variables 
where needed + + const int nvars = scalevars.size(); + if (nvars > 0) { + auto vals = new double[nvars]; + auto vars = new int[nvars]; + for (int k = 0; k < nvars; ++k) { + int m = input->variable->find(scalevars[k].c_str()); + if (m < 0) + error->all(FLERR, "Variable '{}' not found when updating scale factors", scalevars[k]); + + // for equal-style, compute variable, set variable index to -1 + if (input->variable->equalstyle(m)) { + vals[k] = input->variable->compute_equal(m); + vars[k] = -1; + // for atom-style, store variable index, set variable to 0.0, set atomscaleflag + } else if (input->variable->atomstyle(m)) { + vals[k] = 0.0; + vars[k] = m; + } else + error->all(FLERR, "Variable '{}' has incompatible style", scalevars[k]); + } + for (int k = 0; k < nstyles; ++k) { + if (scaleidx[k] >= 0) { + scaleval[k] = vals[scaleidx[k]]; + atomvar[k] = vars[scaleidx[k]]; + } + } + delete[] vals; + delete[] vars; + } + + double du, du2; + dupair = du2pair = 0.0; + + for (int m = 0; m < nmap[itype][jtype]; m++) { + auto pstyle = styles[map[itype][jtype][m]]; + if (rsq < pstyle->cutsq[itype][jtype]) { + if (pstyle->single_enable == 0) + error->one(FLERR, "Pair hybrid sub-style does not support single call"); + + if ((special_lj[map[itype][jtype][m]] != nullptr) || + (special_coul[map[itype][jtype][m]] != nullptr)) + error->one(FLERR, "Pair hybrid single() does not support per sub-style special_bond"); + + du = du2 = 0.0; + double scale = scaleval[map[itype][jtype][m]]; + pstyle->born_matrix(i, j, itype, jtype, rsq, factor_coul, factor_lj, du, du2); + + // if scale factor is constant or equal-style variable + if (scaleidx[m] < 0 || atomvar[m] < 0) { + dupair += scale * du; + du2pair += scale * du2; + // if scale factor is atom-style variable, average i and j + } else { + const int igroupall = 0; + input->variable->compute_atom(atomvar[m], igroupall, atomscale, 1, 0); + comm->forward_comm(this); + const double ascale = 0.5 * (atomscale[i] + atomscale[j]); + dupair += ascale 
* du; + du2pair += ascale * du2; + } + } + } +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::coeff(int narg, char **arg) +{ + if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21)); + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); + utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); + + // 3rd arg = pair sub-style name + // 4th arg = pair sub-style index if name used multiple times + // allow for "none" as valid sub-style name + + int multflag = 0; + int m; + + for (m = 0; m < nstyles; m++) { + multflag = 0; + if (strcmp(arg[2],keywords[m]) == 0) { + if (multiple[m]) { + multflag = 1; + if (narg < 4) error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21)); + if (multiple[m] == utils::inumeric(FLERR,arg[3],false,lmp)) break; + else continue; + } else break; + } + } + + int none = 0; + if (m == nstyles) { + if (strcmp(arg[2],"none") == 0) none = 1; + else error->all(FLERR,"Pair coeff for hybrid has invalid style: {}", arg[2]); + } + + // move 1st/2nd args to 2nd/3rd args + // if multflag: move 1st/2nd args to 3rd/4th args + // just copy ptrs, since arg[] points into original input line + + arg[2+multflag] = arg[1]; + arg[1+multflag] = arg[0]; + + // ensure that one_coeff flag is honored + + if (!none && styles[m]->one_coeff) + if ((strcmp(arg[0],"*") != 0) || (strcmp(arg[1],"*") != 0)) + error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21)); + + // invoke sub-style coeff() starting with 1st remaining arg + + if (!none) styles[m]->coeff(narg-1-multflag,arg+1+multflag); + + // set setflag and which type pairs map to which sub-style + // if sub-style is none: set hybrid subflag, wipe out map + // else: set hybrid setflag & map only if 
substyle setflag is set + // if sub-style is new for type pair, add as multiple mapping + // if sub-style exists for type pair, don't add, just update coeffs + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + if (none) { + setflag[i][j] = 1; + nmap[i][j] = 0; + count++; + } else if (styles[m]->setflag[i][j]) { + int k; + for (k = 0; k < nmap[i][j]; k++) + if (map[i][j][k] == m) break; + if (k == nmap[i][j]) map[i][j][nmap[i][j]++] = m; + setflag[i][j] = 1; + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients" + utils::errorurl(21)); +} + + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::write_restart(FILE *fp) +{ + PairHybrid::write_restart(fp); + + fwrite(scaleval, sizeof(double), nstyles, fp); + fwrite(scaleidx, sizeof(int), nstyles, fp); + fwrite(atomvar, sizeof(int), nstyles, fp); + + int n = scalevars.size(); + fwrite(&n, sizeof(int), 1, fp); + for (auto &var : scalevars) { + n = var.size() + 1; + fwrite(&n, sizeof(int), 1, fp); + fwrite(var.c_str(), sizeof(char), n, fp); + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::read_restart(FILE *fp) +{ + PairHybrid::read_restart(fp); + + delete[] scaleval; + delete[] scaleidx; + delete[] atomvar; + scalevars.clear(); + scaleval = new double[nstyles]; + scaleidx = new int[nstyles]; + atomvar = new int[nstyles]; + + int n, me = comm->me; + if (me == 0) { + utils::sfread(FLERR, scaleval, sizeof(double), nstyles, fp, nullptr, error); + utils::sfread(FLERR, scaleidx, sizeof(int), nstyles, fp, nullptr, error); + utils::sfread(FLERR, atomvar, sizeof(int), nstyles, fp, nullptr, 
error); + } + MPI_Bcast(scaleval, nstyles, MPI_DOUBLE, 0, world); + MPI_Bcast(scaleidx, nstyles, MPI_INT, 0, world); + MPI_Bcast(atomvar, nstyles, MPI_INT, 0, world); + + char *tmp; + if (me == 0) utils::sfread(FLERR, &n, sizeof(int), 1, fp, nullptr, error); + MPI_Bcast(&n, 1, MPI_INT, 0, world); + scalevars.resize(n); + for (auto &scale : scalevars) { + if (me == 0) utils::sfread(FLERR, &n, sizeof(int), 1, fp, nullptr, error); + MPI_Bcast(&n, 1, MPI_INT, 0, world); + tmp = new char[n]; + if (me == 0) utils::sfread(FLERR, tmp, sizeof(char), n, fp, nullptr, error); + MPI_Bcast(tmp, n, MPI_CHAR, 0, world); + scale = tmp; + delete[] tmp; + } +} + + +/* ---------------------------------------------------------------------- + we need to handle Pair::svector special for hybrid/scaled +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::init_svector() +{ + // single_extra = list all sub-style single_extra + // allocate svector + + single_extra = 0; + for (int m = 0; m < nstyles; m++) + single_extra += styles[m]->single_extra; + + if (single_extra) { + delete [] svector; + svector = new double[single_extra]; + } +} + +/* ---------------------------------------------------------------------- + we need to handle Pair::svector special for hybrid/scaled +------------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::copy_svector(int itype, int jtype) +{ + int n=0; + Pair *this_style = nullptr; + + // fill svector array. 
+ // copy data from active styles and use 0.0 for inactive ones + for (int m = 0; m < nstyles; m++) { + for (int k = 0; k < nmap[itype][jtype]; ++k) { + if (m == map[itype][jtype][k]) { + this_style = styles[m]; + } else { + this_style = nullptr; + } + } + for (int l = 0; l < styles[m]->single_extra; ++l) { + if (this_style) { + svector[n++] = this_style->svector[l]; + } else { + svector[n++] = 0.0; + } + } + } +} + + +/* ---------------------------------------------------------------------- */ + +int PairHybridScaledKokkos::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/, int * /*pbc*/) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = atomscale[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void PairHybridScaledKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) atomscale[i] = buf[m++]; +} \ No newline at end of file diff --git a/src/KOKKOS/pair_hybrid_scaled_kokkos.h b/src/KOKKOS/pair_hybrid_scaled_kokkos.h new file mode 100644 index 00000000000..3fc8926d6a3 --- /dev/null +++ b/src/KOKKOS/pair_hybrid_scaled_kokkos.h @@ -0,0 +1,65 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(hybrid/scaled/kk,PairHybridScaledKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_HYBRID_SCALED_KOKKOS_H +#define LMP_PAIR_HYBRID_SCALED_KOKKOS_H + +#include "pair_hybrid_kokkos.h" + +#include +#include + +namespace LAMMPS_NS { + +class PairHybridScaledKokkos : public PairHybridKokkos { + public: + PairHybridScaledKokkos(class LAMMPS *); + ~PairHybridScaledKokkos() override; + void compute(int, int) override; + void settings(int, char **) override; + void coeff(int, char **) override; + + void write_restart(FILE *) override; + void read_restart(FILE *) override; + double single(int, int, int, int, double, double, double, double &) override; + void born_matrix(int, int, int, int, double, double, double, double &, double &) override; + + void init_svector() override; + void copy_svector(int, int) override; + + int pack_forward_comm(int, int *, double *, int, int *) override; + void unpack_forward_comm(int, int, double *) override; + +protected: + double **fsum, **tsum; + double *scaleval; + int *scaleidx; + std::vector scalevars; + int nmaxfsum; + int *atomvar; // indices of atom-style variables + double *atomscale; // vector of atom-style variable values +}; + +} // namespace LAMMPS_NS + +#endif +#endif + + From b2c1435025eef474e1d95dc6df5ea876c3a368dc Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Fri, 20 Jun 2025 11:15:31 -0400 Subject: [PATCH 018/604] framework for JSON delete keyword needs actual output of molecules --- src/REAXFF/fix_reaxff_species.cpp | 112 +++++++++++++++++++++--------- src/REAXFF/fix_reaxff_species.h | 1 + src/platform.cpp | 14 ++++ src/platform.h | 8 +++ 4 files changed, 101 insertions(+), 34 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 5b32ca0528d..a0f83701826 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ 
b/src/REAXFF/fix_reaxff_species.cpp @@ -60,6 +60,8 @@ static const char cite_reaxff_species_delete[] = " pages = {336-347}\n" "}\n\n"; +enum { NONE, NATIVE, JSON }; // output file type + /* ---------------------------------------------------------------------- */ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : @@ -156,7 +158,9 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // optional args filepos = filedel = nullptr; eleflag = posflag = padflag = 0; - delflag = specieslistflag = masslimitflag = 0; + delflag = NONE; + specieslistflag = masslimitflag = 0; + deljson_init = 0; delete_Nlimit = delete_Nsteps = 0; singlepos_opened = multipos_opened = del_opened = 0; @@ -194,9 +198,10 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // delete species } else if (strcmp(arg[iarg], "delete") == 0) { - delflag = 1; delete[] filedel; filedel = utils::strdup(arg[iarg + 1]); + if (platform::has_extension(filedel, "json")) delflag = JSON; + else delflag = NATIVE; if (comm->me == 0) { if (fdel) fclose(fdel); fdel = fopen(filedel, "w"); @@ -222,17 +227,26 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : del_species.resize(ndelspec); for (int i = 0; i < ndelspec; i++) del_species[i] = arg[iarg + 4 + i]; - if (comm->me == 0) { + if (comm->me == 0 && delflag == NATIVE) { fprintf(fdel, "Timestep"); for (int i = 0; i < ndelspec; i++) fprintf(fdel, "\t%s", del_species[i].c_str()); fprintf(fdel, "\n"); fflush(fdel); } iarg += ndelspec + 4; - } else error->all(FLERR, "Unknown fix reaxff/species delete option: {}", arg[iarg]); // rate limit when deleting molecules + + if (comm->me == 0 && delflag == JSON) { + fprintf(fdel, "{\n"); + fprintf(fdel, " \"application\": \"LAMMPS\",\n"); + fprintf(fdel, " \"format\": \"output\",\n"); + fprintf(fdel, " \"subformat\": \"fix reaxff/species: delete keyword\",\n"); + fprintf(fdel, " \"revision\": 1,\n"); + fprintf(fdel, " \"data\": [\n"); + 
fflush(fdel); + } } else if (strcmp(arg[iarg], "delete_rate_limit") == 0) { if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "fix reaxff/species delete_rate_limit", error); @@ -278,7 +292,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR, "Unknown fix reaxff/species keyword: {}", arg[iarg]); } - if (delflag && specieslistflag && masslimitflag) + if (delflag != NONE && specieslistflag && masslimitflag) error->all(FLERR, "Incompatible combination fix reaxff/species command options"); if (delete_Nsteps > 0) { @@ -317,6 +331,7 @@ FixReaxFFSpecies::~FixReaxFFSpecies() else fclose(fp); if (posflag && multipos_opened) fclose(pos); + if (delflag == JSON) fprintf(fdel, " }\n ]\n}"); if (fdel) fclose(fdel); } @@ -471,7 +486,7 @@ void FixReaxFFSpecies::Output_ReaxFF_Bonds(bigint ntimestep, FILE * /*fp*/) if (comm->me == 0) fflush(pos); } - if (delflag && nvalid != -1) { + if (delflag != NONE && nvalid != -1) { DeleteSpecies(Nmole, Nspec); // reset molecule ID to index from 1 @@ -1087,39 +1102,68 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) MPI_Reduce(deletecount, deletecount, ndelcomm, MPI_DOUBLE, MPI_SUM, 0, world); if (comm->me == 0) { - if (masslimitflag) { - int printflag = 0; - for (int m = 0; m < Nspec; m++) { - if (deletecount[m] > 0) { - if (printflag == 0) { - utils::print(fdel, "Timestep {}", update->ntimestep); - printflag = 1; - } - fprintf(fdel, " %g ", deletecount[m]); - for (j = 0; j < nutypes; j++) { - int itemp = MolName[nutypes * m + j]; - if (itemp != 0) { - fprintf(fdel, "%s", ueletype[j].c_str()); - if (itemp != 1) fprintf(fdel, "%d", itemp); + if (delflag == NATIVE) { + if (masslimitflag) { + int printflag = 0; + for (int m = 0; m < Nspec; m++) { + if (deletecount[m] > 0) { + if (printflag == 0) { + utils::print(fdel, "Timestep {}", update->ntimestep); + printflag = 1; + } + fprintf(fdel, " %g ", deletecount[m]); + for (j = 0; j < nutypes; j++) { + int itemp = MolName[nutypes * m + j]; + if 
(itemp != 0) { + fprintf(fdel, "%s", ueletype[j].c_str()); + if (itemp != 1) fprintf(fdel, "%d", itemp); + } } } } + if (printflag) { + fprintf(fdel, "\n"); + fflush(fdel); + } + } else { + int writeflag = 0; + for (i = 0; i < ndelspec; i++) + if (deletecount[i]) writeflag = 1; + + if (writeflag) { + utils::print(fdel, "{}", update->ntimestep); + for (i = 0; i < ndelspec; i++) { fprintf(fdel, "\t%g", deletecount[i]); } + fprintf(fdel, "\n"); + fflush(fdel); + } } - if (printflag) { - fprintf(fdel, "\n"); - fflush(fdel); - } - } else { - int writeflag = 0; - for (i = 0; i < ndelspec; i++) - if (deletecount[i]) writeflag = 1; - - if (writeflag) { - utils::print(fdel, "{}", update->ntimestep); - for (i = 0; i < ndelspec; i++) { fprintf(fdel, "\t%g", deletecount[i]); } - fprintf(fdel, "\n"); - fflush(fdel); + } else if (delflag == JSON) { + std::string indent; + int json_level = 2, tab = 4; + indent.resize(json_level*tab, ' '); + if (deljson_init == 1) { + fprintf(fdel, "%s},\n%s{\n", indent.c_str(), indent.c_str()); + } else { + fprintf(fdel, "%s{\n", indent.c_str()); + deljson_init = 1; } + + indent.resize(++json_level*tab, ' '); + utils::print(fdel, "{}\"timestep\": {},\n", indent, update->ntimestep); + utils::print(fdel, "{}\"deleted_molecules\": [\n", indent); + + indent.resize(++json_level*tab, ' '); + fprintf(fdel, "%s{\n", indent.c_str()); + + indent.resize(++json_level*tab, ' '); + utils::print(fdel, "{}Deleted_Molecule\n", indent); + + indent.resize(--json_level*tab, ' '); + fprintf(fdel, "%s},\n", indent.c_str()); + + indent.resize(--json_level*tab, ' '); + fprintf(fdel, "%s]\n", indent.c_str()); + fflush(fdel); } } diff --git a/src/REAXFF/fix_reaxff_species.h b/src/REAXFF/fix_reaxff_species.h index d378065a82e..6ea51fa2c95 100644 --- a/src/REAXFF/fix_reaxff_species.h +++ b/src/REAXFF/fix_reaxff_species.h @@ -57,6 +57,7 @@ class FixReaxFFSpecies : public Fix { FILE *fp, *pos, *fdel; int eleflag, posflag, multipos, padflag, setupflag; int delflag, 
specieslistflag, masslimitflag; + int deljson_init; int delete_Nlimit, delete_Nlimit_varid; std::string delete_Nlimit_varname; int delete_Nsteps, *delete_Tcount; diff --git a/src/platform.cpp b/src/platform.cpp index 8e6b6c72e61..cc6d69d832b 100644 --- a/src/platform.cpp +++ b/src/platform.cpp @@ -1110,6 +1110,20 @@ double platform::disk_free(const std::string &path) return bytes_free; } +/* ---------------------------------------------------------------------- + check if filename has the provided extension +------------------------------------------------------------------------- */ + +bool platform::has_extension(const std::string &file, const std::string &extension) +{ + std::size_t dot = file.find_last_of('.'); + if (dot != std::string::npos) { + const std::string ext = file.substr(dot + 1); + if (ext == extension) return true; + } + return false; +} + /* ---------------------------------------------------------------------- check if filename has a known compression extension ------------------------------------------------------------------------- */ diff --git a/src/platform.h b/src/platform.h index e693e328de8..f03782fead2 100644 --- a/src/platform.h +++ b/src/platform.h @@ -393,6 +393,14 @@ namespace platform { double disk_free(const std::string &path); + /*! Check if a file name ends in the provided extension + * + * \param file name of the file to check + * \param extension the file extension to check for + * \return true if the file has the provided extension, otherwise false */ + + bool has_extension(const std::string &file, const std::string &extension); + /*! 
Check if a file name ends in a known extension for a compressed file format * * Currently supported file extensions are: .gz, .bz2, .zst, .xz, .lzma, lz4 From 42edcfc01f7ef76e5c0dd90b04b020364ceb7b1f Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 22 Jun 2025 11:13:34 -0400 Subject: [PATCH 019/604] use utils for json extension matching might be better to still have 'has_extension' wrapper in platform, due to ease of and grepping for it --- src/REAXFF/fix_reaxff_species.cpp | 8 +++++++- src/REAXFF/fix_reaxff_species.h | 1 + src/platform.cpp | 14 -------------- src/platform.h | 8 -------- 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index a0f83701826..070b337ef0e 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -200,7 +200,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : } else if (strcmp(arg[iarg], "delete") == 0) { delete[] filedel; filedel = utils::strdup(arg[iarg + 1]); - if (platform::has_extension(filedel, "json")) delflag = JSON; + if (utils::strmatch(filedel, "\\.json$")) delflag = JSON; else delflag = NATIVE; if (comm->me == 0) { if (fdel) fclose(fdel); @@ -239,6 +239,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // rate limit when deleting molecules if (comm->me == 0 && delflag == JSON) { + // header for 'delete' keyword JSON output fprintf(fdel, "{\n"); fprintf(fdel, " \"application\": \"LAMMPS\",\n"); fprintf(fdel, " \"format\": \"output\",\n"); @@ -246,6 +247,11 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : fprintf(fdel, " \"revision\": 1,\n"); fprintf(fdel, " \"data\": [\n"); fflush(fdel); + + std::string id_fix1 = "_internal_prop-atom_fix_reaxff_species"; + mols4json = "_ivec_internal_prop-atom_fix_reaxff_species"; + if (!modify->get_fix_by_id(id_fix1)) + modify->add_fix(id_fix1 + " all property/atom i_" + mols4json + " ghost 
yes"); } } else if (strcmp(arg[iarg], "delete_rate_limit") == 0) { if (iarg + 3 > narg) diff --git a/src/REAXFF/fix_reaxff_species.h b/src/REAXFF/fix_reaxff_species.h index 6ea51fa2c95..681b7e18f5e 100644 --- a/src/REAXFF/fix_reaxff_species.h +++ b/src/REAXFF/fix_reaxff_species.h @@ -64,6 +64,7 @@ class FixReaxFFSpecies : public Fix { double massmin, massmax; int singlepos_opened, multipos_opened, del_opened; char *filepos, *filedel; + std::string mols4json; // name of 'fix property/atom' vector to identify molecules for JSON output std::vector ele2uele; // for element eletype[i], ele2uele[i] stores index of unique element std::vector eletype; // list of ReaxFF elements of length ntypes std::vector ueletype; // list of unique elements, of quantity nutypes diff --git a/src/platform.cpp b/src/platform.cpp index cc6d69d832b..8e6b6c72e61 100644 --- a/src/platform.cpp +++ b/src/platform.cpp @@ -1110,20 +1110,6 @@ double platform::disk_free(const std::string &path) return bytes_free; } -/* ---------------------------------------------------------------------- - check if filename has the provided extension -------------------------------------------------------------------------- */ - -bool platform::has_extension(const std::string &file, const std::string &extension) -{ - std::size_t dot = file.find_last_of('.'); - if (dot != std::string::npos) { - const std::string ext = file.substr(dot + 1); - if (ext == extension) return true; - } - return false; -} - /* ---------------------------------------------------------------------- check if filename has a known compression extension ------------------------------------------------------------------------- */ diff --git a/src/platform.h b/src/platform.h index f03782fead2..e693e328de8 100644 --- a/src/platform.h +++ b/src/platform.h @@ -393,14 +393,6 @@ namespace platform { double disk_free(const std::string &path); - /*! 
Check if a file name ends in the provided extension - * - * \param file name of the file to check - * \param extension the file extension to check for - * \return true if the file has the provided extension, otherwise false */ - - bool has_extension(const std::string &file, const std::string &extension); - /*! Check if a file name ends in a known extension for a compressed file format * * Currently supported file extensions are: .gz, .bz2, .zst, .xz, .lzma, lz4 From 5e9a16dec588e396d1af71f0aa2ca55909b3708b Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Fri, 27 Jun 2025 23:46:02 -0400 Subject: [PATCH 020/604] working v0 --- src/REAXFF/fix_reaxff_species.cpp | 162 +++++++++++++------------- src/REAXFF/fix_reaxff_species.h | 4 +- src/label_map.h | 3 +- src/output.cpp | 186 ++++++++++++++++++++++++++++++ src/output.h | 20 +++- 5 files changed, 282 insertions(+), 93 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 070b337ef0e..42eca57892d 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -34,6 +34,7 @@ #include "modify.h" #include "neigh_list.h" #include "neighbor.h" +#include "output.h" #include "update.h" #include "variable.h" @@ -68,7 +69,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), Name(nullptr), MolName(nullptr), NMol(nullptr), nd(nullptr), MolType(nullptr), molmap(nullptr), mark(nullptr), Mol2Spec(nullptr), clusterID(nullptr), x0(nullptr), BOCut(nullptr), fp(nullptr), pos(nullptr), fdel(nullptr), delete_Tcount(nullptr), - filepos(nullptr), filedel(nullptr) + filepos(nullptr) { if (narg < 7) utils::missing_cmd_args(FLERR, "fix reaxff/species", error); @@ -156,7 +157,7 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : for (int j = 1; j < np1; j++) BOCut[i][j] = bo_cut; // optional args - filepos = filedel = nullptr; + filepos = nullptr; eleflag = posflag = padflag = 0; delflag = NONE; 
specieslistflag = masslimitflag = 0; @@ -198,13 +199,12 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // delete species } else if (strcmp(arg[iarg], "delete") == 0) { - delete[] filedel; - filedel = utils::strdup(arg[iarg + 1]); + filedel = arg[iarg + 1]; if (utils::strmatch(filedel, "\\.json$")) delflag = JSON; else delflag = NATIVE; if (comm->me == 0) { if (fdel) fclose(fdel); - fdel = fopen(filedel, "w"); + fdel = fopen(filedel.c_str(), "w"); if (!fdel) error->one(FLERR, "Cannot open fix reaxff/species delete file {}: {}", filedel, utils::getsyserror()); @@ -238,21 +238,19 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR, "Unknown fix reaxff/species delete option: {}", arg[iarg]); // rate limit when deleting molecules - if (comm->me == 0 && delflag == JSON) { - // header for 'delete' keyword JSON output - fprintf(fdel, "{\n"); - fprintf(fdel, " \"application\": \"LAMMPS\",\n"); - fprintf(fdel, " \"format\": \"output\",\n"); - fprintf(fdel, " \"subformat\": \"fix reaxff/species: delete keyword\",\n"); - fprintf(fdel, " \"revision\": 1,\n"); - fprintf(fdel, " \"data\": [\n"); - fflush(fdel); - - std::string id_fix1 = "_internal_prop-atom_fix_reaxff_species"; - mols4json = "_ivec_internal_prop-atom_fix_reaxff_species"; - if (!modify->get_fix_by_id(id_fix1)) - modify->add_fix(id_fix1 + " all property/atom i_" + mols4json + " ghost yes"); + if (delflag == JSON) { + if (comm->me == 0) { + // header for 'delete' keyword JSON output + fprintf(fdel, "{\n"); + fprintf(fdel, " \"application\": \"LAMMPS\",\n"); + fprintf(fdel, " \"format\": \"output\",\n"); + fprintf(fdel, " \"subformat\": \"fix reaxff/species: delete keyword\",\n"); + fprintf(fdel, " \"revision\": 1,\n"); + fprintf(fdel, " \"run_output\": [\n"); + fflush(fdel); + } } + } else if (strcmp(arg[iarg], "delete_rate_limit") == 0) { if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "fix reaxff/species delete_rate_limit", error); @@ -329,7 
+327,6 @@ FixReaxFFSpecies::~FixReaxFFSpecies() memory->destroy(delete_Tcount); delete[] filepos; - delete[] filedel; if (comm->me == 0) { if (compressed) @@ -981,7 +978,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) } int j, m, n, itype, cid; - int ndel, ndelone, count, count_tmp; + int count, count_tmp; int *Nameall; int *mask = atom->mask; double *mass = atom->mass; @@ -1067,7 +1064,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) if (totalmass > massmin && totalmass < massmax) { this_delete_Tcount++; for (j = 0; j < nmarklist; j++) { - mark[marklist[j]] = 1; + mark[marklist[j]] = m; deletecount[Mol2Spec[m - 1]] += 1.0 / (double) count; } } @@ -1077,7 +1074,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) if (del_species[i] == species_str) { this_delete_Tcount++; for (j = 0; j < nmarklist; j++) { - mark[marklist[j]] = 1; + mark[marklist[j]] = m; deletecount[i] += 1.0 / (double) count; } break; @@ -1087,56 +1084,38 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) } } - // delete atoms. 
loop in reverse order to avoid copying marked atoms - - ndel = ndelone = 0; - for (i = atom->nlocal - 1; i >= 0; i--) { - if (mark[i] == 1) { - avec->copy(atom->nlocal - 1, i, 1); - atom->nlocal--; - ndelone++; - } - } - - MPI_Allreduce(&ndelone, &ndel, 1, MPI_INT, MPI_SUM, world); - - atom->natoms -= ndel; - if (comm->me == 0) MPI_Reduce(MPI_IN_PLACE, deletecount, ndelcomm, MPI_DOUBLE, MPI_SUM, 0, world); else MPI_Reduce(deletecount, deletecount, ndelcomm, MPI_DOUBLE, MPI_SUM, 0, world); - if (comm->me == 0) { + int printflag = 0; + if (comm->me == 0) + for (int m = 0; m < ndelcomm; m++) + if (deletecount[m] > 0) { printflag = 1; break; } + + MPI_Bcast(&printflag, 1, MPI_INT, 0, world); + + if (printflag) { if (delflag == NATIVE) { - if (masslimitflag) { - int printflag = 0; - for (int m = 0; m < Nspec; m++) { - if (deletecount[m] > 0) { - if (printflag == 0) { - utils::print(fdel, "Timestep {}", update->ntimestep); - printflag = 1; - } - fprintf(fdel, " %g ", deletecount[m]); - for (j = 0; j < nutypes; j++) { - int itemp = MolName[nutypes * m + j]; - if (itemp != 0) { - fprintf(fdel, "%s", ueletype[j].c_str()); - if (itemp != 1) fprintf(fdel, "%d", itemp); + if (comm->me == 0) { + if (masslimitflag) { + utils::print(fdel, "Timestep {}", update->ntimestep); + for (int m = 0; m < Nspec; m++) { + if (deletecount[m] > 0) { + fprintf(fdel, " %g ", deletecount[m]); + for (j = 0; j < nutypes; j++) { + int itemp = MolName[nutypes * m + j]; + if (itemp != 0) { + fprintf(fdel, "%s", ueletype[j].c_str()); + if (itemp != 1) fprintf(fdel, "%d", itemp); + } } } } - } - if (printflag) { fprintf(fdel, "\n"); fflush(fdel); - } - } else { - int writeflag = 0; - for (i = 0; i < ndelspec; i++) - if (deletecount[i]) writeflag = 1; - - if (writeflag) { + } else if (specieslistflag) { utils::print(fdel, "{}", update->ntimestep); for (i = 0; i < ndelspec; i++) { fprintf(fdel, "\t%g", deletecount[i]); } fprintf(fdel, "\n"); @@ -1146,39 +1125,54 @@ void FixReaxFFSpecies::DeleteSpecies(int 
Nmole, int Nspec) } else if (delflag == JSON) { std::string indent; int json_level = 2, tab = 4; - indent.resize(json_level*tab, ' '); - if (deljson_init == 1) { - fprintf(fdel, "%s},\n%s{\n", indent.c_str(), indent.c_str()); - } else { - fprintf(fdel, "%s{\n", indent.c_str()); - deljson_init = 1; + + if (comm->me == 0) { + indent.resize(json_level*tab, ' '); + if (deljson_init == 1) { + fprintf(fdel, "%s},\n%s{\n", indent.c_str(), indent.c_str()); + } else { + fprintf(fdel, "%s{\n", indent.c_str()); + deljson_init = 1; + } + + indent.resize(++json_level*tab, ' '); + utils::print(fdel, "{}\"timestep\": {},\n", indent, update->ntimestep); + utils::print(fdel, "{}\"deleted_molecules\": [\n", indent); + + indent.resize(++json_level*tab, ' '); } + + output->write_molecule_json(fdel, json_level, mark); + + if (comm->me == 0) { + indent.resize(--json_level*tab, ' '); + fprintf(fdel, "%s]\n", indent.c_str()); + fflush(fdel); + } + } + } - indent.resize(++json_level*tab, ' '); - utils::print(fdel, "{}\"timestep\": {},\n", indent, update->ntimestep); - utils::print(fdel, "{}\"deleted_molecules\": [\n", indent); - - indent.resize(++json_level*tab, ' '); - fprintf(fdel, "%s{\n", indent.c_str()); - - indent.resize(++json_level*tab, ' '); - utils::print(fdel, "{}Deleted_Molecule\n", indent); - - indent.resize(--json_level*tab, ' '); - fprintf(fdel, "%s},\n", indent.c_str()); + // delete atoms. 
loop in reverse order to avoid copying marked atoms - indent.resize(--json_level*tab, ' '); - fprintf(fdel, "%s]\n", indent.c_str()); - fflush(fdel); + int ndel, ndelone = 0; + for (i = atom->nlocal - 1; i >= 0; i--) { + if (mark[i] > 0) { + avec->copy(atom->nlocal - 1, i, 1); + atom->nlocal--; + ndelone++; } } + MPI_Allreduce(&ndelone, &ndel, 1, MPI_INT, MPI_SUM, world); + + atom->natoms -= ndel; + // push back delete_Tcount on every step if (delete_Nsteps > 0) { for (i = delete_Nsteps - 1; i > 0; i--) delete_Tcount[i] = delete_Tcount[i - 1]; delete_Tcount[0] += this_delete_Tcount; } - + if (ndel && (atom->map_style != Atom::MAP_NONE)) { atom->nghost = 0; atom->map_init(); diff --git a/src/REAXFF/fix_reaxff_species.h b/src/REAXFF/fix_reaxff_species.h index 681b7e18f5e..1c4b5b173e9 100644 --- a/src/REAXFF/fix_reaxff_species.h +++ b/src/REAXFF/fix_reaxff_species.h @@ -63,8 +63,8 @@ class FixReaxFFSpecies : public Fix { int delete_Nsteps, *delete_Tcount; double massmin, massmax; int singlepos_opened, multipos_opened, del_opened; - char *filepos, *filedel; - std::string mols4json; // name of 'fix property/atom' vector to identify molecules for JSON output + char *filepos; + std::string filedel; std::vector ele2uele; // for element eletype[i], ele2uele[i] stores index of unique element std::vector eletype; // list of ReaxFF elements of length ntypes std::vector ueletype; // list of unique elements, of quantity nutypes diff --git a/src/label_map.h b/src/label_map.h index a10344cd4bb..4af80832538 100644 --- a/src/label_map.h +++ b/src/label_map.h @@ -43,10 +43,11 @@ class LabelMap : protected Pointers { void read_restart(FILE *fp); void write_restart(FILE *); - protected: int natomtypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes; std::vector typelabel, btypelabel, atypelabel; std::vector dtypelabel, itypelabel; + + protected: std::unordered_map typelabel_map; std::unordered_map btypelabel_map; std::unordered_map atypelabel_map; diff --git a/src/output.cpp 
b/src/output.cpp index 8bac7662130..df8042eacd6 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -19,6 +19,7 @@ #include "output.h" #include "style_dump.h" // IWYU pragma: keep +#include "atom.h" #include "comm.h" #include "domain.h" #include "dump.h" @@ -26,6 +27,7 @@ #include "group.h" #include "info.h" #include "input.h" +#include "label_map.h" #include "memory.h" #include "modify.h" #include "thermo.h" @@ -628,6 +630,190 @@ void Output::write_restart(bigint ntimestep) last_restart = ntimestep; } +/* ---------------------------------------------------------------------- + write molecule JSON objects to file based on per-atom array + atoms with integer array value of 0 assumed to not belong to a molecule +------------------------------------------------------------------------- */ + +void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) +{ + std::string indent; + int tab = 4; + indent.resize(json_level*tab, ' '); + + // get max ivec value + + int local_max = 0; + for (int i = 0; i < atom->nlocal; i++) + local_max = MAX(local_max, ivec[i]); + + int global_max; + MPI_Allreduce(&local_max, &global_max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + int iglobal_max = global_max + 1; // (!) here is where assumes outer-loop index is value of ivec (!) + + // (!) need ERROR check: assumes max value (Nmax) is roughly equal to number of values (!) 
+ + // let's break all operations up into natoms/nproc chunks (pagesize) + + int pagesize = atom->natoms / comm->nprocs; + + // first, get pagesize number of counts for each value + + std::vector atoms_local(pagesize); + std::vector atoms_root(pagesize); + std::vector local_ivcounts(pagesize); + std::vector global_ivcounts(pagesize); + std::fill(global_ivcounts.begin(), global_ivcounts.end(), 0); + MPI_Datatype ParticleStructType = createParticleStructType(); + + int ivstart = 1; + int ivend = pagesize; + if (ivend > iglobal_max) ivend = iglobal_max; + int json_init = 0; + while (true) { + std::fill(local_ivcounts.begin(), local_ivcounts.end(), 0); + for (int i = 0; i < atom->nlocal; i++) + for (int ival = ivstart; ival < ivend; ival++) + if (ivec[i] == ival) local_ivcounts[ival-ivstart]++; // (!) here is where assumes outer-loop index is value of ivec (!) + + MPI_Allreduce(local_ivcounts.data(), global_ivcounts.data(), pagesize, MPI_INT, MPI_SUM, MPI_COMM_WORLD); //sometimes nsend pagesize) printf("WARNING: you triggered a bug, please contact developer\n"); // (?) need to skip and warn if single molecule bigger than pagesize + if (cumsum > pagesize) { + subend = i-1; + breakflag = 0; + break; + } + } + + int n2recv; + int n2send = 0; + for (int i = substart; i < subend; i++) + n2send += local_ivcounts[i]; + if (comm->me != 0) MPI_Send(&n2send, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); + Particle myatom; + int iloc = 0; + for (int i = 0; i < atom->nlocal; i++) { + if (ivec[i] >= ivstart+substart && ivec[i] < ivstart+subend ) { // (!) here is where assumes outer-loop index is value of ivec (!) 
+ myatom.ival = ivec[i]; + myatom.type = atom->type[i]; + myatom.tag = atom->tag[i]; + for (int k = 0; k < 3; k++) + myatom.x[k] = atom->x[i][k]; + atoms_local[iloc++] = myatom; + } + } + if (comm->me != 0) MPI_Send(atoms_local.data(), n2send, ParticleStructType, 0, 0, MPI_COMM_WORLD); + + if (comm->me == 0) { + int n2print = 0; + for (int i = 1; i < comm->nprocs; i++) { + MPI_Recv(&n2recv, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(&atoms_root[n2print], n2recv, ParticleStructType, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + n2print += n2recv; + } + // add atoms already on root + for (int i = 0; i < n2send; i++) + atoms_root[n2print++] = atoms_local[i]; + + for (int j = ivstart; j < ivend; j++) { + if (global_ivcounts[j-ivstart+substart] == 0) continue; //yikes + if (json_init == 1) { + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s},\n%s{\n", indent.c_str(), indent.c_str()); + } else { + fprintf(fp, "%s{\n", indent.c_str()); + json_init = 1; + } + indent.resize(++json_level*tab, ' '); + fprintf(fp, "%s\"types\": {\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + //fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); + fprintf(fp, "%s\"data\": [\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + for (int i = 0; i < n2print; i++) { + if (atoms_root[i].ival == j) { + int mytype = atoms_root[i].type; + std::string typestr = std::to_string(mytype); + if (atom->labelmapflag) typestr = atom->lmap->typelabel[mytype-1]; + utils::print(fp, "{}[{}, \"{}\"]", indent, atoms_root[i].tag, typestr); + if (i < n2print-1) fprintf(fp, ",\n"); + else fprintf(fp, "\n"); + } + } + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s]\n", indent.c_str()); + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s},\n", indent.c_str()); + fprintf(fp, "%s\"coords\": {\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + //fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", 
indent.c_str()); + fprintf(fp, "%s\"data\": [\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + for (int i = 0; i < n2print; i++) { + if (atoms_root[i].ival == j) { + utils::print(fp, "{}[{}, {}, {}, {}]", indent, atoms_root[i].tag, + atoms_root[i].x[0], atoms_root[i].x[1], atoms_root[i].x[2]); + if (i < n2print-1) fprintf(fp, ",\n"); + else fprintf(fp, "\n"); + } + } + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s]\n", indent.c_str()); + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s}\n", indent.c_str()); + } + } + if (breakflag) break; + substart = subend+1; + } + + if (ivend == iglobal_max) { + if (json_init == 1) { + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s}\n", indent.c_str()); + } + break; + } + ivstart += pagesize; + ivend += pagesize; + if (ivend > iglobal_max) ivend = iglobal_max; + } +} + +/* ---------------------------------------------------------------------- + create Particle struct type for MPI +------------------------------------------------------------------------- */ + +MPI_Datatype Output::createParticleStructType() { + MPI_Datatype ParticleStructType; + + const int nfields = 4; + int blocklengths[nfields] = {1, 1, 1, 3}; + MPI_Aint offsets[nfields]; + offsets[0] = offsetof(Particle, ival); + offsets[1] = offsetof(Particle, tag); + offsets[2] = offsetof(Particle, type); + offsets[3] = offsetof(Particle, x); + MPI_Datatype types[nfields] = {MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE}; + + MPI_Type_create_struct(nfields, blocklengths, offsets, types, &ParticleStructType); + MPI_Type_commit(&ParticleStructType); + + return ParticleStructType; +} + /* ---------------------------------------------------------------------- timestep is being changed, called by update->reset_timestep() for dumps, require that no dump is "active" diff --git a/src/output.h b/src/output.h index 32f3a0a48ad..34093bac0b2 100644 --- a/src/output.h +++ b/src/output.h @@ -70,15 +70,23 @@ class Output : protected Pointers { typedef 
std::map DumpCreatorMap; DumpCreatorMap *dump_map; + typedef struct Particle { + int ival; // atom's value from per-atom vector + int tag, type; + double x[3]; + } Particle; + MPI_Datatype createParticleStructType(); + Output(class LAMMPS *); ~Output() override; void init(); - void setup(int memflag = 1); // initial output before run/min - void write(bigint); // output for current timestep - void write_dump(bigint); // force output of dump snapshots - void write_restart(bigint); // force output of a restart file - void reset_timestep(bigint); // reset output which depends on timestep - void reset_dt(); // reset output which depends on timestep size + void setup(int memflag = 1); // initial output before run/min + void write(bigint); // output for current timestep + void write_dump(bigint); // force output of dump snapshots + void write_restart(bigint); // force output of a restart file + void write_molecule_json(FILE *, int, int *); // output molecule JSON objects to file + void reset_timestep(bigint); // reset output which depends on timestep + void reset_dt(); // reset output which depends on timestep size Dump *add_dump(int, char **); // add a Dump to Dump list void modify_dump(int, char **); // modify a Dump From 7739632ee10305f784148a37dd5fc7c377a324f0 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 28 Jun 2025 00:34:25 -0400 Subject: [PATCH 021/604] better version --- src/REAXFF/fix_reaxff_species.cpp | 16 +-- src/output.cpp | 221 ++++++++++++------------------ src/output.h | 4 +- 3 files changed, 97 insertions(+), 144 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 42eca57892d..6f68202ebcc 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -1091,7 +1091,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) int printflag = 0; if (comm->me == 0) - for (int m = 0; m < ndelcomm; m++) + for (int m = 0; m < ndelcomm; m++) if (deletecount[m] > 0) { 
printflag = 1; break; } MPI_Bcast(&printflag, 1, MPI_INT, 0, world); @@ -1125,7 +1125,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) } else if (delflag == JSON) { std::string indent; int json_level = 2, tab = 4; - + if (comm->me == 0) { indent.resize(json_level*tab, ' '); if (deljson_init == 1) { @@ -1134,17 +1134,17 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) fprintf(fdel, "%s{\n", indent.c_str()); deljson_init = 1; } - + indent.resize(++json_level*tab, ' '); utils::print(fdel, "{}\"timestep\": {},\n", indent, update->ntimestep); utils::print(fdel, "{}\"deleted_molecules\": [\n", indent); - + indent.resize(++json_level*tab, ' '); } - + output->write_molecule_json(fdel, json_level, mark); - - if (comm->me == 0) { + + if (comm->me == 0) { indent.resize(--json_level*tab, ' '); fprintf(fdel, "%s]\n", indent.c_str()); fflush(fdel); @@ -1172,7 +1172,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) for (i = delete_Nsteps - 1; i > 0; i--) delete_Tcount[i] = delete_Tcount[i - 1]; delete_Tcount[0] += this_delete_Tcount; } - + if (ndel && (atom->map_style != Atom::MAP_NONE)) { atom->nghost = 0; atom->map_init(); diff --git a/src/output.cpp b/src/output.cpp index df8042eacd6..1a2a731af54 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -640,155 +640,109 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) std::string indent; int tab = 4; indent.resize(json_level*tab, ' '); + int json_init = 0; - // get max ivec value - - int local_max = 0; - for (int i = 0; i < atom->nlocal; i++) - local_max = MAX(local_max, ivec[i]); - - int global_max; - MPI_Allreduce(&local_max, &global_max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - int iglobal_max = global_max + 1; // (!) here is where assumes outer-loop index is value of ivec (!) - - // (!) need ERROR check: assumes max value (Nmax) is roughly equal to number of values (!) 
- - // let's break all operations up into natoms/nproc chunks (pagesize) - - int pagesize = atom->natoms / comm->nprocs; - - // first, get pagesize number of counts for each value + // let's first condense all ivec values + std::unordered_set unique_ivec(ivec, ivec + (size_t) atom->nlocal); + unique_ivec.erase(0); + std::vector mivec(unique_ivec.begin(), unique_ivec.end()); - std::vector atoms_local(pagesize); - std::vector atoms_root(pagesize); - std::vector local_ivcounts(pagesize); - std::vector global_ivcounts(pagesize); - std::fill(global_ivcounts.begin(), global_ivcounts.end(), 0); + int approxsize = atom->natoms / comm->nprocs; + std::vector atoms_local; + atoms_local.reserve(approxsize); + std::vector atoms_root; + atoms_root.reserve(approxsize); MPI_Datatype ParticleStructType = createParticleStructType(); - int ivstart = 1; - int ivend = pagesize; - if (ivend > iglobal_max) ivend = iglobal_max; - int json_init = 0; - while (true) { - std::fill(local_ivcounts.begin(), local_ivcounts.end(), 0); - for (int i = 0; i < atom->nlocal; i++) - for (int ival = ivstart; ival < ivend; ival++) - if (ivec[i] == ival) local_ivcounts[ival-ivstart]++; // (!) here is where assumes outer-loop index is value of ivec (!) - - MPI_Allreduce(local_ivcounts.data(), global_ivcounts.data(), pagesize, MPI_INT, MPI_SUM, MPI_COMM_WORLD); //sometimes nsend pagesize) printf("WARNING: you triggered a bug, please contact developer\n"); // (?) 
need to skip and warn if single molecule bigger than pagesize - if (cumsum > pagesize) { - subend = i-1; - breakflag = 0; - break; - } - } + for (int sendr = 0; sendr < comm->nprocs; sendr++) { + int nvals; + if (comm->me == sendr) nvals = mivec.size(); + MPI_Bcast(&nvals, 1, MPI_INT, sendr, MPI_COMM_WORLD); + if (nvals == 0) continue; + std::vector loop_ivals(nvals); + if (comm->me == sendr) loop_ivals = mivec; + MPI_Bcast(loop_ivals.data(), nvals, MPI_INT, sendr, MPI_COMM_WORLD); + + for (int ival = 0; ival < nvals; ival++) { + int thisval = loop_ivals[ival]; - int n2recv; - int n2send = 0; - for (int i = substart; i < subend; i++) - n2send += local_ivcounts[i]; - if (comm->me != 0) MPI_Send(&n2send, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); Particle myatom; - int iloc = 0; + int n2send = 0, n2recv = 0; for (int i = 0; i < atom->nlocal; i++) { - if (ivec[i] >= ivstart+substart && ivec[i] < ivstart+subend ) { // (!) here is where assumes outer-loop index is value of ivec (!) - myatom.ival = ivec[i]; + if (ivec[i] == thisval) { myatom.type = atom->type[i]; - myatom.tag = atom->tag[i]; + myatom.tag = (int) atom->tag[i]; for (int k = 0; k < 3; k++) myatom.x[k] = atom->x[i][k]; - atoms_local[iloc++] = myatom; + atoms_local.push_back(myatom); + n2send++; } } - if (comm->me != 0) MPI_Send(atoms_local.data(), n2send, ParticleStructType, 0, 0, MPI_COMM_WORLD); - + if (comm->me != 0) { + MPI_Send(&n2send, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); + MPI_Send(atoms_local.data(), n2send, ParticleStructType, 0, 0, MPI_COMM_WORLD); + } + if (comm->me == 0) { - int n2print = 0; for (int i = 1; i < comm->nprocs; i++) { MPI_Recv(&n2recv, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(&atoms_root[n2print], n2recv, ParticleStructType, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - n2print += n2recv; + std::vector atoms_recv(n2recv); + MPI_Recv(atoms_recv.data(), n2recv, ParticleStructType, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + atoms_root.insert(atoms_root.end(), 
atoms_recv.begin(), atoms_recv.end()); } - // add atoms already on root - for (int i = 0; i < n2send; i++) - atoms_root[n2print++] = atoms_local[i]; - - for (int j = ivstart; j < ivend; j++) { - if (global_ivcounts[j-ivstart+substart] == 0) continue; //yikes - if (json_init == 1) { - indent.resize(--json_level*tab, ' '); - fprintf(fp, "%s},\n%s{\n", indent.c_str(), indent.c_str()); - } else { - fprintf(fp, "%s{\n", indent.c_str()); - json_init = 1; - } - indent.resize(++json_level*tab, ' '); - fprintf(fp, "%s\"types\": {\n", indent.c_str()); - indent.resize(++json_level*tab, ' '); - //fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); - fprintf(fp, "%s\"data\": [\n", indent.c_str()); - indent.resize(++json_level*tab, ' '); - for (int i = 0; i < n2print; i++) { - if (atoms_root[i].ival == j) { - int mytype = atoms_root[i].type; - std::string typestr = std::to_string(mytype); - if (atom->labelmapflag) typestr = atom->lmap->typelabel[mytype-1]; - utils::print(fp, "{}[{}, \"{}\"]", indent, atoms_root[i].tag, typestr); - if (i < n2print-1) fprintf(fp, ",\n"); - else fprintf(fp, "\n"); - } - } - indent.resize(--json_level*tab, ' '); - fprintf(fp, "%s]\n", indent.c_str()); - indent.resize(--json_level*tab, ' '); - fprintf(fp, "%s},\n", indent.c_str()); - fprintf(fp, "%s\"coords\": {\n", indent.c_str()); - indent.resize(++json_level*tab, ' '); - //fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", indent.c_str()); - fprintf(fp, "%s\"data\": [\n", indent.c_str()); - indent.resize(++json_level*tab, ' '); - for (int i = 0; i < n2print; i++) { - if (atoms_root[i].ival == j) { - utils::print(fp, "{}[{}, {}, {}, {}]", indent, atoms_root[i].tag, - atoms_root[i].x[0], atoms_root[i].x[1], atoms_root[i].x[2]); - if (i < n2print-1) fprintf(fp, ",\n"); - else fprintf(fp, "\n"); - } - } - indent.resize(--json_level*tab, ' '); - fprintf(fp, "%s]\n", indent.c_str()); + atoms_root.insert(atoms_root.end(), atoms_local.begin(), atoms_local.end()); + + 
if (json_init == 1) { indent.resize(--json_level*tab, ' '); - fprintf(fp, "%s}\n", indent.c_str()); + fprintf(fp, "%s},\n%s{\n", indent.c_str(), indent.c_str()); + } else { + fprintf(fp, "%s{\n", indent.c_str()); + json_init = 1; } - } - if (breakflag) break; - substart = subend+1; - } - - if (ivend == iglobal_max) { - if (json_init == 1) { + indent.resize(++json_level*tab, ' '); + fprintf(fp, "%s\"types\": {\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + //fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); + fprintf(fp, "%s\"data\": [\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + auto it = atoms_root.begin(); + for (auto myatom : atoms_root) { + int mytype = myatom.type; + std::string typestr = std::to_string(mytype); + if (atom->labelmapflag) typestr = atom->lmap->typelabel[mytype-1]; + utils::print(fp, "{}[{}, \"{}\"]", indent, myatom.tag, typestr); + if (std::next(it) == atoms_root.end()) fprintf(fp, ",\n"); + else fprintf(fp, "\n"); + it++; + } + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s]\n", indent.c_str()); + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s},\n", indent.c_str()); + fprintf(fp, "%s\"coords\": {\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + //fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", indent.c_str()); + fprintf(fp, "%s\"data\": [\n", indent.c_str()); + indent.resize(++json_level*tab, ' '); + it = atoms_root.begin(); + for (auto myatom : atoms_root) { + utils::print(fp, "{}[{}, {}, {}, {}]", indent, myatom.tag, + myatom.x[0], myatom.x[1], myatom.x[2]); + if (std::next(it) == atoms_root.end()) fprintf(fp, ",\n"); + else fprintf(fp, "\n"); + it++; + } + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s]\n", indent.c_str()); indent.resize(--json_level*tab, ' '); fprintf(fp, "%s}\n", indent.c_str()); + } - break; + unique_ivec.erase(thisval); + mivec.assign(unique_ivec.begin(), unique_ivec.end()); + atoms_local.clear(); + 
atoms_root.clear(); } - ivstart += pagesize; - ivend += pagesize; - if (ivend > iglobal_max) ivend = iglobal_max; } } @@ -799,14 +753,13 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) MPI_Datatype Output::createParticleStructType() { MPI_Datatype ParticleStructType; - const int nfields = 4; - int blocklengths[nfields] = {1, 1, 1, 3}; + const int nfields = 3; + int blocklengths[nfields] = {1, 1, 3}; MPI_Aint offsets[nfields]; - offsets[0] = offsetof(Particle, ival); - offsets[1] = offsetof(Particle, tag); - offsets[2] = offsetof(Particle, type); - offsets[3] = offsetof(Particle, x); - MPI_Datatype types[nfields] = {MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE}; + offsets[0] = offsetof(Particle, tag); + offsets[1] = offsetof(Particle, type); + offsets[2] = offsetof(Particle, x); + MPI_Datatype types[nfields] = {MPI_INT, MPI_INT, MPI_DOUBLE}; MPI_Type_create_struct(nfields, blocklengths, offsets, types, &ParticleStructType); MPI_Type_commit(&ParticleStructType); diff --git a/src/output.h b/src/output.h index 34093bac0b2..91a7df77ce3 100644 --- a/src/output.h +++ b/src/output.h @@ -71,8 +71,8 @@ class Output : protected Pointers { DumpCreatorMap *dump_map; typedef struct Particle { - int ival; // atom's value from per-atom vector - int tag, type; + int tag; + int type; double x[3]; } Particle; MPI_Datatype createParticleStructType(); From 2490d8a0bad6026e1e0a2578ed3b4c09a19d9a29 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 28 Jun 2025 00:46:50 -0400 Subject: [PATCH 022/604] lost a bracket somewhere along the way --- src/output.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/output.cpp b/src/output.cpp index 1a2a731af54..11b98a1d1ac 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -736,7 +736,6 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) fprintf(fp, "%s]\n", indent.c_str()); indent.resize(--json_level*tab, ' '); fprintf(fp, "%s}\n", indent.c_str()); - } 
unique_ivec.erase(thisval); mivec.assign(unique_ivec.begin(), unique_ivec.end()); @@ -744,6 +743,10 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) atoms_root.clear(); } } + if (json_init && comm->me == 0) { + indent.resize(--json_level*tab, ' '); + fprintf(fp, "%s}\n", indent.c_str()); + } } /* ---------------------------------------------------------------------- From c9287dcbaa28faf81c7964cd51b333900c7c8b93 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Mon, 30 Jun 2025 19:30:09 -0400 Subject: [PATCH 023/604] address Axel's comments --- src/REAXFF/fix_reaxff_species.cpp | 5 ++- src/label_map.h | 3 +- src/output.cpp | 62 +++++++++++++++++++------------ src/output.h | 3 +- 4 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 6f68202ebcc..e47187a0937 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -1128,7 +1128,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) if (comm->me == 0) { indent.resize(json_level*tab, ' '); - if (deljson_init == 1) { + if (deljson_init > 0) { fprintf(fdel, "%s},\n%s{\n", indent.c_str(), indent.c_str()); } else { fprintf(fdel, "%s{\n", indent.c_str()); @@ -1142,7 +1142,8 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) indent.resize(++json_level*tab, ' '); } - output->write_molecule_json(fdel, json_level, mark); + output->write_molecule_json(fdel, json_level, deljson_init, mark); + if (deljson_init == 1) deljson_init++; if (comm->me == 0) { indent.resize(--json_level*tab, ' '); diff --git a/src/label_map.h b/src/label_map.h index 4af80832538..7dfd500db99 100644 --- a/src/label_map.h +++ b/src/label_map.h @@ -42,12 +42,13 @@ class LabelMap : protected Pointers { void write_data(FILE *); void read_restart(FILE *fp); void write_restart(FILE *); + inline std::vector getTypelabel() const { return typelabel; } +protected: int natomtypes, nbondtypes, nangletypes, 
ndihedraltypes, nimpropertypes; std::vector typelabel, btypelabel, atypelabel; std::vector dtypelabel, itypelabel; - protected: std::unordered_map typelabel_map; std::unordered_map btypelabel_map; std::unordered_map atypelabel_map; diff --git a/src/output.cpp b/src/output.cpp index 11b98a1d1ac..37f2a2cd032 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -635,7 +635,7 @@ void Output::write_restart(bigint ntimestep) atoms with integer array value of 0 assumed to not belong to a molecule ------------------------------------------------------------------------- */ -void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) +void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *ivec) { std::string indent; int tab = 4; @@ -647,12 +647,13 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) unique_ivec.erase(0); std::vector mivec(unique_ivec.begin(), unique_ivec.end()); - int approxsize = atom->natoms / comm->nprocs; std::vector atoms_local; - atoms_local.reserve(approxsize); + atoms_local.reserve(atom->nmax); std::vector atoms_root; - atoms_root.reserve(approxsize); + atoms_root.reserve(atom->nmax); + #if !defined(MPI_STUBS) MPI_Datatype ParticleStructType = createParticleStructType(); + #endif for (int sendr = 0; sendr < comm->nprocs; sendr++) { int nvals; @@ -678,60 +679,70 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) n2send++; } } + #if !defined(MPI_STUBS) if (comm->me != 0) { MPI_Send(&n2send, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); MPI_Send(atoms_local.data(), n2send, ParticleStructType, 0, 0, MPI_COMM_WORLD); } - + #endif + if (comm->me == 0) { + #if !defined(MPI_STUBS) for (int i = 1; i < comm->nprocs; i++) { MPI_Recv(&n2recv, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); std::vector atoms_recv(n2recv); MPI_Recv(atoms_recv.data(), n2recv, ParticleStructType, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); atoms_root.insert(atoms_root.end(), atoms_recv.begin(), atoms_recv.end()); } 
+ #endif atoms_root.insert(atoms_root.end(), atoms_local.begin(), atoms_local.end()); - if (json_init == 1) { + if (json_init > 0) { indent.resize(--json_level*tab, ' '); fprintf(fp, "%s},\n%s{\n", indent.c_str(), indent.c_str()); } else { fprintf(fp, "%s{\n", indent.c_str()); json_init = 1; } + indent.resize(++json_level*tab, ' '); fprintf(fp, "%s\"types\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); - //fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); + if (printflag == 1 && json_init == 1) + fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); fprintf(fp, "%s\"data\": [\n", indent.c_str()); indent.resize(++json_level*tab, ' '); auto it = atoms_root.begin(); for (auto myatom : atoms_root) { int mytype = myatom.type; std::string typestr = std::to_string(mytype); - if (atom->labelmapflag) typestr = atom->lmap->typelabel[mytype-1]; + if (atom->labelmapflag) typestr = atom->lmap->getTypelabel()[mytype-1]; utils::print(fp, "{}[{}, \"{}\"]", indent, myatom.tag, typestr); - if (std::next(it) == atoms_root.end()) fprintf(fp, ",\n"); - else fprintf(fp, "\n"); + if (std::next(it) == atoms_root.end()) fprintf(fp, "\n"); + else fprintf(fp, ",\n"); it++; } + indent.resize(--json_level*tab, ' '); fprintf(fp, "%s]\n", indent.c_str()); indent.resize(--json_level*tab, ' '); fprintf(fp, "%s},\n", indent.c_str()); fprintf(fp, "%s\"coords\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); - //fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", indent.c_str()); + if (printflag == 1 && json_init == 1) + fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", indent.c_str()); + if (json_init == 1) json_init++; fprintf(fp, "%s\"data\": [\n", indent.c_str()); indent.resize(++json_level*tab, ' '); it = atoms_root.begin(); for (auto myatom : atoms_root) { utils::print(fp, "{}[{}, {}, {}, {}]", indent, myatom.tag, myatom.x[0], myatom.x[1], myatom.x[2]); - if (std::next(it) == 
atoms_root.end()) fprintf(fp, ",\n"); - else fprintf(fp, "\n"); + if (std::next(it) == atoms_root.end()) fprintf(fp, "\n"); + else fprintf(fp, ",\n"); it++; } + indent.resize(--json_level*tab, ' '); fprintf(fp, "%s]\n", indent.c_str()); indent.resize(--json_level*tab, ' '); @@ -753,22 +764,25 @@ void Output::write_molecule_json(FILE *fp, int json_level, int *ivec) create Particle struct type for MPI ------------------------------------------------------------------------- */ +#if !defined(MPI_STUBS) MPI_Datatype Output::createParticleStructType() { - MPI_Datatype ParticleStructType; - const int nfields = 3; - int blocklengths[nfields] = {1, 1, 3}; - MPI_Aint offsets[nfields]; - offsets[0] = offsetof(Particle, tag); - offsets[1] = offsetof(Particle, type); - offsets[2] = offsetof(Particle, x); - MPI_Datatype types[nfields] = {MPI_INT, MPI_INT, MPI_DOUBLE}; + MPI_Datatype ParticleStructType; + + const int nfields = 3; + int blocklengths[nfields] = {1, 1, 3}; + MPI_Aint offsets[nfields]; + offsets[0] = offsetof(Particle, tag); + offsets[1] = offsetof(Particle, type); + offsets[2] = offsetof(Particle, x); + MPI_Datatype types[nfields] = {MPI_INT, MPI_INT, MPI_DOUBLE}; - MPI_Type_create_struct(nfields, blocklengths, offsets, types, &ParticleStructType); - MPI_Type_commit(&ParticleStructType); + MPI_Type_create_struct(nfields, blocklengths, offsets, types, &ParticleStructType); + MPI_Type_commit(&ParticleStructType); - return ParticleStructType; + return ParticleStructType; } +#endif /* ---------------------------------------------------------------------- timestep is being changed, called by update->reset_timestep() diff --git a/src/output.h b/src/output.h index 91a7df77ce3..9a6f3cfa8ea 100644 --- a/src/output.h +++ b/src/output.h @@ -77,6 +77,7 @@ class Output : protected Pointers { } Particle; MPI_Datatype createParticleStructType(); + Output(class LAMMPS *); ~Output() override; void init(); @@ -84,7 +85,7 @@ class Output : protected Pointers { void write(bigint); // 
output for current timestep void write_dump(bigint); // force output of dump snapshots void write_restart(bigint); // force output of a restart file - void write_molecule_json(FILE *, int, int *); // output molecule JSON objects to file + void write_molecule_json(FILE *, int, int, int *); // output molecule JSON objects to file void reset_timestep(bigint); // reset output which depends on timestep void reset_dt(); // reset output which depends on timestep size From a891ba77da790becd734f2d6a68b952cacbdc8ea Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Mon, 30 Jun 2025 20:30:52 -0400 Subject: [PATCH 024/604] tweaks --- src/label_map.h | 2 +- src/output.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/label_map.h b/src/label_map.h index 7dfd500db99..b3f0aa568a7 100644 --- a/src/label_map.h +++ b/src/label_map.h @@ -42,7 +42,7 @@ class LabelMap : protected Pointers { void write_data(FILE *); void read_restart(FILE *fp); void write_restart(FILE *); - inline std::vector getTypelabel() const { return typelabel; } + inline auto getTypelabel() const { return typelabel; } protected: int natomtypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes; diff --git a/src/output.h b/src/output.h index 9a6f3cfa8ea..29926872b53 100644 --- a/src/output.h +++ b/src/output.h @@ -77,7 +77,6 @@ class Output : protected Pointers { } Particle; MPI_Datatype createParticleStructType(); - Output(class LAMMPS *); ~Output() override; void init(); From affd2d928d6a30a6c3d135e120e5767162771b1a Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Mon, 30 Jun 2025 20:38:09 -0400 Subject: [PATCH 025/604] whitespace --- src/output.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/output.cpp b/src/output.cpp index 37f2a2cd032..d7faced9bb7 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -685,7 +685,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i MPI_Send(atoms_local.data(), n2send, 
ParticleStructType, 0, 0, MPI_COMM_WORLD); } #endif - + if (comm->me == 0) { #if !defined(MPI_STUBS) for (int i = 1; i < comm->nprocs; i++) { @@ -709,7 +709,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i fprintf(fp, "%s\"types\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); if (printflag == 1 && json_init == 1) - fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); + fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); fprintf(fp, "%s\"data\": [\n", indent.c_str()); indent.resize(++json_level*tab, ' '); auto it = atoms_root.begin(); From 6a7eba912cf75955801096b8666a4efd680fb86e Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 1 Jul 2025 13:33:26 -0400 Subject: [PATCH 026/604] docs --- doc/src/fix_reaxff_species.rst | 107 +++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 24 deletions(-) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index badcf72a568..bd79f736f5a 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -14,8 +14,7 @@ Syntax fix ID group-ID reaxff/species Nevery Nrepeat Nfreq filename keyword value ... * ID, group-ID are documented in :doc:`fix ` command -* reaxff/species = style name of this command -* Nevery = sample bond-order every this many timesteps +* Nevery = sample bond-order every this many Markdown Preview EnhancedMarkdown Preview Enhancedtimesteps * Nrepeat = # of bond-order samples used for calculating averages * Nfreq = calculate average bond-order every this many timesteps * filename = name of output file @@ -140,28 +139,88 @@ be either a list of specific chemical formulae or a range of molecular weights. Molecules are deleted every *Nfreq* timesteps, and bond connectivity is determined using the *Nevery* and *Nrepeat* keywords. The *filedel* argument is the name of the output file that records the species -that are removed from the system. 
The *specieslist* keyword permits -specific chemical species to be deleted. The *Nspecies* argument specifies -how many species are eligible for deletion and is followed by a list of -chemical formulae, whose strings are compared to species identified by this -fix. For example, "specieslist 2 CO CO2" deletes molecules that are -identified as "CO" and "CO2" in the species output file. When using the -*specieslist* keyword, the *filedel* file has the following format: the -first line lists the chemical formulae eligible for deletion, and each -additional line contains the timestep on which a molecule deletion occurs -and the number of each species deleted on that timestep. The *masslimit* -keyword permits deletion of molecules with molecular weights between -*massmin* and *massmax*. When using the *masslimit* keyword, each line of -the *filedel* file contains the timestep on which deletions occurs, -followed by how many of each species are deleted (with quantities preceding -chemical formulae). The *specieslist* and *masslimit* keywords cannot both -be used in the same *reaxff/species* fix. The *delete_rate_limit* keyword -can enforce an upper limit on the overall rate of molecule deletion. The -number of deletion occurrences is limited to Nlimit within an interval of -Nsteps timesteps. Nlimit can be specified with an equal-style -:doc:`variable `. When using the *delete_rate_limit* keyword, no -deletions are permitted to occur within the first Nsteps timesteps of the -first run (after reading a either a data or restart file). +that are removed from the system (see below for output file format options). +The *specieslist* keyword permits specific chemical species to be deleted. +The *Nspecies* argument specifies how many species are eligible for deletion +and is followed by a list of chemical formulae, whose strings are compared +to species identified by this fix. 
For example, "specieslist 2 CO CO2" +deletes molecules that are identified as "CO" and "CO2" in the species +output file. The *masslimit* keyword permits deletion of molecules with +molecular weights between *massmin* and *massmax*. The *specieslist* and +*masslimit* keywords cannot both be used in the same *reaxff/species* fix. +The *delete_rate_limit* keyword can enforce an upper limit on the overall +rate of molecule deletion. The number of deletion occurrences is limited to +Nlimit within an interval of Nsteps timesteps. Nlimit can be specified with +an equal-style :doc:`variable `. When using the +*delete_rate_limit* keyword, no deletions are permitted to occur within the +first Nsteps timesteps of the first run (after reading either a data or +restart file). + +The *delete* keyword can output information about the deleted molecules in +either legacy format or JSON format. The latter is activated when the +*filedel* argument has a '.json' extension. The legacy format lists how +many of each species is deleted, while the JSON format provides the atom ID, +atom type, and coordinates of deleted atoms within each molecule. The +format for legacy output changes depending on the keyword used. When using +the *specieslist* keyword and legacy format, the *filedel* file has the +following format: the first line lists the chemical formulae eligible for +deletion, and each additional line contains the timestep on which a molecule +deletion occurs and the number of each species deleted on that timestep. +When using the *masslimit* keyword and the legacy format, each line of the +*filedel* file contains the timestep on which deletions occurs, followed by +how many of each species are deleted (with quantities preceding chemical +formulae). The JSON format is the same regardless of the keyword, and lists +deleted molecules in the style of the :doc:`JSON molecule file `, +where more discussion of JSON schema can be found. 
Here is an example of a +JSON output file from a simulation during which one water molecule was +deleted on the first timestep: + +.. code-block:: json + + { + "application": "LAMMPS", + "format": "output", + "subformat": "fix reaxff/species: delete keyword", + "revision": 1, + "run_output": [ + { + "timestep": 1, + "deleted_molecules": [ + { + "types": { + "format": ["atom-tag", "type"], + "data": [ + [1368, "H"], + [1366, "O"], + [1367, "H"] + ] + }, + "coords": { + "format": ["atom-tag", "x", "y", "z"], + "data": [ + [1368, 26.787767440427466, 29.785528640296768, 25.85197353660144], + [1366, 26.641801222582824, 29.868106247702887, 24.91285138212243], + [1367, 25.69611192416744, 30.093425787807448, 24.914380215672846] + ] + } + } + ] + } + ] + } + +The first-level keys of the JSON format output are "application", "format", +"subformat", "revision", and "run_output". The value of the "run_output" +key is an array of objects that contain data for each timestep on which a +molecule was deleted, and the other first-level keys identify this JSON +schema. The "run_output" objects contain two keys, "timestep" and +"deleted_molecules". The "deleted_molecules" key is an array of :doc:`LAMMPS +molecule JSON ` objects, one for each deleted molecule. The +"format" keys within molecule JSON objects are only listed once per output +file, for brevity. The "atom-tag" values are atom IDs from the simulation, +and the "type" values are atom types. In the above example, the types were +reported as strings corresponding to elements using :doc:`type labels +`. 
---------- From 33b4586fefa0b08552b36a02e892c46b13690545 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 1 Jul 2025 13:34:43 -0400 Subject: [PATCH 027/604] typos --- doc/src/fix_reaxff_species.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index bd79f736f5a..386d14af1c2 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -14,7 +14,7 @@ Syntax fix ID group-ID reaxff/species Nevery Nrepeat Nfreq filename keyword value ... * ID, group-ID are documented in :doc:`fix ` command -* Nevery = sample bond-order every this many Markdown Preview EnhancedMarkdown Preview Enhancedtimesteps +* Nevery = sample bond-order every this many timesteps * Nrepeat = # of bond-order samples used for calculating averages * Nfreq = calculate average bond-order every this many timesteps * filename = name of output file From a441dea5f48ad23fc371ef215ed801c924a91f05 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 1 Jul 2025 13:36:27 -0400 Subject: [PATCH 028/604] undo mysterious line deletion --- doc/src/fix_reaxff_species.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index 386d14af1c2..a6b3c9e0605 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -14,6 +14,7 @@ Syntax fix ID group-ID reaxff/species Nevery Nrepeat Nfreq filename keyword value ... 
* ID, group-ID are documented in :doc:`fix ` command +* reaxff/species = style name of this command * Nevery = sample bond-order every this many timesteps * Nrepeat = # of bond-order samples used for calculating averages * Nfreq = calculate average bond-order every this many timesteps From cfb3fb1f5d072fd0cf6f37aca3b88e80d80135dc Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 1 Jul 2025 13:50:50 -0400 Subject: [PATCH 029/604] need to turn auto whitespace cleanup back on --- doc/src/fix_reaxff_species.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index a6b3c9e0605..7a1a75a1a48 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -171,7 +171,7 @@ When using the *masslimit* keyword and the legacy format, each line of the *filedel* file contains the timestep on which deletions occurs, followed by how many of each species are deleted (with quantities preceding chemical formulae). The JSON format is the same regardless of the keyword, and lists -deleted molecules in the style of the :doc:`JSON molecule file `, +deleted molecules in the style of the :doc:`JSON molecule file `, where more discussion of JSON schema can be found. Here is an example of a JSON output file from a simulation during which one water molecule was deleted on the first timestep: @@ -209,7 +209,7 @@ deleted on the first timestep: } ] } - + The first-level keys of the JSON format output are "application", "format", "subformat", "revision", and "run_output". 
The value of the "run_output" key is an array of objects that contain data for each timestep on which a From 111b356bef26420f796db1d7a33716fc056a6b17 Mon Sep 17 00:00:00 2001 From: megmcca Date: Tue, 22 Jul 2025 08:22:00 -0600 Subject: [PATCH 030/604] update docs --- doc/src/pair_hybrid.rst | 49 +++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/doc/src/pair_hybrid.rst b/doc/src/pair_hybrid.rst index 8d5d03b7e36..65a1ccc3174 100644 --- a/doc/src/pair_hybrid.rst +++ b/doc/src/pair_hybrid.rst @@ -7,6 +7,7 @@ .. index:: pair_style hybrid/overlay/omp .. index:: pair_style hybrid/overlay/kk .. index:: pair_style hybrid/scaled +.. index:: pair_style hybrid/scaled/kk .. index:: pair_style hybrid/scaled/omp pair_style hybrid command @@ -27,7 +28,7 @@ Accelerator Variants: *hybrid/overlay/kk*, *hybrid/overlay/omp* pair_style hybrid/scaled command ================================== -Accelerator Variant: *hybrid/scaled/omp* +Accelerator Variants: *hybrid/scaled/kk*, *hybrid/scaled/omp* Syntax """""" @@ -171,26 +172,29 @@ restrictions discussed below. If the *hybrid/scaled* style is used instead of *hybrid/overlay*, contributions from sub-styles are weighted by their scale factors, which may be fractional or even negative. Furthermore the scale factor for -each sub-style may be a constant, an *equal* style variable, or an -*atom* style variable. Variable scale factors may change during the -simulation. Different sub-styles may use different scale factor styles. -In the case of a sub-style scale factor that is an *atom* style -variable, the force contribution to each atom from that sub-style is -weighted by the value of the variable for that atom, while the -contribution from that sub-style to the global potential energy is zero. -All other contributions to the per-atom energy, per-atom virial, and -global virial (if not obtained from forces) from that sub-style are -zero. 
This enables switching smoothly between two different pair styles -or two different parameter sets during a run in a similar fashion as -could be done with :doc:`fix adapt ` or :doc:`fix alchemy -`. All pair styles that will be used are listed as -"sub-styles" following the *hybrid* or *hybrid/overlay* keyword, in any -order. In case of the *hybrid/scaled* pair style, each sub-style is -prefixed with a scale factor. The scale factor is either a floating -point number or an *equal* or *atom* style (or equivalent) variable. -Each sub-style's name is followed by its usual arguments, as illustrated -in the examples above. See the doc pages of the individual pair styles -for a listing and explanation of the appropriate arguments for them. +each sub-style may be a constant, an *equal* style variable, or an *atom* +style variable. Variable scale factors may change during the simulation. +Different sub-styles may use different scale factor styles. +In the case of a sub-style scale factor that is an *atom* style variable, +the force contribution to each atom from that sub-style is weighted +by the value of the variable for that atom, while the contribution +from that sub-style to the global potential energy is zero. +All other contributions to the per-atom energy, per-atom +virial, and global virial (if not obtained from forces) +from that sub-style are zero. +This enables +switching smoothly between two different pair styles or two different +parameter sets during a run in a similar fashion as could be done +with :doc:`fix adapt ` or :doc:`fix alchemy `. +All pair styles that will be used are listed as "sub-styles" following +the *hybrid* or *hybrid/overlay* keyword, in any order. In case of the +*hybrid/scaled* pair style, each sub-style is prefixed with a scale +factor. The scale factor is either a floating point number or an +*equal* or *atom* +style (or equivalent) variable. Each sub-style's name is followed by +its usual arguments, as illustrated in the examples above. 
See the doc +pages of the individual pair styles for a listing and explanation of the +appropriate arguments for them. Note that an individual pair style can be used multiple times as a sub-style. For efficiency reasons this should only be done if your @@ -562,9 +566,6 @@ e.g. *lj/cut/coul/long* or *buck/coul/long*\ . You must ensure that the short-range Coulombic cutoff used by each of these long pair styles is the same or else LAMMPS will generate an error. -Pair style *hybrid/scaled* currently only works for non-accelerated -pair styles and pair styles from the OPT package. - Pair style *hybrid/molecular* is not compatible with manybody potentials. When using pair styles from the GPU package they must not be listed From 4bfa71b993248516527d5b44641d9eb27936a0b3 Mon Sep 17 00:00:00 2001 From: megmcca Date: Tue, 22 Jul 2025 08:44:36 -0600 Subject: [PATCH 031/604] update hybrid/scaled to reflect kk accel --- doc/src/Commands_pair.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 48acf3b4995..99106aea081 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -14,7 +14,7 @@ OPT. 
* :doc:`hybrid (ko) ` * :doc:`hybrid/molecular (o) ` * :doc:`hybrid/overlay (ko) ` - * :doc:`hybrid/scaled (o) ` + * :doc:`hybrid/scaled (ko) ` * :doc:`kim ` * :doc:`list ` * :doc:`tracker ` From 0ad68d7017eaf31f4489a93831b03c06a5ee72fe Mon Sep 17 00:00:00 2001 From: megmcca Date: Tue, 22 Jul 2025 23:49:41 -0600 Subject: [PATCH 032/604] whitespace --- src/KOKKOS/pair_hybrid_scaled_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp b/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp index 9878262162f..691e1306c6b 100644 --- a/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_scaled_kokkos.cpp @@ -32,7 +32,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairHybridScaledKokkos::PairHybridScaledKokkos(LAMMPS *lmp) : PairHybridKokkos(lmp), fsum(nullptr), tsum(nullptr), scaleval(nullptr), scaleidx(nullptr), - atomvar(nullptr), atomscale(nullptr) + atomvar(nullptr), atomscale(nullptr) { nmaxfsum = -1; @@ -793,4 +793,4 @@ void PairHybridScaledKokkos::unpack_forward_comm(int n, int first, double *buf) m = 0; last = first + n; for (i = first; i < last; i++) atomscale[i] = buf[m++]; -} \ No newline at end of file +} From 25c1f355c4629be260f6082914361db1ba6d5261 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 23 Jul 2025 21:30:22 -0400 Subject: [PATCH 033/604] uniquify property/atom property name fix for change from circa Jan 2025 that broke backward compatibility by preventing multiple "fix reaxff/species" from being defined --- src/REAXFF/fix_reaxff_species.cpp | 7 ++++--- src/REAXFF/fix_reaxff_species.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 51a55569f07..969fc87251b 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -404,12 +404,13 @@ void FixReaxFFSpecies::init() f_SPECBOND = 
dynamic_cast(modify->add_fix(fixcmd)); // create a fix to point to fix_property_atom for storing clusterID - fixcmd = fmt::format("clusterID_{} all property/atom d_clusterID ghost yes", id); + clusterID_propname = fmt::format("clusterID_propname_{}", id); + fixcmd = fmt::format("clusterID_{} all property/atom d_{} ghost yes", id, clusterID_propname); f_clusterID = dynamic_cast(modify->add_fix(fixcmd)); // per-atom property for clusterID int flag,cols; - int index1 = atom->find_custom("clusterID",flag,cols); + int index1 = atom->find_custom(clusterID_propname.c_str(),flag,cols); clusterID = atom->dvector[index1]; vector_atom = clusterID; @@ -455,7 +456,7 @@ void FixReaxFFSpecies::Output_ReaxFF_Bonds(bigint ntimestep, FILE * /*fp*/) // per-atom property for clusterID int flag,cols; - int index1 = atom->find_custom("clusterID",flag,cols); + int index1 = atom->find_custom(clusterID_propname.c_str(),flag,cols); clusterID = atom->dvector[index1]; vector_atom = clusterID; diff --git a/src/REAXFF/fix_reaxff_species.h b/src/REAXFF/fix_reaxff_species.h index 831d5b56187..fe4a792b736 100644 --- a/src/REAXFF/fix_reaxff_species.h +++ b/src/REAXFF/fix_reaxff_species.h @@ -90,6 +90,7 @@ class FixReaxFFSpecies : public Fix { class NeighList *list; class FixAveAtom *f_SPECBOND; class FixPropertyAtom *f_clusterID; + std::string clusterID_propname; class PairReaxFF *reaxff; }; } // namespace LAMMPS_NS From 4cd7a480ae2c5aea26207c84192a6f0efdb8d47c Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 26 Jul 2025 12:56:45 -0400 Subject: [PATCH 034/604] simplify JSON key --- doc/src/fix_reaxff_species.rst | 4 ++-- src/REAXFF/fix_reaxff_species.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index 7a1a75a1a48..02848c83380 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -186,7 +186,7 @@ deleted on the first timestep: "run_output": [ { "timestep": 1, - 
"deleted_molecules": [ + "molecules": [ { "types": { "format": ["atom-tag", "type"], @@ -215,7 +215,7 @@ The first-level keys of the JSON format output are "application", "format", key is an array of objects that contain data for each timestep on which a molecule was deleted, and the other first-level keys identify this JSON schema. The "run_output" objects contain two keys, "timestep" and -"deleted_molecules". The "deleted_molecules" key is an array of :doc:`LAMMPS +"molecules". The "molecules" key is an array of :doc:`LAMMPS molecule JSON ` objects, one for each deleted molecule. The "format" keys within molecule JSON objects are only listed once per output file, for brevity. The "atom-tag" values are atom IDs from the simulation, diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index 969fc87251b..f8656d21260 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -1144,7 +1144,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) indent.resize(++json_level*tab, ' '); utils::print(fdel, "{}\"timestep\": {},\n", indent, update->ntimestep); - utils::print(fdel, "{}\"deleted_molecules\": [\n", indent); + utils::print(fdel, "{}\"molecules\": [\n", indent); indent.resize(++json_level*tab, ' '); } From 8f331cee70ac66a67ee30993d720dfde087a6ccc Mon Sep 17 00:00:00 2001 From: user Date: Mon, 11 Aug 2025 16:14:54 -0500 Subject: [PATCH 035/604] Fix ttm/thermal - A TTM implementation for modeling thermal transport --- doc/src/fix_ttm.rst | 101 +++- examples/ttm/in.ttm.thermal | 42 ++ src/EXTRA-FIX/fix_ttm_thermal.cpp | 878 ++++++++++++++++++++++++++++++ src/EXTRA-FIX/fix_ttm_thermal.h | 87 +++ 4 files changed, 1096 insertions(+), 12 deletions(-) mode change 100644 => 100755 doc/src/fix_ttm.rst create mode 100644 examples/ttm/in.ttm.thermal create mode 100755 src/EXTRA-FIX/fix_ttm_thermal.cpp create mode 100755 src/EXTRA-FIX/fix_ttm_thermal.h diff --git a/doc/src/fix_ttm.rst b/doc/src/fix_ttm.rst 
old mode 100644 new mode 100755 index f3e6a08d61b..e40e24915cf --- a/doc/src/fix_ttm.rst +++ b/doc/src/fix_ttm.rst @@ -1,6 +1,7 @@ .. index:: fix ttm .. index:: fix ttm/grid .. index:: fix ttm/mod +.. index:: fix ttm/thermal fix ttm command =============== @@ -11,6 +12,9 @@ fix ttm/grid command fix ttm/mod command =================== +fix ttm/thermal command +======================= + Syntax """""" @@ -18,9 +22,10 @@ Syntax fix ID group-ID ttm seed C_e rho_e kappa_e gamma_p gamma_s v_0 Nx Ny Nz keyword value ... fix ID group-ID ttm/mod seed init_file Nx Ny Nz keyword value ... + fix ID group-ID ttm/thermal seed properties_file Nx Ny Nz keyword value ... * ID, group-ID are documented in :doc:`fix ` command -* style = *ttm* or *ttm/grid* or *ttm/mod* +* style = *ttm* or *ttm/grid* or *ttm/mod* or *ttm/thermal* * seed = random number seed to use for white noise (positive integer) * remaining arguments for fix ttm or fix ttm/grid @@ -44,6 +49,15 @@ Syntax Nx = number of thermal solve grid points in the x-direction (positive integer) Ny = number of thermal solve grid points in the y-direction (positive integer) Nz = number of thermal solve grid points in the z-direction (positive integer) + +* remaining arguments for fix ttm/thermal: + + .. parsed-literal:: + + properties_file = file with grid based TTM properties + Nx = number of thermal solve grid points in the x-direction (positive integer) + Ny = number of thermal solve grid points in the y-direction (positive integer) + Nz = number of thermal solve grid points in the z-direction (positive integer) * zero or more keyword/value(s) pairs may be appended * keyword = *set* or *infile* or *outfile* @@ -57,6 +71,12 @@ Syntax Nout = dump grid temperatures every this many timesteps file.out = filename to write grid temperatures to +* fix ttm/thermal supports an additional keyword: *source* + + .. 
parsed-literal:: + *source* value = source + source = volumetric heating term applied to electrons (energy/(time\*volume) units) + Examples """""""" @@ -65,7 +85,8 @@ Examples fix 2 all ttm 699489 1.0 1.0 10 0.1 0.0 2.0 1 12 1 infile initial outfile 1000 T.out fix 3 all ttm/grid 123456 1.0 1.0 1.0 1.0 1.0 5.0 5 5 5 infile Te.in fix 4 all ttm/mod 34277 parameters.txt 5 5 5 infile T_init outfile 10 T_out - + fix 5 all ttm/thermal 11111 properties.in 10 10 10 source 0.1 infile temps.in outfile 10 temps.out + Example input scripts using these commands can be found in examples/ttm. Description @@ -86,8 +107,8 @@ Matter papers: :ref:`(Duffy) ` and :ref:`(Rutherford) a primary knock-on atom (PKA) was initialized with a high velocity to simulate a radiation event. -The description in this subsection applies to all 3 fix styles: -*ttm*, *ttm/grid*, and *ttm/mod*. +The description in this subsection applies to all 4 fix styles: +*ttm*, *ttm/grid*, *ttm/mod*, and *ttm/thermal*. Fix *ttm/grid* distributes the regular grid across processors consistent with the subdomains of atoms owned by each processor, but is otherwise @@ -102,6 +123,11 @@ expensive computationally than fix *ttm* because it treats the thermal diffusion equation as non-linear. More details on fix *ttm/mod* are given below. +Fix *ttm/thermal* allows for electronic properties to be assigned +independently to each TTM grid point and supports external heat sources +to the electronic subsystem. More details on fix *ttm/thermal* are +given below. + Heat transfer between the electronic and atomic subsystems is carried out via an inhomogeneous Langevin thermostat. Only atoms in the fix group contribute to and are affected by this heat transfer. @@ -242,11 +268,11 @@ units setting in use, grid size and the current timestep. reads. The file has the same format as the file the *infile* option reads. 
-For the fix ttm and fix ttm/mod commands, the corresponding atomic -temperature for atoms in each grid cell can be computed and output by -the :doc:`fix ave/chunk ` command using the -:doc:`compute chunk/atom ` command to create a 3d -array of chunks consistent with the grid used by this fix. +For the fix ttm, fix ttm/mod, and fix ttm/thermal commands, the +corresponding atomic temperature for atoms in each grid cell can +be computed and output by the :doc:`fix ave/chunk ` +command using the :doc:`compute chunk/atom ` command +to create a 3d array of chunks consistent with the grid used by this fix. For the fix ttm/grid command the same thing can be done using the :doc:`fix ave/grid ` command and its per-grid values can @@ -354,12 +380,59 @@ ignored. The lines with the even numbers are treated as follows: ---------- +**Additional details for fix ttm/thermal** + +Fix *ttm/thermal* uses the heat diffusion equation with possible external +heat sources (e.g. inductive heating). The effects of electron stopping +have been removed: + +.. math:: + + C_\mathrm{vol} \frac{\partial T_e}{\partial t} = + \bigtriangledown (\kappa_\mathrm{eff} \bigtriangledown T_e) - + g_p (T_e - T_a) + \eta s + +where :math:`s` is the applied heating power density and :math:`\eta` is +the absorption efficiency (0-1) defined for each TTM grid cell in the +*properties.in* file. Also note that compared to the original *fix ttm*, +it uses a volumetric specific heat, :math:`C_\mathrm{vol}`, which +represents the product of :math:`C_e \rho_e`. + +:ref:`(Baer) ` defined :math:`\kappa_\mathrm{eff}` as an effective +electronic thermal conductivity when two adjacent TTM cells (denoted by +the subscripts *a* and *b*) have different conductivities as: + +.. math:: + + \kappa_\mathrm{eff} = \frac{2 \kappa_a \kappa_b}{\kappa_a + \kappa_b} + +The current fix *ttm/thermal* implementation allows TTM simulations with +TTM cells that do not contain electrons (vacuum or insulators).
Similar +to *ttm/mod*, the absence of electrons is indicated by grid cells with +zero electronic temperature. The numerical scheme does not allow energy +exchange with such cells. + + +The fix *ttm/thermal* parameter file *properties_file* uses a similar syntax +as the keyword *infile*. The file is read in line by line and each TTM cell's +properties are set. Comment lines are allowed and each line should have +properties listed in the order: + +.. parsed-literal:: + + ix iy iz C_vol kappa_e gamma_p eta + +The grid must match the one declared in the fix and all grid points must have +all properties set or *ttm/thermal* will exit with an error. + +---------- + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -The fix ttm and fix ttm/mod commands write the state of the electronic -subsystem and the energy exchange between the subsystems to -:doc:`binary restart files `. The fix ttm/grid command does +The fix ttm, fix ttm/mod, and fix ttm/thermal commands write the state +of the electronic subsystem and the energy exchange between the subsystems +to :doc:`binary restart files `. The fix ttm/grid command does not yet support writing of its distributed grid to a restart file. See the :doc:`read_restart ` command for info on how to @@ -462,3 +535,7 @@ Plasma Phys., 53, 129-139 (2013). **(Pisarev)** V V Pisarev and S V Starikov, J. Phys.: Condens. Matter, 26, 475401 (2014). + +.. _Baer: + +**(Baer)** B Baer and D G Walker, J. Mol.
Model, 31, 220 (2025) diff --git a/examples/ttm/in.ttm.thermal b/examples/ttm/in.ttm.thermal new file mode 100644 index 00000000000..4c768ac921d --- /dev/null +++ b/examples/ttm/in.ttm.thermal @@ -0,0 +1,42 @@ +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & + units lattice +create_box 1 sim_box +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & + units lattice +create_atoms 1 region atom_box + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * FeVoter-ChenRecheck.fs Fe + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm/thermal 11111 props.in 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +run 1000 diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp new file mode 100755 index 00000000000..695a4460f2a --- /dev/null +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -0,0 +1,878 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Original fix ttm + Paul Crozier (SNL) + Carolyn Phillips (University of Michigan) + + ttm/thermal + Bradly Baer (Vanderbilt University) + D. Greg Walker (Vanderbilt University) + +------------------------------------------------------------------------- */ + + +#include "fix_ttm_thermal.h" + +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "random_mars.h" +#include "respa.h" +#include "potential_file_reader.h" +#include "update.h" + +#include +#include +#include +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +// OFFSET avoids outside-of-box atoms being rounded to grid pts incorrectly +// SHIFT = 0.0 assigns atoms to lower-left grid pt +// SHIFT = 0.5 assigns atoms to nearest grid pt +// use SHIFT = 0.0 for now since it allows fix ave/chunk +// to spatially average consistent with the TTM grid + +static constexpr int OFFSET = 16384; +static constexpr double SHIFT = 0.0; + +/* ---------------------------------------------------------------------- */ + +FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : + Fix(lmp, narg, arg), + random(nullptr), + gfactor1(nullptr), gfactor2(nullptr), ratio(nullptr), flangevin(nullptr), + T_electron(nullptr), T_electron_old(nullptr), + net_energy_transfer(nullptr), net_energy_transfer_all(nullptr) , + gamma_p_grid(nullptr), inductive_response_grid(nullptr), + c_e_grid(nullptr), k_e_grid(nullptr) + +{ + if (narg < 8) error->all(FLERR,"Illegal fix ttm command"); + vector_flag = 1; + size_vector = 2; + global_freq = 1; + extvector = 1; + nevery = 1; + restart_peratom = 1; + restart_global = 1; + + e_property_file = nullptr; + + seed = utils::inumeric(FLERR,arg[3],false,lmp); + e_property_file = utils::strdup(arg[4]); + nxgrid = 
utils::inumeric(FLERR,arg[5],false,lmp); + nygrid = utils::inumeric(FLERR,arg[6],false,lmp); + nzgrid = utils::inumeric(FLERR,arg[7],false,lmp); + + + inductive_power = 0.0; + tinit = 0.0; + infile = outfile = nullptr; + + int iarg = 8; + while (iarg < narg) { + if (strcmp(arg[iarg],"set") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); + tinit = (utils::numeric(FLERR,arg[iarg+1],false,lmp)); + if (tinit <= 0.0) + error->all(FLERR,"Fix ttm initial temperature must be > 0.0"); + iarg += 2; + } else if (strcmp(arg[iarg],"source") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); + inductive_power = (utils::numeric(FLERR,arg[iarg+1],false,lmp)); + iarg += 2; + } else if (strcmp(arg[iarg],"infile") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); + infile = utils::strdup(arg[iarg+1]); + iarg += 2; + } else if (strcmp(arg[iarg],"outfile") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix ttm command"); + outevery = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + outfile = utils::strdup(arg[iarg+2]); + iarg += 3; + } else error->all(FLERR,"Illegal fix ttm command"); + } + + + // error check + + if (seed <= 0) + error->all(FLERR,"Invalid random number seed in fix ttm command"); + if (nxgrid <= 0 || nygrid <= 0 || nzgrid <= 0) + error->all(FLERR,"Fix ttm grid sizes must be > 0"); + + + // grid OFFSET to perform + // SHIFT to map atom to nearest or lower-left grid point + + shift = OFFSET + SHIFT; + + // initialize Marsaglia RNG with processor-unique seed + + random = new RanMars(lmp,seed + comm->me); + + // allocate per-type arrays for force prefactors + + gfactor1 = new double[atom->ntypes+1]; + gfactor2 = new double[atom->ntypes+1]; + + // check for allowed maximum number of total grid points + + bigint totalgrid = (bigint) nxgrid * nygrid * nzgrid; + if (totalgrid > MAXSMALLINT) + error->all(FLERR,"Too many grid points in fix ttm"); + ngridtotal = totalgrid; + + // allocate per-atom 
flangevin and zero it + + flangevin = nullptr; + FixTTMThermal::grow_arrays(atom->nmax); + + for (int i = 0; i < atom->nmax; i++) { + flangevin[i][0] = 0.0; + flangevin[i][1] = 0.0; + flangevin[i][2] = 0.0; + } + + // set 2 callbacks + + atom->add_callback(Atom::GROW); + atom->add_callback(Atom::RESTART); + + // determines which class deallocate_grid() is called from + + deallocate_flag = 0; + +} + +/* ---------------------------------------------------------------------- */ + +FixTTMThermal::~FixTTMThermal() +{ + delete [] infile; + + delete random; + + delete [] gfactor1; + delete [] gfactor2; + + memory->destroy(flangevin); + + if (!deallocate_flag) FixTTMThermal::deallocate_grid(); +} + +/* ---------------------------------------------------------------------- */ + inline double safe_effective_kappa(double a, double b) { + if (a == 0 || b == 0) return 0; + return 2.0 * a * b / (a + b); + } +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::post_constructor() +{ + // allocate global grid on each proc + // needs to be done in post_contructor() beccause is virtual method + + allocate_grid(); + + // initialize electron temperatures on grid + + int ix,iy,iz; + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) + T_electron[iz][iy][ix] = tinit; + + + // zero net_energy_transfer_all + // in case compute_vector accesses it on timestep 0 + + outflag = 0; + memset(&net_energy_transfer_all[0][0][0],0,ngridtotal*sizeof(double)); + + // set electron grid properties from file + read_electron_properties(e_property_file); + + // set initial electron temperatures from user input file + + if (infile) read_electron_temperatures(infile); +} + +/* ---------------------------------------------------------------------- */ + +int FixTTMThermal::setmask() +{ + int mask = 0; + mask |= POST_FORCE; + mask |= POST_FORCE_RESPA; + mask |= END_OF_STEP; + return mask; +} + +/* 
---------------------------------------------------------------------- */ + +void FixTTMThermal::init() +{ + if (domain->dimension == 2) + error->all(FLERR,"Cannot use fix ttm with 2d simulation"); + if (domain->nonperiodic != 0) + error->all(FLERR,"Cannot use non-periodic boundares with fix ttm"); + if (domain->triclinic) + error->all(FLERR,"Cannot use fix ttm with triclinic box"); + + // to allow this, would have to reset grid bounds dynamically + // for RCB balancing would have to reassign grid pts to procs + // and create a new GridComm, and pass old GC data to new GC + + if (domain->box_change) + error->all(FLERR,"Cannot use fix ttm with changing box shape, size, or sub-domains"); + + // set force prefactors + + if (utils::strmatch(update->integrate_style,"^respa")) + nlevels_respa = (dynamic_cast(update->integrate))->nlevels; +} + +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::setup(int vflag) +{ + if (utils::strmatch(update->integrate_style,"^verlet")) { + post_force_setup(vflag); + } else { + (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa-1); + post_force_respa_setup(vflag,nlevels_respa-1,0); + (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa-1); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::post_force_setup(int /*vflag*/) +{ + double **f = atom->f; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + // apply langevin forces that have been stored from previous run + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + f[i][0] += flangevin[i][0]; + f[i][1] += flangevin[i][1]; + f[i][2] += flangevin[i][2]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::post_force(int /*vflag*/) +{ + int ix,iy,iz; + double gamma1,gamma2; + + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + int *type = atom->type; + int 
*mask = atom->mask; + int nlocal = atom->nlocal; + + double *boxlo = domain->boxlo; + double dxinv = nxgrid/domain->xprd; + double dyinv = nygrid/domain->yprd; + double dzinv = nzgrid/domain->zprd; + + // apply damping and thermostat to all atoms in fix group + + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + ix = static_cast ((x[i][0]-boxlo[0])*dxinv + shift) - OFFSET; + iy = static_cast ((x[i][1]-boxlo[1])*dyinv + shift) - OFFSET; + iz = static_cast ((x[i][2]-boxlo[2])*dzinv + shift) - OFFSET; + if (ix < 0) ix += nxgrid; + if (iy < 0) iy += nygrid; + if (iz < 0) iz += nzgrid; + if (ix >= nxgrid) ix -= nxgrid; + if (iy >= nygrid) iy -= nygrid; + if (iz >= nzgrid) iz -= nzgrid; + + if (T_electron[iz][iy][ix] < 0) + error->one(FLERR,"Electronic temperature dropped below zero"); + //Come back and check this for scaling + for (int i = 1; i <= atom->ntypes; i++) { + gfactor1[i] = - gamma_p_grid[iz][iy][ix] / force->ftm2v; + gfactor2[i] = sqrt(24.0*force->boltz*gamma_p_grid[iz][iy][ix]/update->dt/force->mvv2e) / force->ftm2v; + } + + double tsqrt = sqrt(T_electron[iz][iy][ix]); + + gamma1 = gfactor1[type[i]]; + gamma2 = gfactor2[type[i]] * tsqrt; + if (T_electron[iz][iy][ix] > 1e-5) { + flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); + flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); + flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); + + f[i][0] += flangevin[i][0]; + f[i][1] += flangevin[i][1]; + f[i][2] += flangevin[i][2]; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::post_force_respa_setup(int vflag, int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) post_force_setup(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixTTMThermal::post_force_respa(int vflag, int ilevel, int /*iloop*/) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} + +/* 
---------------------------------------------------------------------- */ + +void FixTTMThermal::end_of_step() +{ + int ix,iy,iz; + + double **x = atom->x; + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double *boxlo = domain->boxlo; + double dxinv = nxgrid/domain->xprd; + double dyinv = nygrid/domain->yprd; + double dzinv = nzgrid/domain->zprd; + + + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) + net_energy_transfer[iz][iy][ix] = 0.0; + + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + ix = static_cast ((x[i][0]-boxlo[0])*dxinv + shift) - OFFSET; + iy = static_cast ((x[i][1]-boxlo[1])*dyinv + shift) - OFFSET; + iz = static_cast ((x[i][2]-boxlo[2])*dzinv + shift) - OFFSET; + if (ix < 0) ix += nxgrid; + if (iy < 0) iy += nygrid; + if (iz < 0) iz += nzgrid; + if (ix >= nxgrid) ix -= nxgrid; + if (iy >= nygrid) iy -= nygrid; + if (iz >= nzgrid) iz -= nzgrid; + + net_energy_transfer[iz][iy][ix] += + (flangevin[i][0]*v[i][0] + flangevin[i][1]*v[i][1] + + flangevin[i][2]*v[i][2]); + } + + outflag = 0; + MPI_Allreduce(&net_energy_transfer[0][0][0],&net_energy_transfer_all[0][0][0], + ngridtotal,MPI_DOUBLE,MPI_SUM,world); + + double dx = domain->xprd/nxgrid; + double dy = domain->yprd/nygrid; + double dz = domain->zprd/nzgrid; + double del_vol = dx*dy*dz; + + // num_inner_timesteps = # of inner steps (thermal solves) + // required this MD step to maintain a stable explicit solve + // This could be moved out of the loop with an appropriate trigger + int num_inner_timesteps = 1; + double inner_dt = update->dt; + double voxel_coeff =(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz); + + std::vector grid_fourier(nzgrid * nygrid * nxgrid); + int index = 0; // Location unimportant, only max value + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) + grid_fourier[index++] = 2.0/c_e_grid[iz][iy][ix]*(k_e_grid[iz][iy][ix]*voxel_coeff); + + 
double fourier_max = *std::max_element(grid_fourier.begin(), grid_fourier.end()); + + double stability_criterion = 1.0 - fourier_max*inner_dt; + + if (stability_criterion < 0.0) { + inner_dt = 1/fourier_max; + num_inner_timesteps = static_cast(update->dt/inner_dt) + 1; + inner_dt = update->dt/double(num_inner_timesteps); + if (num_inner_timesteps > 1000000) + error->warning(FLERR,"Too many inner timesteps in fix ttm"); + } + + + + // finite difference iterations to update T_electron + + for (int istep = 0; istep < num_inner_timesteps; istep++) { + + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) + T_electron_old[iz][iy][ix] = T_electron[iz][iy][ix]; + + // compute new electron T profile + + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) { + int xright = ix + 1; + int yright = iy + 1; + int zright = iz + 1; + if (xright == nxgrid) xright = 0; + if (yright == nygrid) yright = 0; + if (zright == nzgrid) zright = 0; + int xleft = ix - 1; + int yleft = iy - 1; + int zleft = iz - 1; + if (xleft == -1) xleft = nxgrid - 1; + if (yleft == -1) yleft = nygrid - 1; + if (zleft == -1) zleft = nzgrid - 1; + + // Initialize flags for vacuum + int left = 1; + int right =1; + int in = 1; + int out = 1; + int up = 1; + int down = 1; + + // Set flags to 0 if vaccum + if (T_electron[iz][iy][xleft] < 1e-5) left = 0; + if (T_electron[iz][iy][xright] < 1e-5) right = 0; + if (T_electron[iz][yright][ix] < 1e-5) in = 0; + if (T_electron[iz][yleft][ix] < 1e-5) out = 0; + if (T_electron[zright][iy][ix] < 1e-5) up = 0; + if (T_electron[zleft][iy][ix] < 1e-5) down = 0; + + if (T_electron[iz][iy][ix] > 1e-5) { + T_electron[iz][iy][ix] = + T_electron_old[iz][iy][ix] + inner_dt/c_e_grid[iz][iy][ix]*( + (safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + + + 
(safe_effective_kappa(k_e_grid[iz][iy][xright],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xright]-T_electron_old[iz][iy][ix])/dx/dx*right + + + (safe_effective_kappa(k_e_grid[iz][yleft][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yleft][ix]-T_electron_old[iz][iy][ix])/dy/dy*out + + + (safe_effective_kappa(k_e_grid[iz][yright][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yright][ix]-T_electron_old[iz][iy][ix])/dy/dy*in + + + (safe_effective_kappa(k_e_grid[zleft][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zleft][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*down + + + (safe_effective_kappa(k_e_grid[zright][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zright][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*up + + -(net_energy_transfer_all[iz][iy][ix])/(del_vol) + +(inductive_power*inductive_response_grid[iz][iy][ix]));} + } + + } + + // output of grid electron temperatures to file + if (outfile && (update->ntimestep % outevery == 0)) + write_electron_temperatures(fmt::format("{}.{}",outfile,update->ntimestep)); +} + +/* ---------------------------------------------------------------------- + read in initial electron temperatures from a user-specified file + only read by proc 0, grid values are Bcast to other procs +------------------------------------------------------------------------- */ + +void FixTTMThermal::read_electron_properties(const std::string &filename) +{ + if (comm->me == 0) { + + int ***prop_initial_set; + memory->create(prop_initial_set,nzgrid,nygrid,nxgrid,"ttm:prop_initial_set"); + memset(&prop_initial_set[0][0][0],0,ngridtotal*sizeof(int)); + + // read initial electron temperature values from file + bigint nread = 0; + + try { + PotentialFileReader reader(lmp, filename, "electron property grid"); + + while (nread < ngridtotal) { + // reader will skip over comment-only lines + auto values = reader.next_values(4); + ++nread; + + int ix = values.next_int() - 1; + int iy = values.next_int() - 1; + int iz = values.next_int() - 1; + 
double c_e_tmp = values.next_double(); + double k_e_tmp = values.next_double(); + double gamma_p_tmp = values.next_double(); + double ind_tmp = values.next_double(); + + + + // check correctness of input data + + if ((ix < 0) || (ix >= nxgrid) || (iy < 0) || (iy >= nygrid) || (iz < 0) || (iz >= nzgrid)) + throw TokenizerException("Fix ttm invalid grid index in fix ttm grid file",""); + + if (c_e_tmp < 0.0) + throw TokenizerException("Fix ttm electron specific heat must be > 0.0",""); + + if (k_e_tmp < 0.0) + throw TokenizerException("Fix ttm electron conductivity must be > 0.0",""); + + if (gamma_p_tmp < 0.0) + throw TokenizerException("Fix ttm electron coupling must be > 0.0",""); + + if (ind_tmp < 0.0) + throw TokenizerException("Fix ttm electron inductive response must be >= 0.0",""); + + c_e_grid[iz][iy][ix] = c_e_tmp; + k_e_grid[iz][iy][ix] = k_e_tmp; + gamma_p_grid[iz][iy][ix] = gamma_p_tmp; + inductive_response_grid[iz][iy][ix] = ind_tmp; + prop_initial_set[iz][iy][ix] = 1; + } + } catch (std::exception &e) { + error->one(FLERR, e.what()); + } + + // check completeness of input data + + for (int iz = 0; iz < nzgrid; iz++) + for (int iy = 0; iy < nygrid; iy++) + for (int ix = 0; ix < nxgrid; ix++) + if (prop_initial_set[iz][iy][ix] == 0) + error->all(FLERR,"Fix ttm infile did not set all properties"); + + memory->destroy(prop_initial_set); + } + MPI_Bcast(&c_e_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); + MPI_Bcast(&k_e_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); + MPI_Bcast(&gamma_p_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); + MPI_Bcast(&inductive_response_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); +} +/* ---------------------------------------------------------------------- + read in initial electron temperatures from a user-specified file + only read by proc 0, grid values are Bcast to other procs +------------------------------------------------------------------------- */ + +void FixTTMThermal::read_electron_temperatures(const std::string 
&filename) +{ + if (comm->me == 0) { + + int ***T_initial_set; + memory->create(T_initial_set,nzgrid,nygrid,nxgrid,"ttm:T_initial_set"); + memset(&T_initial_set[0][0][0],0,ngridtotal*sizeof(int)); + + // read initial electron temperature values from file + bigint nread = 0; + + try { + PotentialFileReader reader(lmp, filename, "electron temperature grid"); + + while (nread < ngridtotal) { + // reader will skip over comment-only lines + auto values = reader.next_values(4); + ++nread; + + int ix = values.next_int() - 1; + int iy = values.next_int() - 1; + int iz = values.next_int() - 1; + double T_tmp = values.next_double(); + + + + + // check correctness of input data + + if ((ix < 0) || (ix >= nxgrid) || (iy < 0) || (iy >= nygrid) || (iz < 0) || (iz >= nzgrid)) + throw TokenizerException("Fix ttm invalid grid index in fix ttm grid file",""); + + if (T_tmp < 0.0) + throw TokenizerException("Fix ttm electron temperatures must be > 0.0",""); + + T_electron[iz][iy][ix] = T_tmp; + T_initial_set[iz][iy][ix] = 1; + } + } catch (std::exception &e) { + error->one(FLERR, e.what()); + } + + // check completeness of input data + + for (int iz = 0; iz < nzgrid; iz++) + for (int iy = 0; iy < nygrid; iy++) + for (int ix = 0; ix < nxgrid; ix++) + if (T_initial_set[iz][iy][ix] == 0) + error->all(FLERR,"Fix ttm infile did not set all temperatures"); + + memory->destroy(T_initial_set); + } + MPI_Bcast(&T_electron[0][0][0],ngridtotal,MPI_DOUBLE,0,world); +} +/* ---------------------------------------------------------------------- + write out current electron temperatures to user-specified file + only written by proc 0 +------------------------------------------------------------------------- */ + +void FixTTMThermal::write_electron_temperatures(const std::string &filename) +{ + if (comm->me) return; + + FILE *fp = fopen(filename.c_str(),"w"); + if (!fp) error->one(FLERR,"Fix ttm could not open output file {}: {}", + filename,utils::getsyserror()); + fmt::print(fp,"# DATE: {} UNITS: 
{} COMMENT: Electron temperature " + "{}x{}x{} grid at step {}. Created by fix {}\n #Grid X,Y,Z Temperature\n", utils::current_date(), + update->unit_style, nxgrid, nygrid, nzgrid, update->ntimestep, style); + + int ix,iy,iz; + + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) + fprintf(fp,"%d %d %d %20.16g\n",ix+1,iy+1,iz+1,T_electron[iz][iy][ix]); + + fclose(fp); +} + +/* ---------------------------------------------------------------------- */ +void FixTTMThermal::grow_arrays(int ngrow) +{ + memory->grow(flangevin,ngrow,3,"ttm:flangevin"); +} + +/* ---------------------------------------------------------------------- + pack entire state of Fix into one write +------------------------------------------------------------------------- */ + +void FixTTMThermal::write_restart(FILE *fp) +{ + double *rlist; + memory->create(rlist,nxgrid*nygrid*nzgrid+4,"ttm:rlist"); + + int n = 0; + rlist[n++] = nxgrid; + rlist[n++] = nygrid; + rlist[n++] = nzgrid; + rlist[n++] = seed; + + // store global grid values + + for (int iz = 0; iz < nzgrid; iz++) + for (int iy = 0; iy < nygrid; iy++) + for (int ix = 0; ix < nxgrid; ix++) + rlist[n++] = T_electron[iz][iy][ix]; + + if (comm->me == 0) { + int size = n * sizeof(double); + fwrite(&size,sizeof(int),1,fp); + fwrite(rlist,sizeof(double),n,fp); + } + + memory->destroy(rlist); +} + +/* ---------------------------------------------------------------------- + use state info from restart file to restart the Fix +------------------------------------------------------------------------- */ + +void FixTTMThermal::restart(char *buf) +{ + int n = 0; + auto rlist = (double *) buf; + + // check that restart grid size is same as current grid size + + int nxgrid_old = static_cast (rlist[n++]); + int nygrid_old = static_cast (rlist[n++]); + int nzgrid_old = static_cast (rlist[n++]); + + if (nxgrid_old != nxgrid || nygrid_old != nygrid || nzgrid_old != nzgrid) + error->all(FLERR,"Must restart fix 
ttm with same grid size"); + + // change RN seed from initial seed, to avoid same Langevin factors + // just increment by 1, since for RanMars that is a new RN stream + + seed = static_cast (rlist[n++]) + 1; + delete random; + random = new RanMars(lmp,seed+comm->me); + + // restore global grid values + + for (int iz = 0; iz < nzgrid; iz++) + for (int iy = 0; iy < nygrid; iy++) + for (int ix = 0; ix < nxgrid; ix++) + T_electron[iz][iy][ix] = rlist[n++]; +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for restart file +------------------------------------------------------------------------- */ + +int FixTTMThermal::pack_restart(int i, double *buf) +{ + // pack buf[0] this way because other fixes unpack it + + buf[0] = 4; + buf[1] = flangevin[i][0]; + buf[2] = flangevin[i][1]; + buf[3] = flangevin[i][2]; + return 4; +} + +/* ---------------------------------------------------------------------- + unpack values from atom->extra array to restart the fix +------------------------------------------------------------------------- */ + +void FixTTMThermal::unpack_restart(int nlocal, int nth) +{ + double **extra = atom->extra; + + // skip to Nth set of extra values + // unpack the Nth first values this way because other fixes pack them + + int m = 0; + for (int i = 0; i < nth; i++) m += static_cast (extra[nlocal][m]); + m++; + + flangevin[nlocal][0] = extra[nlocal][m++]; + flangevin[nlocal][1] = extra[nlocal][m++]; + flangevin[nlocal][2] = extra[nlocal][m++]; +} + +/* ---------------------------------------------------------------------- + size of atom nlocal's restart data +------------------------------------------------------------------------- */ + +int FixTTMThermal::size_restart(int /*nlocal*/) +{ + return 4; +} + +/* ---------------------------------------------------------------------- + maxsize of any atom's restart data 
+------------------------------------------------------------------------- */ + +int FixTTMThermal::maxsize_restart() +{ + return 4; +} + +/* ---------------------------------------------------------------------- + return the energy of the electronic subsystem or the net_energy transfer + between the subsystems +------------------------------------------------------------------------- */ + +double FixTTMThermal::compute_vector(int n) +{ + if (outflag == 0) { + e_energy = 0.0; + transfer_energy = 0.0; + + int ix,iy,iz; + + double dx = domain->xprd/nxgrid; + double dy = domain->yprd/nygrid; + double dz = domain->zprd/nzgrid; + double del_vol = dx*dy*dz; + + for (iz = 0; iz < nzgrid; iz++) + for (iy = 0; iy < nygrid; iy++) + for (ix = 0; ix < nxgrid; ix++) { + e_energy += + T_electron[iz][iy][ix]*c_e_grid[iz][iy][ix]*del_vol; + transfer_energy += + net_energy_transfer_all[iz][iy][ix]*update->dt; + //printf("TRANSFER %d %d %d %g\n",ix,iy,iz,transfer_energy); + } + + //printf("TRANSFER %g\n",transfer_energy); + + outflag = 1; + } + + if (n == 0) return e_energy; + if (n == 1) return transfer_energy; + return 0.0; +} + +/* ---------------------------------------------------------------------- + memory usage for flangevin and 3d grids +------------------------------------------------------------------------- */ + +double FixTTMThermal::memory_usage() +{ + double bytes = 0.0; + bytes += (double) atom->nmax * 3 * sizeof(double); + bytes += (double) 4*ngridtotal * sizeof(int); + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate 3d grid quantities +------------------------------------------------------------------------- */ + +void FixTTMThermal::allocate_grid() +{ + memory->create(T_electron_old,nzgrid,nygrid,nxgrid,"ttm:T_electron_old"); + memory->create(T_electron,nzgrid,nygrid,nxgrid,"ttm:T_electron"); + memory->create(c_e_grid,nzgrid,nygrid,nxgrid,"ttm:c_e_grid"); + 
memory->create(k_e_grid,nzgrid,nygrid,nxgrid,"ttm:k_e_grid"); + memory->create(gamma_p_grid,nzgrid,nygrid,nxgrid,"ttm:gamma_p_grid"); + memory->create(inductive_response_grid,nzgrid,nygrid,nxgrid,"ttm:gamma_p_grid"); + memory->create(net_energy_transfer,nzgrid,nygrid,nxgrid, + "ttm:net_energy_transfer"); + memory->create(net_energy_transfer_all,nzgrid,nygrid,nxgrid, + "ttm:net_energy_transfer_all"); +} + +/* ---------------------------------------------------------------------- + deallocate 3d grid quantities +------------------------------------------------------------------------- */ + +void FixTTMThermal::deallocate_grid() +{ + memory->destroy(T_electron_old); + memory->destroy(T_electron); + memory->destroy(c_e_grid); + memory->destroy(k_e_grid); + memory->destroy(gamma_p_grid); + memory->destroy(inductive_response_grid); + memory->destroy(net_energy_transfer); + memory->destroy(net_energy_transfer_all); +} diff --git a/src/EXTRA-FIX/fix_ttm_thermal.h b/src/EXTRA-FIX/fix_ttm_thermal.h new file mode 100755 index 00000000000..d6e32e6067b --- /dev/null +++ b/src/EXTRA-FIX/fix_ttm_thermal.h @@ -0,0 +1,87 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(ttm/thermal,FixTTMThermal); +// clang-format on +#else + +#ifndef LMP_FIX_TTM_THERMAL_H +#define LMP_FIX_TTM_THERMAL_H + +#include "fix.h" + +namespace LAMMPS_NS { + +class FixTTMThermal : public Fix { + public: + FixTTMThermal(class LAMMPS *, int, char **); + ~FixTTMThermal() override; + void post_constructor() override; + int setmask() override; + void init() override; + void setup(int) override; + void post_force_setup(int); + void post_force(int) override; + void post_force_respa_setup(int, int, int); + void post_force_respa(int, int, int) override; + void end_of_step() override; + void grow_arrays(int) override; + void write_restart(FILE *) override; + void restart(char *) override; + int pack_restart(int, double *) override; + void unpack_restart(int, int) override; + int size_restart(int) override; + int maxsize_restart() override; + double compute_vector(int) override; + double memory_usage() override; + + protected: + int nlevels_respa; + int seed; + int nxgrid, nygrid, nzgrid; // size of global grid + int ngridtotal; // total size of global grid + int deallocate_flag; + int outflag, outevery; + double shift, tinit; + double e_energy, transfer_energy; + char *infile, *outfile, *e_property_file; + + class RanMars *random; + double inductive_power; + + double *gfactor1, *gfactor2, *ratio, **flangevin; + double ***T_electron, ***T_electron_old; + double ***net_energy_transfer, ***net_energy_transfer_all; + double ***T_atomic; + int ***nsum, ***nsum_all; + double ***sum_vsq, ***sum_vsq_all; + double ***sum_mass_vsq, ***sum_mass_vsq_all; + double ***gamma_p_grid; + double ***inductive_response_grid; + double ***c_e_grid; + double ***k_e_grid; + + + virtual void allocate_grid(); + virtual void deallocate_grid(); + virtual void read_electron_properties(const std::string &); + virtual void read_electron_temperatures(const std::string &); 
+ virtual void write_electron_temperatures(const std::string &); +}; + +} // namespace LAMMPS_NS + +#endif +#endif From ee1c2f4713c2b1e1cc06cc7f5e8b732041974d49 Mon Sep 17 00:00:00 2001 From: user Date: Mon, 11 Aug 2025 17:49:04 -0500 Subject: [PATCH 036/604] Fixed a few typos in the docs --- doc/src/Commands_fix.rst | 1 + doc/src/fix.rst | 1 + doc/src/fix_ttm.rst | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index d008808ea3d..3dcd27d4c24 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -260,6 +260,7 @@ OPT. * :doc:`ttm ` * :doc:`ttm/grid ` * :doc:`ttm/mod ` + * :doc:`ttm/thermal ` * :doc:`tune/kspace ` * :doc:`vector ` * :doc:`viscosity ` diff --git a/doc/src/fix.rst b/doc/src/fix.rst index cfa9e4d34c5..b8a1e3db20d 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -439,6 +439,7 @@ accelerated styles exist. * :doc:`ttm ` - two-temperature model for electronic/atomic coupling (replicated grid) * :doc:`ttm/grid ` - two-temperature model for electronic/atomic coupling (distributed grid) * :doc:`ttm/mod ` - enhanced two-temperature model with additional options +* :doc:`ttm/thermal ` - a two-temperature model for thermal transport * :doc:`tune/kspace ` - auto-tune :math:`k`-space parameters * :doc:`vector ` - accumulate a global vector every *N* timesteps * :doc:`viscosity ` - Mueller-Plathe momentum exchange for viscosity calculation diff --git a/doc/src/fix_ttm.rst b/doc/src/fix_ttm.rst index e40e24915cf..14af7a6b5c7 100755 --- a/doc/src/fix_ttm.rst +++ b/doc/src/fix_ttm.rst @@ -393,7 +393,7 @@ have been removed: g_p (T_e - T_a) + \eta s where :math:`s` is the applied heating power density and :math:`\eta` is -the absorption efficency (0-1) defined for each ttm grid cell in the +the absorption efficiency (0-1) defined for each ttm grid cell in the *properties.in* file. 
Also note that compared to the original *fix ttm*, it uses use a volumetric specific heat, :math:`C_\mathrm{vol}` , which represents the product of :math:`C_e \rho_e`. From 56759d38ab83dd7c0073ffddd3ad2592bbe6b753 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:33:14 -0400 Subject: [PATCH 037/604] add new files in package to .gitignore --- src/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/.gitignore b/src/.gitignore index 56406ae4738..2f260c41ccd 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1745,6 +1745,8 @@ /fix_ttm_grid.h /fix_ttm_mod.cpp /fix_ttm_mod.h +/fix_ttm_thermal.cpp +/fix_ttm_thermal.h /granular_model.cpp /granular_model.h /gran_sub_mod_normal.cpp From 65e7e38b089da8b54a8537c48d562643ba5c001e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:35:59 -0400 Subject: [PATCH 038/604] replace new potential file with existing one --- examples/ttm/FeVoter-ChenRecheck.fs | 4606 --------------------------- examples/ttm/Fe_mm.eam.fs | 1 + examples/ttm/in.ttm.thermal | 2 +- 3 files changed, 2 insertions(+), 4607 deletions(-) delete mode 100644 examples/ttm/FeVoter-ChenRecheck.fs create mode 120000 examples/ttm/Fe_mm.eam.fs diff --git a/examples/ttm/FeVoter-ChenRecheck.fs b/examples/ttm/FeVoter-ChenRecheck.fs deleted file mode 100644 index 5623bfcec61..00000000000 --- a/examples/ttm/FeVoter-ChenRecheck.fs +++ /dev/null @@ -1,4606 +0,0 @@ - UNITS: metal DATE: 2016-04-04 - My transcription of Voter Chen from the potential file from Tim Germann - 4/4/2016 (Fembed, rhoatomic, phipair) -1 Fe -3000 2.7915816152066654e-6 10000 0.0004524 4.524 -26 55.85 2.87 bcc - 0 -0.79395541130871E0 -0.97373788441046E0 -0.10896006180792E1 -0.11759534709491E1 --0.12447984733467E1 -0.13018961056419E1 -0.13505005179897E1 -0.13926464893073E1 -0.14296982548672E1 --0.14626190981611E1 -0.14921160996901E1 -0.15187250288449E1 -0.15428621070094E1 -0.15648577598736E1 --0.15849789112574E1 -0.16034443822871E1 -0.16204361204241E1 
-0.1636107061927E1 -0.16505869597989E1 --0.16639871993477E1 -0.16764038348153E1 -0.16879203472668E1 -0.16986100807586E1 -0.17085376629779E1 --0.17177599325665E1 -0.17263279769279E1 -0.17342872480389E1 -0.17416784832022E1 -0.17485385011835E1 --0.17549008289582E1 -0.17607954059781E1 -0.17662499208085E1 -0.17712896096966E1 -0.17759373804232E1 --0.17802144364243E1 -0.17841403783433E1 -0.1787732945564E1 -0.17910090691012E1 -0.17939838627922E1 --0.1796671785428E1 -0.1799086127315E1 -0.18012393577778E1 -0.18031133671671E1 -0.18046003093578E1 --0.18057098932139E1 -0.18064644261379E1 -0.18068845672559E1 -0.18069890298749E1 -0.18067953652118E1 --0.18063195558552E1 -0.18055764935024E1 -0.18045798377181E1 -0.18033424742025E1 -0.18018760550675E1 --0.1800191816349E1 -0.1798299895423E1 -0.17962099103763E1 -0.17939309845112E1 -0.17914712099674E1 --0.17888387525323E1 -0.17860408670916E1 -0.17830844700077E1 -0.17799761636234E1 -0.17767221070622E1 --0.17733279473156E1 -0.17697992626795E1 -0.17661412436743E1 -0.17623587329223E1 -0.17584563755726E1 --0.17544386265262E1 -0.17503093394205E1 -0.1746072887359E1 -0.174173259035E1 -0.17372922764117E1 --0.1732755283485E1 -0.17281248172223E1 -0.17234039679407E1 -0.17185957006217E1 -0.17137025596536E1 --0.17087275717331E1 -0.17036729777054E1 -0.16985414640841E1 -0.16933352547209E1 -0.16880564780777E1 --0.16827073973404E1 -0.16772900560579E1 -0.16718063694637E1 -0.16662581711845E1 -0.16606473898282E1 --0.16549757839179E1 -0.16492447973722E1 -0.16434563108117E1 -0.1637611657078E1 -0.16317123941462E1 --0.16257600004798E1 -0.16197558147978E1 -0.16137011444858E1 -0.16075972603296E1 -0.16014453995499E1 --0.15952467652173E1 -0.1589002528347E1 -0.15827138299404E1 -0.15763815711164E1 -0.15700069469862E1 --0.15635909333234E1 -0.15571344306349E1 -0.15506384283815E1 -0.15441038969763E1 -0.15375314933704E1 --0.15309222600597E1 -0.15242769918622E1 -0.15175964648332E1 -0.15108814376675E1 -0.15041326518757E1 --0.14973508320429E1 -0.14905366870669E1 
-0.14836909058044E1 -0.14768141691118E1 -0.1469907139505E1 --0.14629703758517E1 -0.14560044671972E1 -0.14490101323441E1 -0.1441987794457E1 -0.14349381211267E1 --0.14278615880222E1 -0.14207587827735E1 -0.14136301195382E1 -0.14064762419748E1 -0.13992974911646E1 --0.13920943931718E1 -0.13848674517477E1 -0.13776171017031E1 -0.13703437660937E1 -0.13630477945996E1 --0.13557296904683E1 -0.13483898449455E1 -0.13410286522481E1 -0.13336464992978E1 -0.13262437566239E1 --0.13188207843313E1 -0.131137788857E1 -0.13039154871085E1 -0.1296433916383E1 -0.12889335345909E1 --0.12814145866646E1 -0.12738773983737E1 -0.12663223637609E1 -0.12587497021936E1 -0.12511597174437E1 --0.12435527795616E1 -0.1235929017924E1 -0.12282888867109E1 -0.12206324769036E1 -0.12129602034923E1 --0.12052722379659E1 -0.11975688376355E1 -0.11898503277262E1 -0.11821168227083E1 -0.11743686277441E1 --0.11666060056846E1 -0.11588291794975E1 -0.11510382479628E1 -0.11432335388256E1 -0.11354152614176E1 --0.11275836101127E1 -0.11197387864983E1 -0.11118809923523E1 -0.11040104136839E1 -0.10961272388801E1 --0.1088231661504E1 -0.10803238662282E1 -0.10724040245365E1 -0.10644723227941E1 -0.10565289373038E1 --0.1048574043073E1 -0.10406078035769E1 -0.10326303883614E1 -0.10246419363382E1 -0.10166425719205E1 --0.10086325143559E1 -0.10006119187903E1 -0.99258090773476E0 -0.98453957068765E0 -0.97648815195561E0 --0.96842677376409E0 -0.96035546603204E0 -0.9522745088491E0 -0.94418394221332E0 -0.93608392530594E0 --0.9279746207178E0 -0.91985606612037E0 -0.91172850488967E0 -0.90359191451101E0 -0.89544655735942E0 --0.88729243537507E0 -0.87912974463787E0 -0.87095857088822E0 -0.86277899085954E0 -0.85459120761419E0 --0.84639519322239E0 -0.8381911529234E0 -0.82997920950322E0 -0.82175934737788E0 -0.813531755936E0 - -0.805296549792E0 -0.79705379533626E0 -0.7888035351519E0 -0.78054593596043E0 -0.77228108600028E0 --0.76400907269621E0 -0.75572995776098E0 -0.747443812483E0 -0.73915076284561E0 -0.73085089256469E0 --0.72254428355471E0 
-0.71423101615189E0 -0.70591116977814E0 -0.69758482166759E0 -0.68925205083668E0 --0.68091293402157E0 -0.67256754752303E0 -0.66421158403223E0 -0.65583067812319E0 -0.64742401709209E0 --0.63899204070781E0 -0.6305351839364E0 -0.62205387034781E0 -0.6135485180479E0 -0.60501953649207E0 --0.59646732749201E0 -0.58789228539804E0 -0.57929476356652E0 -0.57067496630881E0 -0.56203345908053E0 --0.55337061071586E0 -0.54468677946105E0 -0.53598231835682E0 -0.52725728153479E0 -0.51851225448089E0 --0.50974760688104E0 -0.50096366648295E0 -0.49216040314182E0 -0.48333844448316E0 -0.47449813382775E0 --0.46563953545981E0 -0.45676299684443E0 -0.44786899746824E0 -0.43895754632733E0 -0.43002901663422E0 --0.42108386939356E0 -0.41212192502099E0 -0.40314386004635E0 -0.39414975566028E0 -0.38513980105004E0 --0.37611447959895E0 -0.36707362577462E0 -0.35801795967776E0 -0.3489471957329E0 -0.33986205494689E0 --0.33076232102721E0 -0.32164862459804E0 -0.3125207659386E0 -0.30337939696328E0 -0.29422422372366E0 --0.28505594153086E0 -0.27587432869089E0 -0.26667989991963E0 -0.25747268565303E0 -0.24825287012179E0 --0.23902083532771E0 -0.22977635590324E0 -0.2205200138812E0 -0.21125183129736E0 -0.20197188425114E0 --0.19268056400437E0 -0.1833778451171E0 -0.17406390538864E0 -0.16473906221818E0 -0.15540335444593E0 --0.14605685688145E0 -0.136699900063E0 -0.12733262490725E0 -0.11795495215909E0 -0.10856722941178E0 --0.99169614309124E-1 -0.89762232229278E-1 -0.8034503379362E-1 -0.70918335524611E-1 -0.61482279936515E-1 --0.52036988338297E-1 -0.42582551387731E-1 -0.33119001249779E-1 -0.23646602439931E-1 -0.14165461234213E-1 --0.46756958895336E-2 0.48225703428742E-2 0.14329222845464E-1 0.23844262632323E-1 0.33367466818521E-1 - 0.42898716214859E-1 0.52437899348478E-1 0.61984920596885E-1 0.71539677047653E-1 0.81102054938355E-1 - 0.90671950643626E-1 0.10024927648289E0 0.10983393063518E0 0.11942580552375E0 0.12902481533664E0 - 0.13863088240408E0 0.14824388023496E0 0.15786371320489E0 0.16749029784154E0 0.1771235497297E0 - 
0.18676336628784E0 0.19640965584211E0 0.20606235882675E0 0.21572137189559E0 0.22538661913291E0 - 0.23505802982716E0 0.24473551476305E0 0.25441897824488E0 0.26410834531613E0 0.27380355480829E0 - 0.2835045075848E0 0.29321159824905E0 0.3029273531564E0 0.31265214771259E0 0.32238585079406E0 - 0.33212836485611E0 0.34187963062685E0 0.3516395256441E0 0.36140788850889E0 0.37118460539049E0 - 0.38096954926881E0 0.39076262601961E0 0.40056373392939E0 0.41037289521628E0 0.42018989979465E0 - 0.43001462332746E0 0.43984694886826E0 0.44968679193928E0 0.45953417149325E0 0.46938893468917E0 - 0.47925092551049E0 0.48912004958807E0 0.49899622010939E0 0.50887953272795E0 0.51876969823827E0 - 0.52866661580579E0 0.53857025119313E0 0.54848067028998E0 0.55839758438064E0 0.56832092878908E0 - 0.57825071880538E0 0.58818687952471E0 0.59812922402108E0 0.6080776770441E0 0.61803237078673E0 - 0.62799299988538E0 0.63795947082261E0 0.64793199499564E0 0.65791022460468E0 0.66789407614338E0 - 0.67788376872066E0 0.68787893919222E0 0.69787956323923E0 0.70788578121809E0 0.71789726083952E0 - 0.72791407790358E0 0.73793619122856E0 0.74796337693536E0 0.75799581186545E0 0.768033225679E0 - 0.77807562240874E0 0.78812303354915E0 0.79817518684201E0 0.80823230437044E0 0.81829407373667E0 - 0.82836058204713E0 0.8384317384239E0 0.84850741330807E0 0.85858772567875E0 0.86867235717073E0 - 0.87876159619764E0 0.88885504692613E0 0.89895292448681E0 0.9090549765402E0 0.91916130375759E0 - 0.92927173360511E0 0.93938632268519E0 0.94950492409163E0 0.95962758277769E0 0.96975414866703E0 - 0.97988469356239E0 0.99001902634177E0 0.10001572708796E1 0.10102991989649E1 0.10204449470354E1 - 0.10305942768228E1 0.10407473644555E1 0.10509039383638E1 0.10610641675877E1 0.10712278368713E1 - 0.10813950109641E1 0.10915656040959E1 0.11017395588425E1 0.11119169074901E1 0.11220974829525E1 - 0.11322814307592E1 0.11424685366474E1 0.11526588471025E1 0.11628523327879E1 0.11730488490939E1 - 0.11832485620265E1 0.11934512350315E1 0.12036569210515E1 
0.12138656170441E1 0.12240771516728E1 - 0.12342916928265E1 0.12445090413858E1 0.12547291779191E1 0.1264952202488E1 0.12751779329394E1 - 0.12854063661115E1 0.12956375678939E1 0.13058713773583E1 0.13161078048533E1 0.13263469022411E1 - 0.13365885053253E1 0.13468326465991E1 0.13570793460411E1 0.13673284981527E1 0.13775800321027E1 - 0.13878341040736E1 0.13980905104016E1 0.1408349247005E1 0.14186103522656E1 0.14288737827675E1 - 0.14391394450805E1 0.14494073469188E1 0.14596775352903E1 0.14699498943696E1 0.14802243936255E1 - 0.14905010438473E1 0.15007798600285E1 0.15110607301807E1 0.15213436549296E1 0.15316286284656E1 - 0.15419156612107E1 0.15522046557072E1 0.1562495606962E1 0.15727884925663E1 0.15830833613918E1 - 0.15933800938582E1 0.16036787027682E1 0.16139791251548E1 0.16242814378901E1 0.16345855451765E1 - 0.16448914444559E1 0.16551990746798E1 0.16655084518347E1 0.16758195692811E1 0.16861324079574E1 - 0.16964469016446E1 0.1706763057513E1 0.17170808261603E1 0.1727400230825E1 0.17377212534386E1 - 0.17480438645722E1 0.17583680069593E1 0.17686937008044E1 0.17790208909363E1 0.17893495928111E1 - 0.17996797991771E1 0.18100114790812E1 0.18203445940464E1 0.18306791341E1 0.18410150813774E1 - 0.1851352414409E1 0.18616911240245E1 0.18720311986047E1 0.18823726369522E1 0.18927153878824E1 - 0.19030594521572E1 0.19134047956671E1 0.19237514199224E1 0.19340992924735E1 0.19444484144087E1 - 0.19547987471425E1 0.19651503108247E1 0.19755030709105E1 0.19858570292776E1 0.19962121337208E1 - 0.20065684027983E1 0.20169257904004E1 0.2027284311888E1 0.20376439264438E1 0.20480046411158E1 - 0.20583664273591E1 0.20687292788198E1 0.20790931812529E1 0.20894581131612E1 0.20998240776469E1 - 0.21101910335528E1 0.21205590087433E1 0.2130927943884E1 0.21412978736149E1 0.21516687440983E1 - 0.21620405708448E1 0.21724133176904E1 0.2182786987152E1 0.21931615621411E1 0.22035370230788E1 - 0.22139133784281E1 0.22242905875759E1 0.22346686697062E1 0.22450475829135E1 0.22554273405722E1 - 0.22658079129266E1 
0.22761892979358E1 0.22865714861712E1 0.22969544500947E1 0.23073382110401E1 - 0.23177227107886E1 0.23281079983888E1 0.23384939996079E1 0.23488807613585E1 0.23592682237482E1 - 0.23696564140837E1 0.23800453042147E1 0.23904348859371E1 0.24008251588699E1 0.24112160863593E1 - 0.24216076996439E1 0.24319999455665E1 0.24423928462361E1 0.24527863736931E1 0.24631805202639E1 - 0.24735752930914E1 0.24839706520934E1 0.2494366627692E1 0.25047631657503E1 0.25151603025274E1 - 0.2525557990417E1 0.25359562443309E1 0.25463550488811E1 0.25567543936729E1 0.25671542926928E1 - 0.25775546967019E1 0.25879556289659E1 0.25983570624095E1 0.26087589883386E1 0.2619161420767E1 - 0.26295643125512E1 0.26399677053108E1 0.26503715372352E1 0.26607758584069E1 0.26711806202537E1 - 0.26815858290069E1 0.26919914771153E1 0.27023975418303E1 0.27128040389617E1 0.27232109403048E1 - 0.27336182444855E1 0.27440259627964E1 0.27544340582846E1 0.2764842565318E1 0.27752514248206E1 - 0.27856606663169E1 0.27960702669765E1 0.28064802090847E1 0.28168905309552E1 0.28273011774577E1 - 0.28377121790417E1 0.28481234958427E1 0.28585351329218E1 0.28689470966737E1 0.28793593553972E1 - 0.28897719425624E1 0.2900184821216E1 0.29105979797661E1 0.29210114421587E1 0.29314251570459E1 - 0.29418391712041E1 0.29522534436256E1 0.29626679766507E1 0.29730827752863E1 0.29834978006745E1 - 0.29939130923263E1 0.30043286123361E1 0.30147443712076E1 0.30251603587637E1 0.3035576554868E1 - 0.30459929763643E1 0.30564096119054E1 0.30668264453823E1 0.30772434913258E1 0.30876607112862E1 - 0.30980781365294E1 0.31084957464564E1 0.3118913524441E1 0.31293314944836E1 0.3139749605835E1 - 0.31501679091085E1 0.31605863607657E1 0.31710049606978E1 0.31814237222113E1 0.31918426139312E1 - 0.32022616569194E1 0.32126808349305E1 0.32231001310391E1 0.32335195649734E1 0.32439391207332E1 - 0.32543587805765E1 0.32647785729666E1 0.32751984482426E1 0.32856184485823E1 0.32960385421209E1 - 0.33064587110352E1 0.33168790040062E1 0.33272993567334E1 0.33377198025509E1 
0.33481403273358E1 - 0.33585609130199E1 0.33689815872498E1 0.33794023141066E1 0.33898230952229E1 0.34002439563952E1 - 0.34106648554729E1 0.34210858036944E1 0.34315068103388E1 0.34419278374734E1 0.34523489240921E1 - 0.34627700370515E1 0.34731911699568E1 0.34836123531215E1 0.34940335357965E1 0.35044547514516E1 - 0.35148759854201E1 0.3525297216749E1 0.35357184764322E1 0.35461397302105E1 0.35565609824922E1 - 0.35669822473184E1 0.35774034970423E1 0.35878247338951E1 0.35982459785576E1 0.36086671895052E1 - 0.36190883890512E1 0.36295095789644E1 0.3639930717448E1 0.36503518561494E1 0.36607729553125E1 - 0.36711940057673E1 0.36816150454191E1 0.36920360250154E1 0.37024569663252E1 0.3712877871199E1 - 0.37232987135137E1 0.37337195104943E1 0.37441402564535E1 0.37545609355815E1 0.37649815615928E1 - 0.37754021268955E1 0.37858226101559E1 0.37962430441091E1 0.38066634018433E1 0.38170836682288E1 - 0.38275038837561E1 0.38379240047511E1 0.3848344037173E1 0.38587640040426E1 0.38691838628013E1 - 0.38796036394904E1 0.38900233332936E1 0.39004429124551E1 0.39108624085131E1 0.39212818044331E1 - 0.39317010824461E1 0.394212027292E1 0.39525393531062E1 0.39629583072219E1 0.39733771694175E1 - 0.39837959095427E1 0.3994214519924E1 0.40046330375145E1 0.40150514115648E1 0.40254696567419E1 - 0.40358878045566E1 0.40463057988174E1 0.40567236687493E1 0.40671414160666E1 0.40775590141566E1 - 0.40879764806251E1 0.40983938151183E1 0.41088109982072E1 0.41192280363955E1 0.41296449466902E1 - 0.41400616877571E1 0.41504782896922E1 0.41608947474885E1 0.4171311035404E1 0.4181727181657E1 - 0.41921431679614E1 0.42025589891198E1 0.42129746488329E1 0.42233901607882E1 0.42338054835153E1 - 0.42442206562261E1 0.42546356695596E1 0.42650504795424E1 0.4275465154475E1 0.42858796324786E1 - 0.42962939352953E1 0.4306708070136E1 0.43171220233407E1 0.43275357885689E1 0.43379493771952E1 - 0.43483627900903E1 0.43587759939201E1 0.43691890355973E1 0.43796018726991E1 0.43900145207634E1 - 0.44004269717626E1 0.44108392488539E1 
0.44212513003354E1 0.44316631743441E1 0.44420748549615E1 - 0.44524863116562E1 0.44628975890822E1 0.44733086570893E1 0.44837195109971E1 0.44941301638399E1 - 0.45045406248967E1 0.45149508454863E1 0.45253608843855E1 0.45357706931578E1 0.45461803033142E1 - 0.45565896733571E1 0.4566998859968E1 0.45774078011865E1 0.45878165338568E1 0.4598225050006E1 - 0.46086333499606E1 0.46190413983459E1 0.46294492660202E1 0.46398568720025E1 0.46502642564568E1 - 0.4660671430703E1 0.46710783716387E1 0.46814850551001E1 0.46918915510478E1 0.4702297776623E1 - 0.47127037843981E1 0.47231095515157E1 0.47335150959623E1 0.474392037371E1 0.4754325447862E1 - 0.47647302538729E1 0.47751348374578E1 0.47855391529832E1 0.47959432640421E1 0.48063470946059E1 - 0.48167506930565E1 0.482715405221E1 0.48375571595493E1 0.48479600002984E1 0.48583626236285E1 - 0.48687649763684E1 0.4879167064042E1 0.48895689345959E1 0.48999705250901E1 0.49103718646679E1 - 0.49207729498464E1 0.49311737955645E1 0.49415743452226E1 0.49519746728629E1 0.49623747185115E1 - 0.49727745155023E1 0.49831740334129E1 0.49935733115639E1 0.50039723194241E1 0.50143710449932E1 - 0.50247695350749E1 0.50351677410151E1 0.50455656856715E1 0.50559633490708E1 0.50663607741255E1 - 0.50767578906029E1 0.50871547597015E1 0.50975513674618E1 0.51079476746449E1 0.51183437272382E1 - 0.51287395077448E1 0.51391350163822E1 0.51495302263473E1 0.51599251899403E1 0.51703198623714E1 - 0.51807142492733E1 0.51911083801703E1 0.52015022119052E1 0.5211895781295E1 0.52222890415633E1 - 0.52326820519493E1 0.52430747725338E1 0.52534671856568E1 0.5263859361061E1 0.52742512164319E1 - 0.52846427930492E1 0.52950340857428E1 0.53054251058326E1 0.53158158317127E1 0.53262062484441E1 - 0.53365964211709E1 0.53469862670887E1 0.53573758260352E1 0.53677651287319E1 0.53781541050006E1 - 0.53885428093458E1 0.5398931206828E1 0.54093193272438E1 0.54197071588106E1 0.54300946671876E1 - 0.54404819166208E1 0.5450868854021E1 0.5461255486327E1 0.5471641848587E1 0.54820278959022E1 - 
0.5492413652627E1 0.55027990971281E1 0.55131842671206E1 0.5523569145543E1 0.55339536782173E1 - 0.55443379528187E1 0.55547219297002E1 0.55651055676365E1 0.55754889396629E1 0.55858720047448E1 - 0.55962547507186E1 0.56066372167143E1 0.56170193611801E1 0.56274012111914E1 0.56377827623398E1 - 0.56481639860535E1 0.56585449366735E1 0.56689255670426E1 0.567930587463E1 0.5689685911709E1 - 0.57000656205096E1 0.57104450149096E1 0.5720824123073E1 0.57312029088901E1 0.57415813904566E1 - 0.57519595591464E1 0.57623374197487E1 0.57727149861141E1 0.57830922222692E1 0.57934691440238E1 - 0.58038457894194E1 0.58142220945365E1 0.58245980768447E1 0.58349737894879E1 0.58453491618254E1 - 0.58557242129595E1 0.58660989760337E1 0.58764734139861E1 0.58868475328305E1 0.58972213524583E1 - 0.5907594840107E1 0.59179680250832E1 0.5928340907115E1 0.59387134362694E1 0.59490856791326E1 - 0.59594576216635E1 0.59698292025822E1 0.5980200485264E1 0.59905714870152E1 0.60009421253854E1 - 0.60113124449741E1 0.60216824928025E1 0.60320521900392E1 0.60424215555962E1 0.6052790632413E1 - 0.60631593879993E1 0.60735278005042E1 0.60838959120238E1 0.6094263709415E1 0.61046311699835E1 - 0.61149983233897E1 0.61253651535517E1 0.61357316552721E1 0.61460978495037E1 0.61564637301783E1 - 0.61668292479889E1 0.61771944829642E1 0.6187559405607E1 0.61979239617396E1 0.62082882152541E1 - 0.62186521761916E1 0.62290157834898E1 0.62393790519789E1 0.62497420384762E1 0.62601047004504E1 - 0.62704669927305E1 0.62808289900662E1 0.62911906949882E1 0.63015520271685E1 0.63119130368664E1 - 0.63222737570623E1 0.6332634143592E1 0.63429941772987E1 0.63533539016933E1 0.63637133335383E1 - 0.63740723941925E1 0.63844311400108E1 0.63947895693606E1 0.64051476813982E1 0.64155054493476E1 - 0.64258628959576E1 0.64362200299359E1 0.64465768234179E1 0.64569332919958E1 0.64672894340913E1 - 0.64776452565889E1 0.64880007492844E1 0.64983559089939E1 0.65087107453232E1 0.65190652620274E1 - 0.65294194419905E1 0.65397732928317E1 0.65501268313677E1 
0.65604800279108E1 0.65708328949238E1 - 0.65811854418491E1 0.65915376622921E1 0.66018895473818E1 0.66122411048734E1 0.66225923447808E1 - 0.66329432456341E1 0.66432938164676E1 0.66536440656623E1 0.66639939914224E1 0.66743435729582E1 - 0.66846928317396E1 0.6695041768202E1 0.67053903745084E1 0.67157386412438E1 0.67260865893065E1 - 0.67364342038391E1 0.67467814896035E1 0.67571284505726E1 0.67674750764334E1 0.67778213685464E1 - 0.67881673394831E1 0.67985129888084E1 0.6808858290127E1 0.68192032612082E1 0.68295479176711E1 - 0.68398922510258E1 0.68502362274758E1 0.68605798816612E1 0.68709232171739E1 0.68812662355649E1 - 0.68916088901396E1 0.69019512251979E1 0.6912293236223E1 0.6922634940385E1 0.69329762777102E1 - 0.69433172850427E1 0.69536579713351E1 0.69639983566695E1 0.69743383833304E1 0.69846780614676E1 - 0.69950174231091E1 0.70053564875749E1 0.70156952055307E1 0.70260336424744E1 0.70363721732157E1 - 0.70467108593886E1 0.70570496667883E1 0.706738861166E1 0.70777277056501E1 0.70880669387178E1 - 0.70984062853385E1 0.71087457584379E1 0.71190853723563E1 0.71294251204541E1 0.71397649761185E1 - 0.71501049478396E1 0.71604450385561E1 0.71707852691392E1 0.71811256033433E1 0.71914660399439E1 - 0.72018065780975E1 0.72121472542945E1 0.72224880344663E1 0.72328289017145E1 0.72431698653727E1 - 0.72535109525163E1 0.72638521389815E1 0.72741934048767E1 0.72845347667466E1 0.72948762393124E1 - 0.73052177923423E1 0.73155594276323E1 0.73259011541077E1 0.73362429828436E1 0.73465848822746E1 - 0.73569268483192E1 0.73672689110187E1 0.73776110626178E1 0.73879532808848E1 0.73982955662007E1 - 0.74086379203071E1 0.74189803638029E1 0.74293228735857E1 0.74396654460907E1 0.74500080740526E1 - 0.74603507778254E1 0.74706935471233E1 0.74810363774019E1 0.7491379252144E1 0.75017221909417E1 - 0.75120652014686E1 0.75224082526222E1 0.75327513446745E1 0.75430944956395E1 0.75534377176262E1 - 0.7563780968838E1 0.75741242519688E1 0.75844675852501E1 0.7594810990941E1 0.76051544162785E1 - 0.76154978707323E1 
0.76258413703297E1 0.76361849200233E1 0.76465284943433E1 0.76568720936433E1 - 0.76672157323998E1 0.76775594077557E1 0.76879031069216E1 0.76982468311763E1 0.77085905755131E1 - 0.77189343528845E1 0.7729278158583E1 0.77396219797375E1 0.77499658112052E1 0.77603096659114E1 - 0.77706535489175E1 0.77809974466721E1 0.77913413425764E1 0.78016852521408E1 0.78120291933248E1 - 0.78223731390532E1 0.78327170784685E1 0.78430610199246E1 0.78534049958848E1 0.78637489692055E1 - 0.78740929296552E1 0.78844368892342E1 0.78947808716435E1 0.79051248499439E1 0.79154688119446E1 - 0.79258127676051E1 0.79361567306944E1 0.79465006938924E1 0.7956844636301E1 0.79671885734033E1 - 0.79775324939339E1 0.79878764211313E1 0.79982203276752E1 0.80085642180011E1 0.80189080845443E1 - 0.8029251948965E1 0.8039595800433E1 0.80499396289074E1 0.80602834223608E1 0.80706271978546E1 - 0.80809709735933E1 0.80913147174403E1 0.81016584227559E1 0.81120020977853E1 0.81223457791277E1 - 0.81326894191087E1 0.81430330144359E1 0.81533765765107E1 0.8163720126939E1 0.81740636483328E1 - 0.81844071205828E1 0.81947505535939E1 0.82050939617066E1 0.82154373431249E1 0.82257806707463E1 - 0.82361239543669E1 0.82464672051161E1 0.82568104217422E1 0.82671535926531E1 0.82774967190462E1 - 0.82878397951526E1 0.82981828263058E1 0.83085258212406E1 0.83188687635214E1 0.83292116509167E1 - 0.83395544858757E1 0.83498972825755E1 0.8360240034322E1 0.83705827210605E1 0.8380925342274E1 - 0.8391267918643E1 0.84016104567529E1 0.84119529228069E1 0.84222953199073E1 0.84326376660135E1 - 0.84429799711937E1 0.84533222077468E1 0.84636643693597E1 0.84740064666449E1 0.84843485186503E1 - 0.84946905045591E1 0.85050324119359E1 0.8515374250223E1 0.85257160335974E1 0.85360577550387E1 - 0.85463993989003E1 0.85567409729919E1 0.85670824727691E1 0.85774239083274E1 0.85877652716667E1 - 0.85981065563922E1 0.86084477632932E1 0.86187888945736E1 0.86291299594311E1 0.8639470952015E1 - 0.86498118597266E1 0.86601526772868E1 0.86704934283968E1 0.86808341153151E1 
0.8691174702939E1 - 0.87015151973638E1 0.87118556093926E1 0.87221959600324E1 0.87325362248746E1 0.87428763895245E1 - 0.87532164619278E1 0.87635564677447E1 0.87738963924717E1 0.87842362170063E1 0.87945759418383E1 - 0.88049155815803E1 0.88152551450215E1 0.88255946099847E1 0.88359339728507E1 0.8846273244368E1 - 0.88566124279925E1 0.88669515241868E1 0.88772905196716E1 0.88876294181207E1 0.88979682144725E1 - 0.89083069245642E1 0.89186455370759E1 0.8928984054783E1 0.89393224569233E1 0.89496607512792E1 - 0.89599989632935E1 0.8970337078027E1 0.89806750805679E1 0.89910129695828E1 0.90013507620789E1 - 0.90116884672808E1 0.90220260627119E1 0.90323635387395E1 0.90427009001253E1 0.90530381819451E1 - 0.90633753578037E1 0.90737124107211E1 0.90840493428199E1 0.90943861678963E1 0.9104722902903E1 - 0.91150595193768E1 0.91253960120116E1 0.9135732391719E1 0.91460686692204E1 0.9156404839019E1 - 0.91667408863936E1 0.917707681763E1 0.91874126307688E1 0.91977483361233E1 0.92080839275981E1 - 0.92184193993156E1 0.92287547505409E1 0.92390899738455E1 0.92494250923861E1 0.92597600919313E1 - 0.92700949681273E1 0.92804297097628E1 0.92907643275593E1 0.93010988408912E1 0.93114332378672E1 - 0.93217674986229E1 0.93321016249543E1 0.93424356344399E1 0.93527695374745E1 0.93631033086427E1 - 0.93734369405974E1 0.93837704408489E1 0.93941038319555E1 0.94044371025699E1 0.94147702340253E1 - 0.94251032254236E1 0.94354360928468E1 0.94457688458189E1 0.94561014670877E1 0.94664339455511E1 - 0.94767662926651E1 0.94870985031779E1 0.94974305923278E1 0.95077625484684E1 0.95180943628648E1 - 0.95284260457486E1 0.95387575896637E1 0.954908900759E1 0.95594202921804E1 0.95697514404962E1 - 0.95800824417344E1 0.95904133052556E1 0.96007440447745E1 0.9611074650114E1 0.96214051086752E1 - 0.96317354180907E1 0.96420655909866E1 0.96523956427102E1 0.96627255534787E1 0.96730553088157E1 - 0.96833849146805E1 0.96937143932231E1 0.97040437421034E1 0.97143729396883E1 0.97247019827397E1 - 0.973503087904E1 0.9745359647474E1 
0.97556882788985E1 0.97660167533949E1 0.97763450726503E1 - 0.97866732549506E1 0.9797001295398E1 0.98073291963053E1 0.98176569395225E1 0.98279845351723E1 - 0.98383119784927E1 0.98486392772661E1 0.98589664360097E1 0.98692934380428E1 0.98796202937058E1 - 0.98899469882501E1 0.99002735363427E1 0.99105999479156E1 0.99209262080571E1 0.99312523050935E1 - 0.9941578243569E1 0.99519040352119E1 0.99622296933556E1 0.99725551920949E1 0.99828805237562E1 - 0.99932056949247E1 0.10003530722825E2 0.10013855618016E2 0.10024180341584E2 0.10034504899489E2 - 0.10044829298425E2 0.10055153555617E2 0.10065477669514E2 0.10075801611516E2 0.10086125387393E2 - 0.10096449007212E2 0.10106772479437E2 0.10117095802924E2 0.10127418954708E2 0.10137741943005E2 - 0.10148064775081E2 0.10158387448434E2 0.10168709974926E2 0.10179032330695E2 0.10189354525126E2 - 0.10199676554875E2 0.10209998421817E2 0.1022032014141E2 0.10230641695912E2 0.10240963086002E2 - 0.10251284302428E2 0.10261605356757E2 0.10271926264307E2 0.10282247010167E2 0.10292567582528E2 - 0.10302887979837E2 0.10313208212898E2 0.10323528303753E2 0.10333848229189E2 0.10344167974568E2 - 0.10354487546011E2 0.10364806952064E2 0.10375126218467E2 0.10385445312214E2 0.10395764226067E2 - 0.10406082964297E2 0.10416401541572E2 0.10426719968694E2 0.10437038223351E2 0.1044735629765E2 - 0.10457674199375E2 0.10467991938348E2 0.10478309514896E2 0.10488626923088E2 0.10498944148528E2 - 0.10509261207243E2 0.10519578092187E2 0.10529894810705E2 0.10540211363887E2 0.10550527743814E2 - 0.10560843950867E2 0.10571159976475E2 0.1058147583348E2 0.10591791527653E2 0.10602107054094E2 - 0.10612422400287E2 0.10622737563484E2 0.10633052551847E2 0.10643367384289E2 0.10653682047449E2 - 0.10663996525408E2 0.10674310818874E2 0.10684624936091E2 0.10694938896762E2 0.10705252690218E2 - 0.10715566294936E2 0.10725879714426E2 0.10736192956471E2 0.10746506037696E2 0.10756818954346E2 - 0.10767131679374E2 0.10777444219555E2 0.10787756583869E2 0.10798068778056E2 0.10808380808459E2 - 
0.10818692651157E2 0.10829004306396E2 0.1083931578936E2 0.10849627089408E2 0.108599382211E2 - 0.10870249165522E2 0.10880559929533E2 0.10890870513732E2 0.10901180909709E2 0.10911491138172E2 - 0.10921801188319E2 0.10932111061266E2 0.1094242074757E2 0.10952730244945E2 0.10963039566316E2 - 0.10973348718542E2 0.10983657692505E2 0.10993966476479E2 0.11004275068721E2 0.11014583480442E2 - 0.11024891725957E2 0.11035199797971E2 0.11045507673901E2 0.11055815359296E2 0.11066122857803E2 - 0.11076430191381E2 0.1108673735478E2 0.11097044310801E2 0.1110735107158E2 0.11117657648627E2 - 0.11127964053834E2 0.11138270285107E2 0.11148576325191E2 0.11158882171313E2 0.11169187834139E2 - 0.11179493314377E2 0.11189798618693E2 0.11200103740284E2 0.11210408668375E2 0.11220713410857E2 - 0.11231017966832E2 0.11241322335403E2 0.1125162653382E2 0.11261930538873E2 0.112722343618E2 - 0.11282537983635E2 0.11292841405902E2 0.11303144658668E2 0.11313447727078E2 0.11323750607185E2 - 0.11334053290825E2 0.1134435577887E2 0.11354658088789E2 0.11364960226269E2 0.11375262175093E2 - 0.11385563924049E2 0.11395865477599E2 0.11406166844997E2 0.11416468042868E2 0.11426769056436E2 - 0.11437069869075E2 0.11447370478036E2 0.11457670885633E2 0.11467971126561E2 0.11478271182559E2 - 0.11488571039207E2 0.11498870698573E2 0.11509170169091E2 0.11519469454898E2 0.11529768559586E2 - 0.11540067475863E2 0.11550366191863E2 0.11560664715724E2 0.11570963054466E2 0.11581261199734E2 - 0.11591559169108E2 0.11601856921852E2 0.1161215448711E2 0.11622451855738E2 0.1163274902722E2 - 0.11643046021582E2 0.11653342828349E2 0.11663639441697E2 0.11673935859886E2 0.11684232078097E2 - 0.1169452810455E2 0.11704823955571E2 0.11715119622181E2 0.11725415085494E2 0.11735710327823E2 - 0.11746005374421E2 0.11756300245614E2 0.11766594933429E2 0.11776889422764E2 0.11787183709211E2 - 0.11797477795247E2 0.11807771695891E2 0.1181806542162E2 0.11828358953359E2 0.11838652279679E2 - 0.1184894540575E2 0.11859238323845E2 0.11869531057066E2 
0.11879823604405E2 0.11890115950951E2 - 0.1190040809431E2 0.11910700045339E2 0.11920991806786E2 0.11931283379005E2 0.11941574762921E2 - 0.11951865943432E2 0.11962156931422E2 0.11972447713639E2 0.11982738281649E2 0.11993028675396E2 - 0.12003318868922E2 0.1201360886873E2 0.12023898670957E2 0.12034188270812E2 0.12044477677738E2 - 0.12054766902538E2 0.12065055935199E2 0.12075344769325E2 0.12085633380317E2 0.12095921783442E2 - 0.12106210003979E2 0.12116498043042E2 0.12126785882238E2 0.12137073516015E2 0.12147360946985E2 - 0.12157648182659E2 0.12167935238384E2 0.12178222109752E2 0.12188508758993E2 0.12198795189183E2 - 0.12209081419749E2 0.12219367467809E2 0.12229653330451E2 0.12239938994258E2 0.12250224451967E2 - 0.1226050970812E2 0.12270794770175E2 0.12281079639736E2 0.12291364313239E2 0.12301648768425E2 - 0.12311933019009E2 0.12322217078868E2 0.12332500939566E2 0.1234278460701E2 0.12353068085606E2 - 0.12363351363164E2 0.12373634442161E2 0.12383917314106E2 0.12394199963326E2 0.12404482426147E2 - 0.12414764692666E2 0.12425046766365E2 0.12435328639978E2 0.1244561030654E2 0.12455891772744E2 - 0.1246617305168E2 0.12476454140952E2 0.12486735003716E2 0.12497015659265E2 0.12507296111001E2 - 0.12517576365373E2 0.1252785643702E2 0.12538136322405E2 0.12548415998698E2 0.12558695466524E2 - 0.1256897471587E2 0.12579253760478E2 0.12589532627585E2 0.12599811290797E2 0.1261008974636E2 - 0.12620367996993E2 0.12630646048085E2 0.1264092391169E2 0.1265120159238E2 0.12661479034808E2 - 0.12671756268387E2 0.12682033303977E2 0.12692310142801E2 0.12702586788615E2 0.12712863241146E2 - 0.12723139492639E2 0.12733415539366E2 0.12743691370525E2 0.12753966985707E2 0.12764242408368E2 - 0.12774517637719E2 0.12784792668836E2 0.12795067500003E2 0.12805342131302E2 0.12815616555277E2 - 0.12825890756283E2 0.12836164764631E2 0.12846438578963E2 0.12856712188038E2 0.12866985589736E2 - 0.12877258786967E2 0.12887531787079E2 0.12897804613038E2 0.12908077205278E2 0.12918349585678E2 - 0.12928621758644E2 
0.12938893726818E2 0.12949165508104E2 0.12959437109197E2 0.12969708504543E2 - 0.12979979678444E2 0.12990250622408E2 0.13000521367266E2 0.13010791925536E2 0.13021062292536E2 - 0.13031332457329E2 0.13041602413166E2 0.13051872162685E2 0.13062141691112E2 0.1307241101861E2 - 0.13082680153179E2 0.1309294908467E2 0.13103217811259E2 0.13113486338388E2 0.13123754668098E2 - 0.13134022770811E2 0.13144290677167E2 0.13154558376797E2 0.13164825875607E2 0.1317509317474E2 - 0.13185360270387E2 0.13195627162383E2 0.13205893853531E2 0.13216160322717E2 0.13226426594429E2 - 0.13236692659879E2 0.13246958518495E2 0.13257224178032E2 0.13267489649294E2 0.13277754915137E2 - 0.1328801995156E2 0.1329828478115E2 0.13308549404395E2 0.13318813828181E2 0.13329078063882E2 - 0.13339342119525E2 0.13349605938021E2 0.13359869528621E2 0.13370132915435E2 0.13380396106273E2 - 0.13390659110195E2 0.13400921926223E2 0.13411184532424E2 0.13421446895327E2 0.13431709044567E2 - 0.13441971001802E2 0.13452232771824E2 0.13462494347447E2 0.13472755715665E2 0.13483016867716E2 - 0.13493277789041E2 0.13503538510607E2 0.13513799037911E2 0.13524059370708E2 0.13534319503474E2 - 0.13544579429052E2 0.13554839131878E2 0.13565098615244E2 0.13575357900239E2 0.13585616991679E2 - 0.13595875887769E2 0.13606134582929E2 0.13616393068072E2 0.13626651309809E2 0.13636909348277E2 - 0.13647167200074E2 0.13657424863025E2 0.13667682325291E2 0.13677939580479E2 0.13688196586546E2 - 0.13698453384854E2 0.13708709996084E2 0.13718966420236E2 0.13729222647104E2 0.13739478663765E2 - 0.13749734437972E2 0.13759989996257E2 0.13770245370048E2 0.13780500554993E2 0.13790755540478E2 - 0.13801010317251E2 0.13811264860006E2 0.13821519179492E2 0.13831773310675E2 0.13842027254371E2 - 0.13852281001737E2 0.13862534539439E2 0.13872787843123E2 0.13883040930069E2 0.13893293818779E2 - 0.13903546516874E2 0.13913799022175E2 0.13924051322031E2 0.13934303387052E2 0.13944555237512E2 - 0.13954806885654E2 0.13965058339507E2 0.1397530960084E2 0.13985560661216E2 
0.1399581148703E2 - 0.14006062098963E2 0.1401631250613E2 0.14026562713429E2 0.14036812727935E2 0.14047062552233E2 - 0.14057312136935E2 0.14067561506668E2 0.14077810672178E2 0.14088059636052E2 0.14098308404956E2 - 0.14108556988551E2 0.14118805326835E2 0.14129053458176E2 0.14139301383119E2 0.14149549103977E2 - 0.14159796628095E2 0.14170043959966E2 0.14180291057002E2 0.14190537946687E2 0.14200784631878E2 - 0.14211031114889E2 0.14221277397488E2 0.14231523463348E2 0.14241769320632E2 0.14252014972743E2 - 0.14262260418591E2 0.1427250566185E2 0.14282750698575E2 0.14292995580331E2 0.14303240948491E2 - 0.14313486891491E2 0.14323733434775E2 0.14333980579295E2 0.1434422845825E2 0.14354476979667E2 - 0.14364726110619E2 0.14374975840902E2 0.14385226157047E2 0.14395477107897E2 0.14405728776993E2 - 0.14415981025011E2 0.1442623385889E2 0.14436487279991E2 0.14446741280968E2 0.14456996015019E2 - 0.1446725133466E2 0.1447750722057E2 0.1448776367411E2 0.14498020698953E2 0.14508278395917E2 - 0.1451853673167E2 0.14528795622399E2 0.14539055063886E2 0.14549315054092E2 0.14559575640499E2 - 0.14569836930185E2 0.14580098767364E2 0.14590361143057E2 0.14600624054134E2 0.14610887509983E2 - 0.14621151662716E2 0.14631416373011E2 0.14641681624405E2 0.14651947404924E2 0.14662213708583E2 - 0.14672480676199E2 0.14682748192398E2 0.14693016243572E2 0.14703284827002E2 0.14713553931938E2 - 0.14723823664777E2 0.14734093952592E2 0.14744364756472E2 0.14754636073559E2 0.14764907899635E2 - 0.14775180355655E2 0.14785453367603E2 0.14795726875752E2 0.14806000884182E2 0.14816275396023E2 - 0.14826550495767E2 0.1483682616763E2 0.14847102331339E2 0.14857378985126E2 0.1486765612899E2 - 0.1487793385096E2 0.14888212118246E2 0.14898490870576E2 0.14908770109445E2 0.1491904983295E2 - 0.14929330133608E2 0.14939610965433E2 0.14949892263389E2 0.14960174026932E2 0.14970456258648E2 - 0.14980739088606E2 0.14991022436533E2 0.15001306239298E2 0.15011590493909E2 0.15021875201182E2 - 0.1503216049037E2 0.1504244628569E2 
0.15052732539114E2 0.15063019240438E2 0.15073306383675E2 - 0.15083594116991E2 0.15093882297391E2 0.15104170930724E2 0.15114460019168E2 0.1512474957524E2 - 0.15135039690642E2 0.1514533023408E2 0.15155621210722E2 0.15165912622748E2 0.1517620452417E2 - 0.1518649696914E2 0.15196789835207E2 0.15207083118968E2 0.15217376820969E2 0.15227671044612E2 - 0.15237965744047E2 0.15248260867163E2 0.15258556410559E2 0.15268852366072E2 0.15279148866629E2 - 0.15289445797263E2 0.1529974313389E2 0.1531004087243E2 0.15320339055196E2 0.1533063776872E2 - 0.1534093688198E2 0.15351236392934E2 0.15361536300359E2 0.15371836688744E2 0.15382137527615E2 - 0.1539243876867E2 0.15402740411094E2 0.15413042448218E2 0.15423345012292E2 0.15433647956644E2 - 0.15443951279916E2 0.15454254985644E2 0.15464559150292E2 0.15474863791411E2 0.15485168803422E2 - 0.15495474184012E2 0.15505779934404E2 0.15516086178683E2 0.1552639282297E2 0.15536699848352E2 - 0.1554700724966E2 0.15557315058051E2 0.15567623316246E2 0.15577931933631E2 0.15588240914697E2 - 0.15598550264477E2 0.15608860111002E2 0.15619170327601E2 0.15629480894003E2 0.15639791810616E2 - 0.15650103130093E2 0.15660414894069E2 0.15670727012473E2 0.15681039482312E2 0.15691352299505E2 - 0.15701665584462E2 0.15711979222215E2 0.15722293202014E2 0.15732607520732E2 0.15742922249798E2 - 0.15753237393158E2 0.15763552869721E2 0.15773868678127E2 0.15784184844227E2 0.15794501449246E2 - 0.15804818380939E2 0.15815135644339E2 0.15825453242983E2 0.15835771276329E2 0.15846089663287E2 - 0.15856408368347E2 0.15866727387698E2 0.15877046780253E2 0.15887366572E2 0.15897686691781E2 - 0.15908007133063E2 0.15918327900883E2 0.15928649086126E2 0.15938970578177E2 0.15949292375676E2 - 0.15959614479303E2 0.15969937037905E2 0.15980259912024E2 0.15990583081385E2 0.16000906545175E2 - 0.16011230382304E2 0.1602155457223E2 0.16031879072637E2 0.16042203881078E2 0.16052529026667E2 - 0.16062854541108E2 0.16073180343581E2 0.16083506436127E2 0.16093832838884E2 0.1610415964641E2 - 
0.16114486749452E2 0.16124814138621E2 0.16135141803851E2 0.16145469863917E2 0.16155798220997E2 - 0.16166126858761E2 0.1617645577245E2 0.16186785074581E2 0.16197114677398E2 0.16207444553281E2 - 0.16217774702776E2 0.1622810520248E2 0.16238436015797E2 0.16248767100955E2 0.16259098459451E2 - 0.16269430150607E2 0.16279762171838E2 0.16290094456066E2 0.16300427000844E2 0.16310759846988E2 - 0.16321093027696E2 0.16331426478205E2 0.16341760197426E2 0.16352094213116E2 0.16362428562282E2 - 0.16372763158799E2 0.16383098004028E2 0.16393433125045E2 0.16403768608246E2 0.16414104348322E2 - 0.16424440337791E2 0.16434776584135E2 0.16445113171302E2 0.1645544999793E2 0.16465787069188E2 - 0.16476124405893E2 0.16486462107367E2 0.16496800040818E2 0.1650713820475E2 0.16517476610318E2 - 0.16527815356302E2 0.1653815434297E2 0.16548493571004E2 0.165588330536E2 0.16569172867903E2 - 0.16579512906292E2 0.16589853169583E2 0.16600193671791E2 0.16610534509242E2 0.16620875577175E2 - 0.16631216873411E2 0.16641558413053E2 0.1665190026236E2 0.16662242334801E2 0.16672584628879E2 - 0.16682927165139E2 0.16693270000588E2 0.16703613065156E2 0.16713956352178E2 0.16724299878117E2 - 0.16734643700795E2 0.16744987733137E2 0.16755331973238E2 0.16765676460283E2 0.16776021234E2 - 0.16786366218469E2 0.16796711413146E2 0.16807056873407E2 0.16817402584556E2 0.16827748493533E2 - 0.16838094601561E2 0.16848440977212E2 0.16858787615727E2 0.16869134456829E2 0.16879481493038E2 - 0.16889828797007E2 0.16900176320572E2 0.16910524035433E2 0.1692087194227E2 0.16931220161222E2 - 0.16941568586029E2 0.16951917198866E2 0.16962265999598E2 0.16972615078294E2 0.16982964344205E2 - 0.16993313801548E2 0.17003663483521E2 0.17014013435976E2 0.17024363566801E2 0.170347138745E2 - 0.17045064404652E2 0.17055415231817E2 0.17065766651646E2 0.170761187987E2 0.17086472023734E2 - 0.17096826161982E2 0.17107181063646E2 0.17117536726956E2 0.17127893544575E2 0.17138251171052E2 - 0.17148609556864E2 0.17158968757385E2 0.17169329093301E2 0.17179690175354E2 
0.17190052003544E2 - 0.17200414747805E2 0.17210778490997E2 0.17221142971754E2 0.17231508191029E2 0.17241874442159E2 - 0.17252241565279E2 0.17262609419569E2 0.17272978006254E2 0.17283347706974E2 0.17293718149069E2 - 0.17304089311313E2 0.17314461300375E2 0.17324834335988E2 0.17335208082227E2 0.17345582534601E2 - 0.17355957919238E2 0.17366334190831E2 0.17376711162437E2 0.1738708883392E2 0.1739746759726E2 - 0.17407847101703E2 0.17418227293234E2 0.17428608257841E2 0.17438990226004E2 0.17449372876765E2 - 0.17459756213525E2 0.1747014048976E2 0.17480525618527E2 0.17490911416695E2 0.17501297880725E2 - 0.17511685396068E2 0.17522073603533E2 0.17532462478358E2 0.17542852152109E2 0.17553242780573E2 - 0.17563634063606E2 0.17574025996856E2 0.17584418858559E2 0.17594812498236E2 0.17605206788673E2 - 0.17615601766335E2 0.17625997764287E2 0.17636394401708E2 0.17646791676556E2 0.17657189791556E2 - 0.17667588741945E2 0.17677988323231E2 0.17688388532012E2 0.17698789746118E2 0.17709191619829E2 - 0.17719594119946E2 0.1772999738307E2 0.17740401521585E2 0.17750806275622E2 0.17761211641372E2 - 0.17771617934622E2 0.17782024921218E2 0.17792432523064E2 0.17802840844108E2 0.17813250053739E2 - 0.17823659859301E2 0.17834070258615E2 0.17844481545429E2 0.17854893527766E2 0.17865306110795E2 - 0.17875719389484E2 0.17886133552271E2 0.17896548296837E2 0.17906963621687E2 0.17917379801655E2 - 0.17927796665989E2 0.17938214117546E2 0.17948632255238E2 0.1795905126043E2 0.17969470830938E2 - 0.17979890965539E2 0.17990311945053E2 0.18000733590295E2 0.18011155803574E2 0.18021578689383E2 - 0.18032002424461E2 0.18042426713154E2 0.18052851552197E2 0.18063277238059E2 0.18073703552369E2 - 0.1808413041839E2 0.18094557966736E2 0.18104986319452E2 0.18115415214219E2 0.18125844646819E2 - 0.18136274948006E2 0.18146705828253E2 0.1815713724538E2 0.18167569367283E2 0.18178002222805E2 - 0.18188435613031E2 0.18198869552111E2 0.18209304364133E2 0.18219739702423E2 0.1823017556426E2 - 0.18240612172506E2 0.18251049433962E2 
0.18261487214884E2 0.18271925599082E2 0.18282364781526E2 - 0.18292804477722E2 0.18303244686037E2 0.18313685689196E2 0.18324127257672E2 0.18334569329287E2 - 0.18345012067173E2 0.18355455519087E2 0.18365899470737E2 0.18376343938724E2 0.18386789232169E2 - 0.18397235013106E2 0.18407681280855E2 0.18418128279638E2 0.18428575895304E2 0.18439024004176E2 - 0.1844947271442E2 0.1845992213393E2 0.18470372027053E2 0.18480822393111E2 0.18491273581226E2 - 0.18501725260097E2 0.18512177420203E2 0.18522630282907E2 0.18533083733785E2 0.18543537646985E2 - 0.18553992128903E2 0.1856444731029E2 0.18574902957129E2 0.18585359070642E2 0.185958160039E2 - 0.18606273391723E2 0.18616731228943E2 0.18627189745087E2 0.18637648825868E2 0.18648108358222E2 - 0.18658568469499E2 0.18669029244325E2 0.18679490467868E2 0.18689952165433E2 0.18700414613217E2 - 0.1871087749992E2 0.18721340824727E2 0.18731804852268E2 0.18742269385261E2 0.18752734353364E2 - 0.18763199934949E2 0.18773666108245E2 0.18784132710393E2 0.18794599821802E2 0.18805067611047E2 - 0.18815535819348E2 0.18826004445752E2 0.1883647381734E2 0.18846943626759E2 0.18857413860405E2 - 0.18867884747396E2 0.18878356136165E2 0.18888827933377E2 0.18899300292233E2 0.18909773230827E2 - 0.18920246581266E2 0.18930720423143E2 0.18941194933332E2 0.1895166984244E2 0.18962145147202E2 - 0.18972621164358E2 0.18983097579345E2 0.18993574391074E2 0.19004051856557E2 0.19014529803138E2 - 0.19025008146188E2 0.190354870468E2 0.19045966475586E2 0.1905644628846E2 0.19066926593453E2 - 0.19077407495872E2 0.19087888789455E2 0.19098370522229E2 0.19108852905084E2 0.191193356606E2 - 0.1912981878755E2 0.19140302582405E2 0.19150786773866E2 0.19161271343837E2 0.19171756524432E2 - 0.19182242155512E2 0.19192728157483E2 0.19203214703138E2 0.19213701738463E2 0.19224189137875E2 - 0.19234677033216E2 0.19245165467206E2 0.1925565426406E2 0.19266143504868E2 0.19276633331538E2 - 0.19287123510867E2 0.19297614070204E2 0.19308105267104E2 0.19318596811085E2 0.1932908870065E2 - 
0.19339581222337E2 0.19350074118181E2 0.19360567367772E2 0.19371061202273E2 0.19381555439366E2 - 0.19392050016466E2 0.19402545124596E2 0.19413040671646E2 0.19423536558159E2 0.19434032947401E2 - 0.19444529818422E2 0.19455027025368E2 0.19465524685383E2 0.19476022843367E2 0.19486521326255E2 - 0.19497020219491E2 0.19507519644982E2 0.19518019409255E2 0.19528519574488E2 0.19539020290207E2 - 0.19549521321851E2 0.19560022693093E2 0.19570524645979E2 0.19581026913701E2 0.19591529495714E2 - 0.19602032701004E2 0.19612536225485E2 0.19623040064529E2 0.19633544463855E2 0.19644049197066E2 - 0.19654554235588E2 0.196650598114E2 0.19675565753341E2 0.19686072007397E2 0.19696578787177E2 - 0.19707085936351E2 0.19717593382539E2 0.19728101312272E2 0.19738609634258E2 0.1974911825489E2 - 0.19759627342877E2 0.19770136837946E2 0.19780646630708E2 0.19791156877629E2 0.1980166753272E2 - 0.19812178475436E2 0.19822689845235E2 0.19833201644158E2 0.198437137288E2 0.19854226217396E2 - 0.19864739150082E2 0.19875252368759E2 0.1988576598173E2 0.19896280037566E2 0.19906794372183E2 - 0.19917309085039E2 0.19927824235453E2 0.19938339662336E2 0.19948855463239E2 0.19959371720886E2 - 0.19969888254864E2 0.19980405151979E2 0.19990922490871E2 0.20001440096026E2 0.20011958049623E2 - 0.20022476444997E2 0.200329951099E2 0.2004351412964E2 0.20054033611021E2 0.20064553352952E2 - 0.20075073426453E2 0.2008559393384E2 0.20096114698375E2 0.20106635799242E2 0.20117157338894E2 - 0.20127679143857E2 0.20138201301615E2 0.20148723881967E2 0.20159246710622E2 0.20169769868672E2 - 0.2018029344292E2 0.20190817266016E2 0.20201341427077E2 0.20211866017807E2 0.20222390858201E2 - 0.20232916037378E2 0.20243441610575E2 0.20253967423369E2 0.20264493574808E2 0.20275020120405E2 - 0.20285546907231E2 0.20296074041818E2 0.20306601564304E2 0.20317129325978E2 0.20327657439224E2 - 0.20338185908122E2 0.20348714609406E2 0.20359243675051E2 0.20369773088185E2 0.20380302732616E2 - 0.20390832749162E2 0.20401363101906E2 0.20411893682817E2 
0.20422424636797E2 0.20432955911327E2 - 0.20443487412392E2 0.20454019295096E2 0.20464551469507E2 0.20475083865827E2 0.20485616678345E2 - 0.20496149765698E2 0.20506683071831E2 0.20517216796476E2 0.20527750772689E2 0.20538284961424E2 - 0.20548819572072E2 0.20559354416601E2 0.2056988947406E2 0.20580424992071E2 0.20590960725053E2 - 0.20601496690556E2 0.20612033065183E2 0.20622569643487E2 0.20633106464244E2 0.20643643677118E2 - 0.20654181101832E2 0.20664718804103E2 0.20675256882843E2 0.20685795161315E2 0.20696333718024E2 - 0.20706872613106E2 0.20717411705063E2 0.20727951098386E2 0.2073849080709E2 0.20749030719742E2 - 0.20759570982878E2 0.20770111522754E2 0.20780652252156E2 0.20791193322518E2 0.20801734645266E2 - 0.20812276158901E2 0.20822818053224E2 0.20833360175442E2 0.20843902489773E2 0.20854445207785E2 - 0.20864988116821E2 0.20875531229671E2 0.20886074713956E2 0.20896618384911E2 0.2090716229315E2 - 0.2091770654466E2 0.20928250982297E2 0.20938795689598E2 0.20949340706021E2 0.20959885898638E2 - 0.20970431371804E2 0.2098097713137E2 0.20991523069715E2 0.21002069317318E2 0.21012615804789E2 - 0.21023162466822E2 0.21033709489133E2 0.21044256715938E2 0.2105480411227E2 0.21065351884527E2 - 0.21075899825359E2 0.21086447961569E2 0.21096996436561E2 0.21107545080944E2 0.21118093960297E2 - 0.211286431485E2 0.21139192504733E2 0.21149742127638E2 0.21160292007418E2 0.21170842045977E2 - 0.21181392372432E2 0.21191942922566E2 0.21202493634062E2 0.21213044695226E2 0.21223595944828E2 - 0.21234147361644E2 0.21244699109351E2 0.21255251011211E2 0.21265803110897E2 0.21276355509093E2 - 0.21286908063481E2 0.21297460858691E2 0.21308013929961E2 0.21318567155927E2 0.21329120644619E2 - 0.21339674347057E2 0.2135022819494E2 0.21360782339971E2 0.21371336670584E2 0.21381891160656E2 - 0.21392445988693E2 0.21403000961495E2 0.21413556124233E2 0.2142411156235E2 0.21434667141558E2 - 0.2144522294526E2 0.214557789906E2 0.21466335177983E2 0.21476891628908E2 0.21487448288681E2 - 0.21498005095123E2 
0.21508562191367E2 0.21519119437681E2 0.21529676842366E2 0.21540234542159E2 - 0.21550792376647E2 0.2156135040978E2 0.21571908688359E2 0.21582467102314E2 0.21593025760901E2 - 0.21603584616089E2 0.2161414360162E2 0.21624702869742E2 0.21635262288206E2 0.2164582184749E2 - 0.21656381686507E2 0.21666941658315E2 0.21677501824549E2 0.21688062225617E2 0.21698622753082E2 - 0.21709183510347E2 0.21719744455644E2 0.21730305521895E2 0.21740866844613E2 0.21751428311279E2 - 0.2176198991618E2 0.21772551811698E2 0.2178311382926E2 0.21793676028865E2 0.21804238439087E2 - 0.21814800963597E2 0.21825363704437E2 0.21835926617662E2 0.21846489646943E2 0.21857052947386E2 - 0.21867616383206E2 0.2187817997277E2 0.21888743795399E2 0.21899307727147E2 0.21909871838897E2 - 0.21920436139904E2 0.2193100055098E2 0.21941565190904E2 0.21952129992736E2 0.21962694922053E2 - 0.21973260098722E2 0.21983825378658E2 0.21994390809889E2 0.22004956446567E2 0.22015522189369E2 - 0.22026088133535E2 0.22036654239338E2 0.220472204514E2 0.22057786919386E2 0.22068353497578E2 - 0.22078920207473E2 0.22089487122543E2 0.2210005413843E2 0.22110621335273E2 0.22121188701371E2 - 0.22131756168288E2 0.22142323857793E2 0.22152891669886E2 0.22163459603886E2 0.22174027748367E2 - 0.22184595989719E2 0.22195164396245E2 0.22205732968897E2 0.22216301634476E2 0.22226870497065E2 - 0.22237439491221E2 0.22248008602386E2 0.22258577923895E2 0.22269147337107E2 0.22279716902431E2 - 0.22290286630886E2 0.22300856447779E2 0.22311426452975E2 0.22321996575452E2 0.22332566798339E2 - 0.22343137241774E2 0.22353707776368E2 0.22364278461985E2 0.22374849298081E2 0.22385420215539E2 - 0.22395991310687E2 0.2240656251713E2 0.22417133823031E2 0.22427705331112E2 0.22438276928039E2 - 0.22448848679118E2 0.22459420570744E2 0.22469992540195E2 0.22480564688288E2 0.22491136933662E2 - 0.22501709276045E2 0.22512281795846E2 0.22522854402452E2 0.22533427173959E2 0.22544000075264E2 - 0.22554573051264E2 0.22565146200329E2 0.22575719434157E2 0.22586292769892E2 
0.22596866268352E2 - 0.22607439843547E2 0.22618013573711E2 0.2262858742796E2 0.2263916135976E2 0.22649735469932E2 - 0.22660309649629E2 0.22670883938851E2 0.22681458365763E2 0.22692032862745E2 0.22702607518777E2 - 0.22713182276716E2 0.22723757121187E2 0.22734332133417E2 0.22744907210001E2 0.22755482401553E2 - 0.22766057714741E2 0.22776633095277E2 0.22787208640441E2 0.22797784258124E2 0.22808359966013E2 - 0.22818935824653E2 0.22829511749689E2 0.22840087809694E2 0.22850663963169E2 0.22861240184537E2 - 0.22871816564411E2 0.22882393004015E2 0.22892969549471E2 0.22903546204316E2 0.22914122919027E2 - 0.2292469979823E2 0.22935276747367E2 0.22945853780723E2 0.22956430945239E2 0.22967008166081E2 - 0.22977585502572E2 0.22988162921378E2 0.22998740402362E2 0.230093180326E2 0.23019895723928E2 - 0.23030473530768E2 0.23041051434073E2 0.23051629389623E2 0.23062207474564E2 0.23072785618417E2 - 0.23083363845673E2 0.23093942178102E2 0.23104520563456E2 0.23115099085276E2 0.2312567768356E2 - 0.23136256354634E2 0.2314683513537E2 0.23157413961958E2 0.23167992884465E2 0.23178571885614E2 - 0.231891509386E2 0.23199730127235E2 0.23210309369613E2 0.23220888712809E2 0.23231468138591E2 - 0.23242047607776E2 0.23252627191656E2 0.23263206824382E2 0.23273786528401E2 0.23284366330245E2 - 0.23294946177804E2 0.23305526136794E2 0.23316106160683E2 0.23326686254913E2 0.23337266439213E2 - 0.23347846661882E2 0.23358426972579E2 0.23369007351168E2 0.23379587786494E2 0.23390168322773E2 - 0.23400748899734E2 0.23411329563499E2 0.23421910297742E2 0.23432491078108E2 0.23443071958476E2 - 0.23453652876939E2 0.23464233871323E2 0.23474814932374E2 0.23485396028665E2 0.23495977227406E2 - 0.235065584671E2 0.23517139781354E2 0.23527721164044E2 0.23538302581157E2 0.23548884097863E2 - 0.23559465651985E2 0.23570047262979E2 0.23580628939689E2 0.23591210647011E2 0.23601792437328E2 - 0.23612374277306E2 0.23622956183271E2 0.23633538162436E2 0.23644120172621E2 0.23654702254644E2 - 0.23665284373539E2 0.23675866536244E2 
0.23686448767522E2 0.2369703102846E2 0.23707613366134E2 - 0.23718195753741E2 0.23728778196044E2 0.2373936070134E2 0.23749943231807E2 0.23760525822548E2 - 0.23771108448391E2 0.23781691110425E2 0.23792273834773E2 0.23802856589326E2 0.23813439431228E2 - 0.23824022309049E2 0.23834605222788E2 0.23845188188638E2 0.23855771176256E2 0.23866354220747E2 - 0.23876937297619E2 0.23887520406737E2 0.23898103580345E2 0.23908686780349E2 0.23919270049469E2 - 0.23929853348658E2 0.23940436679141E2 0.23951020053569E2 0.2396160344623E2 0.23972186886782E2 - 0.23982770362029E2 0.23993353875372E2 0.24003937434838E2 0.24014521014713E2 0.24025104655677E2 - 0.24035688318003E2 0.24046272005499E2 0.2405685573422E2 0.24067439480764E2 0.24078023281051E2 - 0.2408860709957E2 0.24099190942579E2 0.24109774829671E2 0.24120358735674E2 0.24130942694331E2 - 0.24141526667683E2 0.24152110663484E2 0.24162694697653E2 0.24173278748421E2 0.24183862838374E2 - 0.24194446940571E2 0.24205031064946E2 0.2421561522456E2 0.2422619940118E2 0.2423678362338E2 - 0.24247367859186E2 0.24257952124652E2 0.24268536406581E2 0.24279120698442E2 0.24289705017786E2 - 0.24300289347471E2 0.24310873702327E2 0.24321458085618E2 0.24332042490951E2 0.24342626928529E2 - 0.24353211374678E2 0.24363795838243E2 0.24374380312557E2 0.24384964797483E2 0.2439554929928E2 - 0.24406133808152E2 0.24416718339745E2 0.24427302896101E2 0.24437887485383E2 0.24448472081195E2 - 0.24459056679729E2 0.24469641287378E2 0.2448022590115E2 0.24490810526079E2 0.245013951623E2 - 0.24511979804508E2 0.24522564478145E2 0.24533149158448E2 0.24543733853446E2 0.24554318553887E2 - 0.24564903258272E2 0.24575487964291E2 0.24586072671806E2 0.2459665738558E2 0.24607242109286E2 - 0.24617826843604E2 0.24628411591392E2 0.24638996343398E2 0.24649581100574E2 0.24660165856118E2 - 0.24670750609076E2 0.24681335365163E2 0.24691920122611E2 0.24702504887679E2 0.24713089651249E2 - 0.24723674410602E2 0.24734259178663E2 0.24744843953662E2 0.24755428724716E2 0.24766013493593E2 - 
0.24776598254306E2 0.24787183014612E2 0.24797767774373E2 0.24808352528283E2 0.24818937279881E2 - 0.24829522020458E2 0.2484010676294E2 0.2485069150964E2 0.24861276247359E2 0.24871860979637E2 - 0.24882445708241E2 0.24893030431811E2 0.24903615135516E2 0.24914199829426E2 0.24924784512587E2 - 0.2493536919085E2 0.24945953867344E2 0.24956538537988E2 0.24967123203053E2 0.2497770785737E2 - 0.24988292494136E2 0.24998877120289E2 0.25009461723584E2 0.25020046318988E2 0.25030630890855E2 - 0.25041215457279E2 0.25051800017716E2 0.25062384572984E2 0.25072969125258E2 0.25083553640933E2 - 0.25094138144907E2 0.25104722623574E2 0.25115307081561E2 0.25125891525125E2 0.25136475944744E2 - 0.25147060359872E2 0.25157644768743E2 0.25168229160469E2 0.25178813529339E2 0.25189397869228E2 - 0.25199982197144E2 0.25210566484515E2 0.25221150760185E2 0.25231735004427E2 0.25242319232613E2 - 0.25252903446513E2 0.25263487645992E2 0.25274071837851E2 0.25284655979505E2 0.252952401032E2 - 0.25305824193016E2 0.25316408255757E2 0.25326992301899E2 0.25337576314707E2 0.2534816031527E2 - 0.25358744273383E2 0.25369328216803E2 0.25379912140357E2 0.2539049602636E2 0.25401079895628E2 - 0.25411663708841E2 0.25422247509536E2 0.25432831275537E2 0.25443415012421E2 0.25453998719509E2 - 0.25464582389726E2 0.25475166047153E2 0.25485749656961E2 0.25496333247992E2 0.25506916791812E2 - 0.25517500309372E2 0.25528083805028E2 0.25538667245988E2 0.25549250669262E2 0.25559834027908E2 - 0.25570417369819E2 0.25581000677988E2 0.25591583953095E2 0.25602167205208E2 0.25612750403851E2 - 0.25623333584671E2 0.25633916696237E2 0.25644499783314E2 0.25655082827396E2 0.25665665824811E2 - 0.25676248793518E2 0.25686831712972E2 0.25697414617868E2 0.25707997472559E2 0.25718580296229E2 - 0.25729163069421E2 0.25739745788872E2 0.25750328477437E2 0.25760911095117E2 0.2577149369334E2 - 0.25782076225848E2 0.25792658740261E2 0.25803241226645E2 0.2581382365575E2 0.25824406052473E2 - 0.25834988368786E2 0.25845570663467E2 0.25856152885084E2 
0.25866735073504E2 0.25877317208182E2 - 0.25887899297144E2 0.25898481359303E2 0.259090633601E2 0.25919645340761E2 0.25930227237882E2 - 0.25940809098813E2 0.2595139089743E2 0.25961972637678E2 0.25972554340646E2 0.2598313597994E2 - 0.25993717599642E2 0.26004299136756E2 0.26014880644619E2 0.26025462093841E2 0.26036043486056E2 - 0.26046624836228E2 0.26057206103814E2 0.26067787347726E2 0.26078368504562E2 0.26088949631465E2 - 0.26099530685714E2 0.26110111693975E2 0.26120692661147E2 0.26131273552264E2 0.26141854419706E2 - 0.26152435177623E2 0.26163015912682E2 0.26173596574134E2 0.26184177184973E2 0.26194757738668E2 - 0.26205338213722E2 0.26215918655714E2 0.26226499008317E2 0.26237079337245E2 0.26247659579098E2 - 0.26258239778908E2 0.26268819915317E2 0.26279399977166E2 0.26289979995068E2 0.26300559911472E2 - 0.26311139801345E2 0.26321719588631E2 0.26332299345032E2 0.26342879033812E2 0.26353458661776E2 - 0.26364038238309E2 0.26374617723821E2 0.26385197181712E2 0.26395776514976E2 0.26406355819668E2 - 0.26416935025174E2 0.26427514176802E2 0.26438093260401E2 0.26448672286312E2 0.2645925128297E2 - 0.26469830161668E2 0.26480409010976E2 0.26490987740419E2 0.2650156642782E2 0.26512145028961E2 - 0.26522723558808E2 0.26533302029879E2 0.26543880411288E2 0.26554458765485E2 0.26565037008252E2 - 0.26575615221631E2 0.26586193326163E2 0.26596771371784E2 0.26607349335906E2 0.26617927212135E2 - 0.26628505037887E2 0.2663908275493E2 0.26649660444897E2 0.2666023802044E2 0.26670815559656E2 - 0.26681393004857E2 0.26691970376995E2 0.2670254767335E2 0.26713124870926E2 0.26723702026734E2 - 0.26734279058321E2 0.2674485606052E2 0.26755432937411E2 0.26766009769675E2 0.26776586512685E2 - 0.26787163182769E2 0.26797739791356E2 0.26808316283205E2 0.26818892736006E2 0.26829469048804E2 - 0.26840045332214E2 0.26850621497935E2 0.26861197606648E2 0.26871773617807E2 0.26882349549102E2 - 0.26892925421076E2 0.26903501177129E2 0.26914076899984E2 0.26924652477258E2 0.26935228023103E2 - 0.26945803442143E2 
0.26956378799685E2 0.26966954058722E2 0.26977529225104E2 0.26988104325498E2 - 0.26998679317319E2 0.27009254279343E2 0.27019829088167E2 0.2703040386529E2 0.27040978510982E2 - 0.27051553094496E2 0.27062127576375E2 0.27072701962742E2 0.27083276278904E2 0.27093850465268E2 - 0.27104424613399E2 0.27114998627106E2 0.27125572610745E2 0.27136146462272E2 0.27146720250669E2 - 0.27157293923689E2 0.27167867500789E2 0.2717844099816E2 0.27189014372535E2 0.27199587703508E2 - 0.27210160879104E2 0.27220734025856E2 0.27231307044442E2 0.27241880007245E2 0.27252452844875E2 - 0.27263025594613E2 0.27273598255914E2 0.27284170792042E2 0.27294743271026E2 0.27305315595721E2 - 0.27315887888307E2 0.27326460032047E2 0.27337032135242E2 0.2734760411272E2 0.27358176009108E2 - 0.27368747798011E2 0.27379319471538E2 0.2738989107241E2 0.27400462529606E2 0.27411033950067E2 - 0.27421605201816E2 0.27432176420913E2 0.2744274748817E2 0.27453318495561E2 0.2746388938676E2 - 0.27474460179182E2 0.27485030880717E2 0.27495601446468E2 0.27506171953986E2 0.27516742302045E2 - 0.27527312617451E2 0.27537882754622E2 0.27548452849343E2 0.27559022804742E2 0.27569592683133E2 - 0.27580162449685E2 0.27590732100316E2 0.2760130166768E2 0.27611871097899E2 0.27622440483491E2 - 0.27633009681053E2 0.27643578843784E2 0.27654147825014E2 0.27664716756992E2 0.2767528555754E2 - 0.27685854271554E2 0.27696422877268E2 0.27706991361346E2 0.27717559769505E2 0.27728128016844E2 - 0.27738696220373E2 0.27749264230429E2 0.27759832205655E2 0.27770399998019E2 0.27780967736505E2 - 0.27791535340839E2 0.27802102856464E2 0.2781267026542E2 0.27823237545666E2 0.27833804746455E2 - 0.27844371779078E2 0.27854938765441E2 0.2786550554799E2 0.27876072296526E2 0.27886638859207E2 - 0.27897205372636E2 0.27907771753546E2 0.27918338041392E2 0.27928904207876E2 0.27939470242112E2 - 0.27950036186006E2 0.27960601969897E2 0.27971167700999E2 0.27981733230191E2 0.27992298725097E2 - 0.2800286402136E2 0.28013429275174E2 0.28023994384494E2 0.28034559407283E2 
0.28045124290477E2 - 0.28055689049859E2 0.28066253703116E2 0.28076818200997E2 0.2808738262976E2 0.28097946876752E2 - 0.45437337287424E-5 0.45396580133992E-5 0.45355848227549E-5 0.45315141583465E-5 0.45274460393158E-5 - 0.45233804551641E-5 0.45193175072177E-5 0.4515257203871E-5 0.45111997367094E-5 0.45071453352842E-5 - 0.45030942353219E-5 0.4499046893577E-5 0.44950038522627E-5 0.44909658300168E-5 0.44869337400011E-5 - 0.44829087205391E-5 0.44788921675136E-5 0.44748857684322E-5 0.44708915380713E-5 0.44669118556134E-5 - 0.4462949503191E-5 0.44590077057561E-5 0.44550901721937E-5 0.44512011376023E-5 0.44473454066628E-5 - 0.44435283980237E-5 0.44397561896285E-5 0.44360355649147E-5 0.44323740598168E-5 0.44287800105048E-5 - 0.44252626017946E-5 0.4421831916165E-5 0.44184989833217E-5 0.44152758302471E-5 0.44121755316773E-5 - 0.4409212260951E-5 0.44064013411737E-5 0.44037592966449E-5 0.44013039044952E-5 0.43990542464847E-5 - 0.4397030760911E-5 0.43952552945822E-5 0.4393751154807E-5 0.43925431613575E-5 0.43916576983625E-5 - 0.43911227660874E-5 0.43909680325625E-5 0.43912248850182E-5 0.43919264810905E-5 0.43931077997593E-5 - 0.43948056919844E-5 0.43970589310041E-5 0.43999082622645E-5 0.44033964529458E-5 0.44075683410558E-5 - 0.44124708840612E-5 0.44181532070258E-5 0.44246666502312E-5 0.44320648162501E-5 0.44404036164489E-5 - 0.4449741316894E-5 0.4460138583638E-5 0.4471658527364E-5 0.44843667473649E-5 0.44983313748386E-5 - 0.45136231154771E-5 0.45303152913325E-5 0.45484838819399E-5 0.45682075646814E-5 0.45895677543734E-5 - 0.46126486420617E-5 0.46375372330106E-5 0.46643233838698E-5 0.46930998390078E-5 0.47239622659968E-5 - 0.47570092902396E-5 0.47923425287253E-5 0.48300666229034E-5 0.48702892706682E-5 0.4913121257441E-5 - 0.49586764863448E-5 0.50070720074612E-5 0.50584280461629E-5 0.51128680305151E-5 0.51705186177391E-5 - 0.52315097197334E-5 0.52959745276452E-5 0.53640495354905E-5 0.54358745628162E-5 0.55115927764021E-5 - 0.55913507109992E-5 0.56752982891025E-5 
0.57635888397544E-5 0.58563791163798E-5 0.59538293136492E-5 - 0.60561030833705E-5 0.61633675494085E-5 0.62757933216332E-5 0.63935545088961E-5 0.65168287310365E-5 - 0.66457971299179E-5 0.67806443794982E-5 0.69215586949333E-5 0.70687318407179E-5 0.72223591378668E-5 - 0.73826394701375E-5 0.75497752892999E-5 0.77239726194557E-5 0.79054410604113E-5 0.80943937901087E-5 - 0.82910475661191E-5 0.84956227262046E-5 0.87083431879515E-5 0.89294364474822E-5 0.91591335772508E-5 - 0.93976692229275E-5 0.96452815993795E-5 0.99022124857533E-5 0.10168707219665E-4 0.10445014690508E-4 - 0.10731387331879E-4 0.11028081113137E-4 0.11335355530097E-4 0.11653473594871E-4 0.11982701824855E-4 - 0.12323310230881E-4 0.12675572304537E-4 0.13039765004658E-4 0.13416168743009E-4 0.13805067369153E-4 - 0.14206748154527E-4 0.14621501775726E-4 0.15049622297002E-4 0.15491407152E-4 0.15947157124726E-4 - 0.16417176329757E-4 0.1690177219172E-4 0.17401255424021E-4 0.1791594000686E-4 0.18446143164529E-4 - 0.18992185341994E-4 0.195543901808E-4 0.20133084494268E-4 0.20728598242032E-4 0.21341264503905E-4 - 0.21971419453083E-4 0.22619402328712E-4 0.23285555407813E-4 0.23970223976581E-4 0.24673756301074E-4 - 0.25396503597288E-4 0.26138820000645E-4 0.26901062534893E-4 0.27683591080427E-4 0.28486768342058E-4 - 0.29310959816215E-4 0.30156533757613E-4 0.31023861145386E-4 0.31913315648692E-4 0.32825273591818E-4 - 0.33760113918772E-4 0.34718218157394E-4 0.35699970382974E-4 0.36705757181417E-4 0.37735967611931E-4 - 0.3879099316928E-4 0.3987122774559E-4 0.40977067591728E-4 0.42108911278268E-4 0.43267159656041E-4 - 0.44452215816299E-4 0.45664485050478E-4 0.46904374809598E-4 0.4817229466329E-4 0.49468656258467E-4 - 0.50793873277647E-4 0.52148361396947E-4 0.53532538243749E-4 0.54946823354041E-4 0.56391638129468E-4 - 0.57867405794077E-4 0.59374551350775E-4 0.60913501537522E-4 0.62484684783247E-4 0.64088531163512E-4 - 0.65725472355929E-4 0.67395941595343E-4 0.69100373628786E-4 0.7083920467021E-4 0.72612872355016E-4 - 
0.74421815694383E-4 0.76266475029412E-4 0.78147291985079E-4 0.8006470942403E-4 0.82019171400206E-4 - 0.8401112311232E-4 0.86041010857188E-4 0.88109281982928E-4 0.90216384842028E-4 0.92362768744308E-4 - 0.94548883909765E-4 0.96775181421319E-4 0.99042113177476E-4 0.1013501318449E-3 0.10369969081092E-3 - 0.10609124413597E-3 0.10852524650594E-3 0.11100215318458E-3 0.1135224199657E-3 0.1160865031255E-3 - 0.11869485937478E-3 0.12134794581116E-3 0.12404621987129E-3 0.12679013928306E-3 0.12958016201782E-3 - 0.13241674624262E-3 0.13530035027246E-3 0.13823143252255E-3 0.14121045146066E-3 0.1442378655595E-3 - 0.14731413324907E-3 0.15043971286923E-3 0.15361506262214E-3 0.15684064052496E-3 0.1601169043625E-3 - 0.16344431163999E-3 0.16682331953598E-3 0.17025438485531E-3 0.17373796398217E-3 0.17727451283329E-3 - 0.18086448681129E-3 0.18450834075806E-3 0.18820652890835E-3 0.19195950484349E-3 0.19576772144522E-3 - 0.19963163084967E-3 0.20355168440157E-3 0.20752833260848E-3 0.21156202509534E-3 0.21565321055911E-3 - 0.21980233672357E-3 0.22400985029434E-3 0.22827619691412E-3 0.23260182111806E-3 0.23698716628934E-3 - 0.24143267461499E-3 0.24593878704191E-3 0.25050594323307E-3 0.25513458152397E-3 0.25982513887928E-3 - 0.26457805084977E-3 0.26939375152942E-3 0.27427267351278E-3 0.27921524785259E-3 0.28422190401763E-3 - 0.28929306985082E-3 0.29442917152758E-3 0.29963063351446E-3 0.30489787852801E-3 0.31023132749393E-3 - 0.31563139950652E-3 0.3210985117883E-3 0.32663307965008E-3 0.33223551645112E-3 0.33790623355976E-3 - 0.34364564031419E-3 0.34945414398357E-3 0.35533214972951E-3 0.36128006056771E-3 0.36729827733003E-3 - 0.37338719862681E-3 0.37954722080949E-3 0.38577873793356E-3 0.39208214172181E-3 0.39845782152795E-3 - 0.40490616430045E-3 0.4114275545468E-3 0.41802237429803E-3 0.42469100307361E-3 0.43143381784662E-3 - 0.43825119300931E-3 0.44514350033896E-3 0.45211110896411E-3 0.45915438533106E-3 0.46627369317081E-3 - 0.47346939346625E-3 0.48074184441976E-3 0.48809140142114E-3 
0.49551841701588E-3 0.50302324087376E-3 - 0.51060621975787E-3 0.51826769749392E-3 0.52600801493991E-3 0.53382750995622E-3 0.54172651737597E-3 - 0.54970536897584E-3 0.55776439344714E-3 0.56590391636732E-3 0.57412426017186E-3 0.58242574412641E-3 - 0.59080868429949E-3 0.59927339353533E-3 0.60782018142726E-3 0.6164493542914E-3 0.62516121514069E-3 - 0.63395606365934E-3 0.64283419617766E-3 0.6517959056472E-3 0.66084148161631E-3 0.6699712102061E-3 - 0.67918537408669E-3 0.68848425245391E-3 0.69786812100636E-3 0.70733725192282E-3 0.71689191384004E-3 - 0.72653237183096E-3 0.73625888738321E-3 0.7460717183781E-3 0.75597111906988E-3 0.76595734006544E-3 - 0.77603062830439E-3 0.78619122703948E-3 0.79643937581741E-3 0.80677531046006E-3 0.817199263046E-3 - 0.8277114618925E-3 0.83831213153781E-3 0.84900149272387E-3 0.85977976237938E-3 0.87064715360328E-3 - 0.88160387564851E-3 0.89265013390628E-3 0.90378612989059E-3 0.91501206122319E-3 0.92632812161892E-3 - 0.93773450087137E-3 0.94923138483897E-3 0.9608189554314E-3 0.9724973905964E-3 0.98426686430694E-3 - 0.99612754654877E-3 0.10080796033083E-2 0.1020123196561E-2 0.10322584842598E-2 0.10444856203243E-2 - 0.10568047546302E-2 0.10692160329986E-2 0.10817195971866E-2 0.10943155848777E-2 0.11070041296723E-2 - 0.11197853610789E-2 0.11326594045062E-2 0.11456263812542E-2 0.1158686408507E-2 0.1171839599325E-2 - 0.11850860626384E-2 0.11984259032399E-2 0.12118592217787E-2 0.12253861147543E-2 0.1239006674511E-2 - 0.12527209892327E-2 0.12665291429375E-2 0.12804312154737E-2 0.12944272825147E-2 0.13085174155559E-2 - 0.13227016819107E-2 0.1336980144707E-2 0.13513528628848E-2 0.13658198911932E-2 0.13803812801881E-2 - 0.13950370762308E-2 0.14097873214856E-2 0.14246320539191E-2 0.14395713072991E-2 0.14546051111938E-2 - 0.14697334909717E-2 0.14849564678016E-2 0.15002740586525E-2 0.15156862762949E-2 0.15311931293012E-2 - 0.15467946220474E-2 0.15624907547142E-2 0.15782815232893E-2 0.15941669195695E-2 0.16101469311629E-2 - 0.16262215414919E-2 
0.16423907297966E-2 0.16586544711377E-2 0.16750127364002E-2 0.16914654922979E-2 - 0.17080127013772E-2 0.17246543220218E-2 0.17413903084576E-2 0.17582206107578E-2 0.17751451748483E-2 - 0.17921639425135E-2 0.18092768514022E-2 0.18264838350339E-2 0.18437848228053E-2 0.18611797399972E-2 - 0.18786685077816E-2 0.18962510432288E-2 0.19139272593155E-2 0.19316970649321E-2 0.19495603648914E-2 - 0.19675170599367E-2 0.19855670467505E-2 0.20037102179635E-2 0.20219464621637E-2 0.2040275663906E-2 - 0.20586977037216E-2 0.20772124581282E-2 0.20958197996402E-2 0.21145195967788E-2 0.21333117140828E-2 - 0.21521960121197E-2 0.21711723474967E-2 0.21902405728719E-2 0.22094005369661E-2 0.22286520845748E-2 - 0.22479950565796E-2 0.22674292899613E-2 0.22869546178119E-2 0.23065708693474E-2 0.23262778699207E-2 - 0.23460754410352E-2 0.23659634003573E-2 0.23859415617311E-2 0.2406009735191E-2 0.24261677269768E-2 - 0.2446415339547E-2 0.24667523715939E-2 0.24871786180577E-2 0.25076938701417E-2 0.25282979153269E-2 - 0.25489905373876E-2 0.25697715164064E-2 0.25906406287902E-2 0.26115976472855E-2 0.26326423409946E-2 - 0.26537744753917E-2 0.26749938123391E-2 0.2696300110104E-2 0.27176931233745E-2 0.27391726032773E-2 - 0.27607382973937E-2 0.27823899497778E-2 0.28041273009731E-2 0.282595008803E-2 0.28478580445241E-2 - 0.28698509005733E-2 0.2891928382856E-2 0.29140902146294E-2 0.29363361157476E-2 0.29586658026802E-2 - 0.29810789885306E-2 0.30035753830547E-2 0.30261546926804E-2 0.30488166205256E-2 0.30715608664183E-2 - 0.30943871269151E-2 0.31172950953212E-2 0.31402844617095E-2 0.31633549129407E-2 0.31865061326827E-2 - 0.32097378014308E-2 0.32330495965276E-2 0.32564411921831E-2 0.32799122594955E-2 0.33034624664708E-2 - 0.3327091478044E-2 0.33507989560994E-2 0.33745845594914E-2 0.33984479440655E-2 0.34223887626789E-2 - 0.3446406665222E-2 0.34705012986394E-2 0.34946723069511E-2 0.35189193312737E-2 0.35432420098423E-2 - 0.35676399780318E-2 0.35921128683784E-2 0.36166603106016E-2 0.36412819316259E-2 
0.36659773556028E-2 - 0.36907462039324E-2 0.37155880952862E-2 0.37405026456287E-2 0.37654894682394E-2 0.37905481737361E-2 - 0.38156783700961E-2 0.38408796626795E-2 0.38661516542513E-2 0.38914939450041E-2 0.39169061325808E-2 - 0.39423878120971E-2 0.39679385761647E-2 0.39935580149138E-2 0.40192457160158E-2 0.40450012647068E-2 - 0.40708242438104E-2 0.40967142337604E-2 0.41226708126244E-2 0.41486935561267E-2 0.41747820376714E-2 - 0.42009358283659E-2 0.42271544970443E-2 0.42534376102902E-2 0.42797847324604E-2 0.43061954257085E-2 - 0.43326692500079E-2 0.43592057631758E-2 0.43858045208962E-2 0.44124650767437E-2 0.4439186982207E-2 - 0.44659697867126E-2 0.44928130376484E-2 0.45197162803872E-2 0.45466790583104E-2 0.45737009128319E-2 - 0.46007813834216E-2 0.46279200076292E-2 0.46551163211077E-2 0.46823698576378E-2 0.47096801491508E-2 - 0.4737046725753E-2 0.47644691157493E-2 0.47919468456669E-2 0.48194794402793E-2 0.48470664226297E-2 - 0.48747073140554E-2 0.49024016342111E-2 0.49301489010928E-2 0.49579486310619E-2 0.49858003388685E-2 - 0.50137035376757E-2 0.50416577390831E-2 0.50696624531504E-2 0.50977171884217E-2 0.51258214519488E-2 - 0.5153974749315E-2 0.51821765846591E-2 0.52104264606988E-2 0.52387238787545E-2 0.52670683387732E-2 - 0.52954593393518E-2 0.53238963777608E-2 0.53523789499683E-2 0.53809065506632E-2 0.54094786732787E-2 - 0.54380948100163E-2 0.54667544518688E-2 0.54954570886442E-2 0.55242022089889E-2 0.55529893004111E-2 - 0.55818178493045E-2 0.56106873409712E-2 0.56395972596456E-2 0.56685470885169E-2 0.56975363097532E-2 - 0.5726564404524E-2 0.57556308530241E-2 0.57847351344957E-2 0.58138767272526E-2 0.58430551087025E-2 - 0.58722697553703E-2 0.5901520142921E-2 0.59308057461823E-2 0.59601260391682E-2 0.5989480495101E-2 - 0.60188685864345E-2 0.60482897848765E-2 0.60777435614119E-2 0.61072293863246E-2 0.61367467292204E-2 - 0.61662950590498E-2 0.61958738441298E-2 0.62254825521668E-2 0.62551206502785E-2 0.62847876050167E-2 - 0.63144828823888E-2 0.63442059478806E-2 
0.63739562664778E-2 0.64037333026886E-2 0.64335365205651E-2 - 0.64633653837254E-2 0.64932193553755E-2 0.6523097898331E-2 0.65530004750387E-2 0.65829265475982E-2 - 0.66128755777836E-2 0.66428470270648E-2 0.6672840356629E-2 0.67028550274019E-2 0.67328905000691E-2 - 0.67629462350973E-2 0.67930216927552E-2 0.68231163331348E-2 0.68532296161721E-2 0.68833610016681E-2 - 0.69135099493095E-2 0.69436759186898E-2 0.69738583693293E-2 0.70040567606963E-2 0.7034270552227E-2 - 0.70644992033464E-2 0.70947421734884E-2 0.71249989221159E-2 0.71552689087412E-2 0.71855515929461E-2 - 0.72158464344014E-2 0.72461528928874E-2 0.72764704283135E-2 0.73067985007376E-2 0.73371365703863E-2 - 0.7367484097674E-2 0.73978405432227E-2 0.7428205367881E-2 0.74585780327437E-2 0.7488957999171E-2 - 0.75193447288072E-2 0.75497376836003E-2 0.75801363258205E-2 0.76105401180791E-2 0.76409485233474E-2 - 0.76713610049751E-2 0.77017770267091E-2 0.77321960527118E-2 0.77626175475796E-2 0.77930409763607E-2 - 0.78234658045741E-2 0.7853891498227E-2 0.7884317523833E-2 0.79147433484299E-2 0.79451684395977E-2 - 0.79755922654761E-2 0.8006014294782E-2 0.80364339968272E-2 0.80668508415359E-2 0.80972642994616E-2 - 0.81276738418045E-2 0.81580789404287E-2 0.8188479067879E-2 0.82188736973978E-2 0.82492623029423E-2 - 0.82796443592005E-2 0.83100193416083E-2 0.83403867263658E-2 0.83707459904539E-2 0.84010966116501E-2 - 0.84314380685453E-2 0.84617698405594E-2 0.84920914079573E-2 0.85224022518652E-2 0.8552701854286E-2 - 0.85829896981149E-2 0.86132652671552E-2 0.86435280461336E-2 0.86737775207158E-2 0.87040131775212E-2 - 0.87342345041388E-2 0.87644409891416E-2 0.87946321221017E-2 0.88248073936054E-2 0.88549662952675E-2 - 0.88851083197462E-2 0.89152329607575E-2 0.89453397130898E-2 0.89754280726178E-2 0.9005497536317E-2 - 0.90355476022776E-2 0.90655777697189E-2 0.90955875390024E-2 0.91255764116462E-2 0.91555438903387E-2 - 0.91854894789514E-2 0.92154126825534E-2 0.92453130074237E-2 0.92751899610652E-2 0.93050430522176E-2 - 
0.933487179087E-2 0.93646756882745E-2 0.93944542569584E-2 0.94242070107374E-2 0.94539334647278E-2 - 0.94836331353589E-2 0.95133055403859E-2 0.95429501989017E-2 0.95725666313491E-2 0.96021543595331E-2 - 0.96317129066325E-2 0.9661241797212E-2 0.96907405572339E-2 0.97202087140694E-2 0.97496457965106E-2 - 0.97790513347816E-2 0.98084248605497E-2 0.9837765906937E-2 0.9867074008531E-2 0.98963487013959E-2 - 0.99255895230834E-2 0.99547960126432E-2 0.99839677106341E-2 0.10013104159134E-1 0.10042204901752E-1 - 0.10071269483635E-1 0.10100297451481E-1 0.10129288353552E-1 0.10158241739675E-1 0.10187157161261E-1 - 0.10216034171311E-1 0.10244872324425E-1 0.10273671176813E-1 0.10302430286304E-1 0.10331149212355E-1 - 0.10359827516062E-1 0.10388464760167E-1 0.10417060509066E-1 0.10445614328824E-1 0.10474125787177E-1 - 0.10502594453544E-1 0.10531019899034E-1 0.10559401696458E-1 0.10587739420334E-1 0.10616032646897E-1 - 0.10644280954104E-1 0.10672483921649E-1 0.10700641130964E-1 0.1072875216523E-1 0.10756816609386E-1 - 0.10784834050134E-1 0.10812804075949E-1 0.10840726277084E-1 0.10868600245579E-1 0.10896425575271E-1 - 0.10924201861793E-1 0.10951928702592E-1 0.10979605696927E-1 0.11007232445881E-1 0.11034808552365E-1 - 0.11062333621127E-1 0.11089807258757E-1 0.11117229073693E-1 0.1114459867623E-1 0.11171915678525E-1 - 0.111991796946E-1 0.11226390340355E-1 0.11253547233567E-1 0.112806499939E-1 0.1130769824291E-1 - 0.11334691604051E-1 0.11361629702678E-1 0.11388512166057E-1 0.11415338623368E-1 0.11442108705708E-1 - 0.11468822046101E-1 0.11495478279499E-1 0.11522077042791E-1 0.11548617974804E-1 0.11575100716311E-1 - 0.11601524910032E-1 0.11627890200645E-1 0.11654196234782E-1 0.11680442661042E-1 0.11706629129989E-1 - 0.11732755294161E-1 0.1175882080807E-1 0.11784825328209E-1 0.11810768513055E-1 0.11836650023073E-1 - 0.11862469520721E-1 0.11888226670452E-1 0.11913921138716E-1 0.11939552593971E-1 0.11965120706675E-1 - 0.11990625149302E-1 0.12016065596334E-1 0.12041441724273E-1 
0.12066753211639E-1 0.12091999738972E-1 - 0.12117180988842E-1 0.12142296645844E-1 0.12167346396605E-1 0.12192329929785E-1 0.12217246936082E-1 - 0.1224209710823E-1 0.12266880141008E-1 0.12291595731235E-1 0.12316243577778E-1 0.12340823381552E-1 - 0.12365334845521E-1 0.12389777674701E-1 0.12414151576164E-1 0.12438456259036E-1 0.12462691434502E-1 - 0.12486856815806E-1 0.12510952118251E-1 0.12534977059206E-1 0.12558931358102E-1 0.12582814736435E-1 - 0.12606626917768E-1 0.12630367627732E-1 0.12654036594027E-1 0.12677633546422E-1 0.12701158216757E-1 - 0.12724610338946E-1 0.12747989648973E-1 0.12771295884896E-1 0.12794528786848E-1 0.12817688097035E-1 - 0.12840773559739E-1 0.12863784921318E-1 0.12886721930204E-1 0.12909584336908E-1 0.12932371894014E-1 - 0.12955084356186E-1 0.12977721480162E-1 0.13000283024756E-1 0.13022768750861E-1 0.13045178421445E-1 - 0.13067511801552E-1 0.13089768658302E-1 0.13111948760889E-1 0.13134051880583E-1 0.13156077790731E-1 - 0.13178026266748E-1 0.13199897086128E-1 0.13221690028434E-1 0.13243404875302E-1 0.13265041410439E-1 - 0.1328659941962E-1 0.13308078690692E-1 0.13329479013568E-1 0.13350800180227E-1 0.13372041984715E-1 - 0.13393204223142E-1 0.13414286693681E-1 0.13435289196566E-1 0.13456211534092E-1 0.13477053510612E-1 - 0.13497814932537E-1 0.13518495608332E-1 0.13539095348517E-1 0.13559613965664E-1 0.13580051274393E-1 - 0.13600407091376E-1 0.13620681235327E-1 0.13640873527009E-1 0.13660983789224E-1 0.13681011846814E-1 - 0.13700957526662E-1 0.13720820657683E-1 0.13740601070828E-1 0.13760298599079E-1 0.13779913077444E-1 - 0.13799444342961E-1 0.13818892234688E-1 0.13838256593708E-1 0.13857537263119E-1 0.13876734088036E-1 - 0.13895846915588E-1 0.13914875594913E-1 0.13933819977156E-1 0.13952679915468E-1 0.13971455264999E-1 - 0.13990145882901E-1 0.14008751628319E-1 0.1402727236239E-1 0.14045707948242E-1 0.14064058250986E-1 - 0.14082323137719E-1 0.14100502477514E-1 0.14118596141424E-1 0.1413660400247E-1 0.14154525935645E-1 - 0.14172361817906E-1 
0.14190111528173E-1 0.14207774947325E-1 0.14225351958193E-1 0.14242842445561E-1 - 0.14260246296161E-1 0.14277563398665E-1 0.14294793643689E-1 0.14311936923781E-1 0.14328993133423E-1 - 0.14345962169023E-1 0.14362843928915E-1 0.14379638313351E-1 0.14396345224498E-1 0.14412964566437E-1 - 0.14429496245153E-1 0.14445940168536E-1 0.14462296246375E-1 0.14478564390352E-1 0.1449474451404E-1 - 0.14510836532896E-1 0.14526840364261E-1 0.1454275592735E-1 0.14558583143252E-1 0.14574321934923E-1 - 0.1458997222718E-1 0.14605533946703E-1 0.14621007022021E-1 0.14636391383515E-1 0.14651686963409E-1 - 0.14666893695766E-1 0.14682011516486E-1 0.14697040363295E-1 0.14711980175747E-1 0.14726830895215E-1 - 0.14741592464887E-1 0.14756264829761E-1 0.14770847936639E-1 0.14785341734126E-1 0.14799746172619E-1 - 0.14814061204304E-1 0.14828286783155E-1 0.14842422864923E-1 0.14856469407132E-1 0.14870426369078E-1 - 0.14884293711819E-1 0.14898071398171E-1 0.14911759392703E-1 0.14925357661732E-1 0.14938866173318E-1 - 0.14952284897255E-1 0.14965613805071E-1 0.14978852870019E-1 0.14992002067071E-1 0.15005061372917E-1 - 0.15018030765952E-1 0.15030910226277E-1 0.15043699735691E-1 0.15056399277684E-1 0.15069008837434E-1 - 0.15081528401799E-1 0.15093957959313E-1 0.15106297500178E-1 0.15118547016261E-1 0.15130706501087E-1 - 0.15142775949832E-1 0.15154755359318E-1 0.1516664472801E-1 0.15178444056003E-1 0.15190153345024E-1 - 0.15201772598422E-1 0.15213301821162E-1 0.15224741019819E-1 0.15236090202575E-1 0.15247349379208E-1 - 0.1525851856109E-1 0.15269597761179E-1 0.15280586994015E-1 0.1529148627571E-1 0.15302295623946E-1 - 0.15313015057965E-1 0.15323644598569E-1 0.15334184268106E-1 0.15344634090468E-1 0.15354994091087E-1 - 0.15365264296923E-1 0.15375444736463E-1 0.15385535439713E-1 0.15395536438189E-1 0.15405447764916E-1 - 0.15415269454417E-1 0.15425001542708E-1 0.15434644067295E-1 0.15444197067162E-1 0.15453660582769E-1 - 0.15463034656043E-1 0.15472319330374E-1 0.15481514650606E-1 0.15490620663033E-1 
0.15499637415392E-1 - 0.15508564956853E-1 0.15517403338019E-1 0.15526152610915E-1 0.15534812828983E-1 0.15543384047072E-1 - 0.15551866321441E-1 0.15560259709739E-1 0.15568564271012E-1 0.15576780065686E-1 0.15584907155566E-1 - 0.15592945603828E-1 0.15600895475012E-1 0.15608756835018E-1 0.15616529751094E-1 0.15624214291835E-1 - 0.15631810527174E-1 0.15639318528376E-1 0.1564673836803E-1 0.15654070120043E-1 0.15661313859635E-1 - 0.15668469663331E-1 0.15675537608953E-1 0.15682517775618E-1 0.15689410243725E-1 0.15696215094954E-1 - 0.15702932412255E-1 0.15709562279844E-1 0.15716104783197E-1 0.1572256000904E-1 0.15728928045345E-1 - 0.15735208981323E-1 0.15741402907416E-1 0.15747509915293E-1 0.15753530097839E-1 0.15759463549154E-1 - 0.1576531036454E-1 0.157710706405E-1 0.15776744474728E-1 0.15782331966101E-1 0.15787833214679E-1 - 0.15793248321688E-1 0.15798577389525E-1 0.1580382052174E-1 0.15808977823038E-1 0.15814049399267E-1 - 0.15819035357415E-1 0.15823935805599E-1 0.15828750853063E-1 0.15833480610167E-1 0.15838125188385E-1 - 0.15842684700293E-1 0.15847159259566E-1 0.15851548980972E-1 0.15855853980359E-1 0.15860074374657E-1 - 0.15864210281867E-1 0.1586826182105E-1 0.15872229112331E-1 0.15876112276881E-1 0.15879911436918E-1 - 0.15883626715697E-1 0.15887258237503E-1 0.15890806127647E-1 0.15894270512457E-1 0.1589765151927E-1 - 0.15900949276432E-1 0.1590416391328E-1 0.15907295560149E-1 0.15910344348352E-1 0.15913310410184E-1 - 0.1591619387891E-1 0.15918994888757E-1 0.15921713574912E-1 0.15924350073514E-1 0.15926904521643E-1 - 0.1592937705732E-1 0.15931767819496E-1 0.15934076948046E-1 0.15936304583764E-1 0.15938450868355E-1 - 0.15940515944429E-1 0.15942499955493E-1 0.15944403045949E-1 0.15946225361079E-1 0.15947967047048E-1 - 0.1594962825089E-1 0.15951209120507E-1 0.15952709804658E-1 0.15954130452954E-1 0.15955471215854E-1 - 0.15956732244654E-1 0.15957913691485E-1 0.15959015709301E-1 0.1596003845188E-1 0.1596098207381E-1 - 0.15961846730488E-1 0.1596263257811E-1 
0.15963339773666E-1 0.15963968474934E-1 0.15964518840474E-1 - 0.1596499102962E-1 0.15965385202472E-1 0.15965701519895E-1 0.15965940143509E-1 0.15966101235681E-1 - 0.15966184959522E-1 0.1596619147888E-1 0.15966120958333E-1 0.1596597356318E-1 0.15965749459441E-1 - 0.15965448813845E-1 0.15965071793825E-1 0.15964618567515E-1 0.15964089303738E-1 0.15963484172005E-1 - 0.15962803342507E-1 0.15962046986106E-1 0.15961215274333E-1 0.1596030837938E-1 0.15959326474092E-1 - 0.15958269731964E-1 0.15957138327134E-1 0.15955932434374E-1 0.15954652229086E-1 0.159532978873E-1 - 0.15951869585658E-1 0.15950367501417E-1 0.15948791812439E-1 0.15947142697185E-1 0.1594542033471E-1 - 0.15943624904655E-1 0.15941756587243E-1 0.15939815563273E-1 0.15937802014111E-1 0.15935716121689E-1 - 0.15933558068495E-1 0.15931328037566E-1 0.15929026212486E-1 0.15926652777381E-1 0.15924207916905E-1 - 0.15921691816244E-1 0.15919104661103E-1 0.15916446637703E-1 0.15913717932775E-1 0.15910918733554E-1 - 0.15908049227774E-1 0.15905109603659E-1 0.15902100049922E-1 0.15899020755754E-1 0.15895871910822E-1 - 0.15892653705264E-1 0.15889366329677E-1 0.1588600997512E-1 0.15882584833102E-1 0.15879091095577E-1 - 0.15875528954941E-1 0.15871898604025E-1 0.15868200236088E-1 0.15864434044813E-1 0.15860600224302E-1 - 0.15856698969067E-1 0.15852730474029E-1 0.15848694934509E-1 0.15844592546224E-1 0.1584042350528E-1 - 0.15836188008169E-1 0.15831886251762E-1 0.15827518433302E-1 0.158230847504E-1 0.15818585401033E-1 - 0.15814020583532E-1 0.15809390496579E-1 0.15804695339206E-1 0.15799935310784E-1 0.15795110611017E-1 - 0.15790221439945E-1 0.15785267997928E-1 0.15780250485649E-1 0.15775169104102E-1 0.15770024054592E-1 - 0.15764815538729E-1 0.1575954375842E-1 0.15754208915865E-1 0.15748811213553E-1 0.15743350854255E-1 - 0.15737828041022E-1 0.15732242977175E-1 0.15726595866305E-1 0.15720886912262E-1 0.15715116319157E-1 - 0.15709284291351E-1 0.15703391033453E-1 0.15697436750314E-1 0.15691421647021E-1 0.15685345928895E-1 - 
0.15679209801484E-1 0.15673013470555E-1 0.15666757142097E-1 0.15660441022307E-1 0.15654065317593E-1 - 0.15647630234562E-1 0.15641135980021E-1 0.15634582760969E-1 0.15627970784594E-1 0.15621300258264E-1 - 0.1561457138953E-1 0.15607784386111E-1 0.156009394559E-1 0.15594036806951E-1 0.15587076647478E-1 - 0.1558005918585E-1 0.15572984630585E-1 0.15565853190347E-1 0.1555866507394E-1 0.15551420490306E-1 - 0.15544119648515E-1 0.15536762757767E-1 0.15529350027383E-1 0.15521881666801E-1 0.15514357885573E-1 - 0.1550677889336E-1 0.15499144899925E-1 0.15491456115134E-1 0.15483712748946E-1 0.15475915011412E-1 - 0.15468063112668E-1 0.15460157262932E-1 0.15452197672502E-1 0.15444184551748E-1 0.15436118111107E-1 - 0.15427998561083E-1 0.15419826112241E-1 0.15411600975199E-1 0.15403323360629E-1 0.15394993479251E-1 - 0.15386611541827E-1 0.15378177759158E-1 0.15369692342082E-1 0.15361155501466E-1 0.15352567448203E-1 - 0.15343928393211E-1 0.15335238547426E-1 0.15326498121796E-1 0.15317707327281E-1 0.15308866374848E-1 - 0.15299975475466E-1 0.15291034840101E-1 0.15282044679714E-1 0.15273005205257E-1 0.15263916627667E-1 - 0.15254779157866E-1 0.1524559300675E-1 0.15236358385196E-1 0.15227075504045E-1 0.1521774457411E-1 - 0.15208365806166E-1 0.15198939410946E-1 0.15189465599139E-1 0.15179944581387E-1 0.15170376568278E-1 - 0.15160761770347E-1 0.15151100398067E-1 0.15141392661849E-1 0.15131638772038E-1 0.15121838938907E-1 - 0.15111993372657E-1 0.1510210228341E-1 0.15092165881206E-1 0.15082184376002E-1 0.15072157977666E-1 - 0.15062086895973E-1 0.15051971340605E-1 0.15041811521143E-1 0.15031607647066E-1 0.15021359927748E-1 - 0.15011068572454E-1 0.15000733790335E-1 0.14990355790426E-1 0.14979934781644E-1 0.14969470972783E-1 - 0.14958964572509E-1 0.1494841578936E-1 0.14937824831742E-1 0.14927191907924E-1 0.14916517226035E-1 - 0.14905800994062E-1 0.14895043419848E-1 0.14884244711085E-1 0.14873405075313E-1 0.14862524719917E-1 - 0.14851603852125E-1 0.14840642679001E-1 0.14829641407446E-1 
0.14818600244193E-1 0.14807519395803E-1 - 0.14796399068664E-1 0.14785239468989E-1 0.14774040802807E-1 0.14762803275967E-1 0.14751527094132E-1 - 0.14740212462774E-1 0.14728859587177E-1 0.14717468672426E-1 0.14706039923411E-1 0.14694573544822E-1 - 0.14683069741143E-1 0.14671528716654E-1 0.14659950675426E-1 0.14648335821317E-1 0.14636684357971E-1 - 0.14624996488814E-1 0.14613272417054E-1 0.14601512345673E-1 0.1458971647743E-1 0.14577885014855E-1 - 0.14566018160246E-1 0.14554116115668E-1 0.14542179082951E-1 0.14530207263684E-1 0.14518200859217E-1 - 0.14506160070654E-1 0.14494085098852E-1 0.14481976144421E-1 0.14469833407718E-1 0.14457657088846E-1 - 0.1444544738765E-1 0.14433204503719E-1 0.14420928636377E-1 0.14408619984685E-1 0.1439627874744E-1 - 0.14383905123165E-1 0.14371499310117E-1 0.14359061506276E-1 0.14346591909346E-1 0.14334090716755E-1 - 0.14321558125649E-1 0.14308994332891E-1 0.14296399535057E-1 0.1428377392844E-1 0.14271117709038E-1 - 0.14258431072561E-1 0.14245714214423E-1 0.14232967329741E-1 0.14220190613336E-1 0.14207384259727E-1 - 0.14194548463128E-1 0.1418168341745E-1 0.14168789316299E-1 0.14155866352967E-1 0.14142914720439E-1 - 0.14129934611384E-1 0.14116926218158E-1 0.14103889732797E-1 0.1409082534702E-1 0.14077733252224E-1 - 0.1406461363948E-1 0.14051466699539E-1 0.1403829262282E-1 0.14025091599414E-1 0.14011863819084E-1 - 0.13998609471256E-1 0.13985328745023E-1 0.13972021829141E-1 0.13958688912029E-1 0.13945330181764E-1 - 0.13931945826081E-1 0.13918536032371E-1 0.1390510098768E-1 0.13891640878707E-1 0.138781558918E-1 - 0.13864646212958E-1 0.13851112027826E-1 0.13837553521696E-1 0.13823970879503E-1 0.13810364285825E-1 - 0.1379673392488E-1 0.13783079980526E-1 0.13769402636257E-1 0.13755702075205E-1 0.13741978480135E-1 - 0.13728232033444E-1 0.13714462917162E-1 0.13700671312948E-1 0.13686857402087E-1 0.13673021365495E-1 - 0.1365916338371E-1 0.13645283636893E-1 0.1363138230483E-1 0.13617459566926E-1 0.13603515602205E-1 - 0.13589550589311E-1 
0.13575564706502E-1 0.13561558131653E-1 0.13547531042251E-1 0.13533483615398E-1 - 0.13519416027806E-1 0.13505328455795E-1 0.13491221075295E-1 0.13477094061844E-1 0.13462947590585E-1 - 0.13448781836265E-1 0.13434596973236E-1 0.13420393175451E-1 0.13406170616464E-1 0.13391929469429E-1 - 0.13377669907098E-1 0.13363392101822E-1 0.13349096225547E-1 0.13334782449813E-1 0.13320450945756E-1 - 0.13306101884104E-1 0.13291735435177E-1 0.13277351768884E-1 0.13262951054726E-1 0.13248533461791E-1 - 0.13234099158755E-1 0.1321964831388E-1 0.13205181095013E-1 0.13190697669586E-1 0.13176198204615E-1 - 0.13161682866697E-1 0.13147151822012E-1 0.13132605236319E-1 0.13118043274957E-1 0.13103466102844E-1 - 0.13088873884476E-1 0.13074266783924E-1 0.13059644964837E-1 0.13045008590437E-1 0.13030357823524E-1 - 0.13015692826466E-1 0.13001013761208E-1 0.12986320789264E-1 0.12971614071719E-1 0.12956893769231E-1 - 0.12942160042024E-1 0.12927413049891E-1 0.12912652952194E-1 0.12897879907861E-1 0.12883094075388E-1 - 0.12868295612834E-1 0.12853484677824E-1 0.12838661427548E-1 0.12823826018759E-1 0.12808978607772E-1 - 0.12794119350465E-1 0.12779248402277E-1 0.12764365918208E-1 0.12749472052819E-1 0.1273456696023E-1 - 0.1271965079412E-1 0.12704723707726E-1 0.12689785853845E-1 0.12674837384828E-1 0.12659878452585E-1 - 0.12644909208582E-1 0.12629929803841E-1 0.12614940388938E-1 0.12599941114006E-1 0.12584932128728E-1 - 0.12569913582346E-1 0.1255488562365E-1 0.12539848400988E-1 0.12524802062257E-1 0.12509746754905E-1 - 0.12494682625936E-1 0.124796098219E-1 0.12464528488902E-1 0.12449438772593E-1 0.12434340818178E-1 - 0.12419234770409E-1 0.12404120773588E-1 0.12388998971566E-1 0.12373869507742E-1 0.12358732525063E-1 - 0.12343588166026E-1 0.12328436572673E-1 0.12313277886593E-1 0.12298112248925E-1 0.12282939800351E-1 - 0.12267760681103E-1 0.12252575030957E-1 0.12237382989236E-1 0.12222184694807E-1 0.12206980286084E-1 - 0.12191769901027E-1 0.12176553677139E-1 0.1216133175147E-1 0.12146104260614E-1 
0.12130871340707E-1 - 0.12115633127435E-1 0.12100389756022E-1 0.12085141361241E-1 0.12069888077406E-1 0.12054630038377E-1 - 0.12039367377556E-1 0.12024100227888E-1 0.12008828721865E-1 0.11993552991518E-1 0.11978273168426E-1 - 0.11962989383707E-1 0.11947701768025E-1 0.11932410451587E-1 0.11917115564141E-1 0.11901817234981E-1 - 0.11886515592943E-1 0.11871210766405E-1 0.1185590288329E-1 0.11840592071062E-1 0.11825278456732E-1 - 0.11809962166848E-1 0.11794643327508E-1 0.11779322064348E-1 0.1176399850255E-1 0.11748672766839E-1 - 0.11733344981482E-1 0.11718015270292E-1 0.11702683756623E-1 0.11687350563374E-1 0.11672015812989E-1 - 0.11656679627453E-1 0.11641342128297E-1 0.11626003436596E-1 0.11610663672969E-1 0.1159532295758E-1 - 0.11579981410136E-1 0.1156463914989E-1 0.1154929629564E-1 0.11533952965728E-1 0.11518609278044E-1 - 0.11503265350019E-1 0.11487921298634E-1 0.11472577240413E-1 0.11457233291427E-1 0.11441889567295E-1 - 0.11426546183179E-1 0.11411203253791E-1 0.11395860893388E-1 0.11380519215775E-1 0.11365178334305E-1 - 0.11349838361879E-1 0.11334499410944E-1 0.11319161593497E-1 0.11303825021084E-1 0.11288489804799E-1 - 0.11273156055286E-1 0.11257823882738E-1 0.11242493396899E-1 0.1122716470706E-1 0.11211837922066E-1 - 0.11196513150312E-1 0.11181190499744E-1 0.11165870077858E-1 0.11150551991705E-1 0.11135236347886E-1 - 0.11119923252555E-1 0.11104612811419E-1 0.1108930512974E-1 0.11074000312332E-1 0.11058698463564E-1 - 0.11043399687359E-1 0.11028104087196E-1 0.11012811766109E-1 0.10997522826687E-1 0.10982237371077E-1 - 0.10966955500981E-1 0.1095167731766E-1 0.10936402921931E-1 0.1092113241417E-1 0.10905865894311E-1 - 0.10890603461848E-1 0.10875345215833E-1 0.10860091254879E-1 0.1084484167716E-1 0.10829596580409E-1 - 0.10814356061923E-1 0.10799120218558E-1 0.10783889146734E-1 0.10768662942436E-1 0.10753441701208E-1 - 0.10738225518162E-1 0.10723014487972E-1 0.10707808704878E-1 0.10692608262687E-1 0.10677413254769E-1 - 0.10662223774063E-1 0.10647039913075E-1 
0.10631861763878E-1 0.10616689418115E-1 0.10601522966996E-1 - 0.10586362501301E-1 0.10571208111381E-1 0.10556059887158E-1 0.10540917918123E-1 0.10525782293341E-1 - 0.10510653101449E-1 0.10495530430656E-1 0.10480414368748E-1 0.1046530500308E-1 0.10450202420588E-1 - 0.10435106707778E-1 0.10420017950736E-1 0.10404936235123E-1 0.10389861646178E-1 0.10374794268719E-1 - 0.10359734187141E-1 0.10344681485419E-1 0.10329636247109E-1 0.10314598555348E-1 0.10299568492852E-1 - 0.10284546141922E-1 0.10269531584439E-1 0.10254524901869E-1 0.10239526175263E-1 0.10224535485255E-1 - 0.10209552912065E-1 0.10194578535499E-1 0.10179612434952E-1 0.10164654689403E-1 0.10149705377422E-1 - 0.10134764577167E-1 0.10119832366386E-1 0.10104908822418E-1 0.1008999402219E-1 0.10075088042226E-1 - 0.10060190958639E-1 0.10045302847137E-1 0.1003042378302E-1 0.10015553841185E-1 0.10000693096124E-1 - 0.99858416219255E-2 0.9970999492274E-2 0.99561667804529E-2 0.99413435593438E-2 0.99265299014278E-2 - 0.9911725878786E-2 0.98969315631004E-2 0.98821470256546E-2 0.98673723373349E-2 0.98526075686305E-2 - 0.98378527896348E-2 0.98231080700458E-2 0.98083734791672E-2 0.97936490859091E-2 0.97789349587886E-2 - 0.97642311659309E-2 0.97495377750698E-2 0.97348548535488E-2 0.97201824683216E-2 0.97055206859533E-2 - 0.96908695726207E-2 0.96762291941137E-2 0.96615996158355E-2 0.96469809028039E-2 0.9632373119652E-2 - 0.96177763306288E-2 0.96031905996004E-2 0.95886159900506E-2 0.95740525650817E-2 0.95595003874154E-2 - 0.95449595193937E-2 0.95304300229796E-2 0.95159119597583E-2 0.95014053909374E-2 0.94869103773484E-2 - 0.94724269794472E-2 0.94579552573149E-2 0.94434952706589E-2 0.94290470788137E-2 0.94146107407416E-2 - 0.94001863150337E-2 0.93857738599107E-2 0.93713734332239E-2 0.93569850924557E-2 0.93426088947209E-2 - 0.93282448967676E-2 0.93138931549775E-2 0.92995537253674E-2 0.92852266635897E-2 0.92709120249336E-2 - 0.92566098643255E-2 0.92423202363304E-2 0.92280431951526E-2 0.92137787946364E-2 0.91995270882672E-2 - 
0.91852881291724E-2 0.9171061970122E-2 0.915684866353E-2 0.91426482614549E-2 0.91284608156006E-2 - 0.91142863773177E-2 0.91001249976038E-2 0.90859767271049E-2 0.90718416161161E-2 0.90577197145825E-2 - 0.90436110721001E-2 0.90295157379169E-2 0.90154337609335E-2 0.90013651897041E-2 0.89873100724377E-2 - 0.89732684569987E-2 0.89592403909078E-2 0.89452259213432E-2 0.89312250951412E-2 0.89172379587973E-2 - 0.89032645584672E-2 0.88893049399675E-2 0.88753591487767E-2 0.88614272300361E-2 0.88475092285511E-2 - 0.88336051887914E-2 0.88197151548926E-2 0.88058391706567E-2 0.87919772795533E-2 0.87781295247203E-2 - 0.87642959489652E-2 0.87504765947655E-2 0.87366715042699E-2 0.87228807192996E-2 0.87091042813486E-2 - 0.8695342231585E-2 0.86815946108517E-2 0.86678614596678E-2 0.8654142818229E-2 0.86404387264089E-2 - 0.86267492237597E-2 0.86130743495133E-2 0.85994141425822E-2 0.85857686415605E-2 0.85721378847247E-2 - 0.85585219100347E-2 0.85449207551348E-2 0.85313344573546E-2 0.851776305371E-2 0.85042065809039E-2 - 0.84906650753277E-2 0.84771385730616E-2 0.84636271098759E-2 0.8450130721232E-2 0.84366494422829E-2 - 0.84231833078749E-2 0.84097323525479E-2 0.83962966105366E-2 0.83828761157714E-2 0.83694709018795E-2 - 0.83560810021854E-2 0.83427064497126E-2 0.83293472771838E-2 0.83160035170223E-2 0.83026752013528E-2 - 0.82893623620024E-2 0.82760650305014E-2 0.82627832380846E-2 0.82495170156918E-2 0.8236266393969E-2 - 0.82230314032695E-2 0.82098120736545E-2 0.81966084348942E-2 0.8183420516469E-2 0.817024834757E-2 - 0.81570919571004E-2 0.81439513736759E-2 0.81308266256263E-2 0.8117717740996E-2 0.81046247475451E-2 - 0.80915476727503E-2 0.8078486543806E-2 0.80654413876249E-2 0.80524122308395E-2 0.80393990998025E-2 - 0.8026402020588E-2 0.80134210189925E-2 0.80004561205359E-2 0.79875073504621E-2 0.79745747337404E-2 - 0.7961658295066E-2 0.79487580588613E-2 0.79358740492768E-2 0.79230062901919E-2 0.79101548052159E-2 - 0.7897319617689E-2 0.78845007506833E-2 0.78716982270034E-2 
0.78589120691879E-2 0.78461422995099E-2 - 0.78333889399782E-2 0.7820652012338E-2 0.7807931538072E-2 0.77952275384015E-2 0.7782540034287E-2 - 0.77698690464294E-2 0.77572145952708E-2 0.77445767009955E-2 0.7731955383531E-2 0.77193506625489E-2 - 0.77067625574656E-2 0.76941910874438E-2 0.76816362713927E-2 0.76690981279697E-2 0.76565766755807E-2 - 0.76440719323814E-2 0.76315839162783E-2 0.76191126449291E-2 0.76066581357444E-2 0.75942204058879E-2 - 0.7581799472278E-2 0.75693953515882E-2 0.75570080602481E-2 0.75446376144449E-2 0.75322840301236E-2 - 0.7519947322988E-2 0.75076275085024E-2 0.74953246018916E-2 0.74830386181423E-2 0.74707695720039E-2 - 0.74585174779894E-2 0.74462823503766E-2 0.74340642032086E-2 0.74218630502949E-2 0.74096789052125E-2 - 0.73975117813065E-2 0.73853616916912E-2 0.73732286492511E-2 0.73611126666417E-2 0.73490137562903E-2 - 0.73369319303972E-2 0.73248672009363E-2 0.73128195796562E-2 0.73007890780813E-2 0.72887757075121E-2 - 0.72767794790269E-2 0.72648004034819E-2 0.7252838491513E-2 0.72408937535357E-2 0.72289661997469E-2 - 0.72170558401253E-2 0.72051626844324E-2 0.71932867422135E-2 0.71814280227985E-2 0.71695865353027E-2 - 0.71577622886282E-2 0.7145955291464E-2 0.71341655522876E-2 0.71223930793654E-2 0.71106378807541E-2 - 0.7098899964301E-2 0.70871793376454E-2 0.70754760082192E-2 0.70637899832478E-2 0.70521212697512E-2 - 0.70404698745447E-2 0.70288358042398E-2 0.7017219065245E-2 0.70056196637669E-2 0.69940376058111E-2 - 0.69824728971827E-2 0.69709255434876E-2 0.69593955501331E-2 0.69478829223289E-2 0.69363876650881E-2 - 0.69249097832276E-2 0.69134492813696E-2 0.69020061639421E-2 0.68905804351797E-2 0.68791720991247E-2 - 0.68677811596278E-2 0.68564076203492E-2 0.6845051484759E-2 0.68337127561387E-2 0.68223914375813E-2 - 0.6811087531993E-2 0.67998010420932E-2 0.67885319704162E-2 0.67772803193112E-2 0.6766046090944E-2 - 0.67548292872972E-2 0.67436299101712E-2 0.67324479611853E-2 0.67212834417784E-2 0.67101363532097E-2 - 0.66990066965598E-2 
0.66878944727312E-2 0.66767996824495E-2 0.6665722326264E-2 0.66546624045488E-2 - 0.66436199175033E-2 0.66325948651531E-2 0.66215872473511E-2 0.66105970637781E-2 0.65996243139436E-2 - 0.65886689971869E-2 0.65777311126775E-2 0.65668106594163E-2 0.65559076362362E-2 0.6545022041803E-2 - 0.65341538746164E-2 0.65233031330104E-2 0.65124698151545E-2 0.65016539190542E-2 0.64908554425522E-2 - 0.64800743833288E-2 0.6469310738903E-2 0.64585645066332E-2 0.6447835683718E-2 0.6437124267197E-2 - 0.64264302539516E-2 0.64157536407059E-2 0.64050944240273E-2 0.63944526003275E-2 0.63838281658632E-2 - 0.63732211167368E-2 0.63626314488975E-2 0.63520591581418E-2 0.63415042401142E-2 0.63309666903084E-2 - 0.63204465040678E-2 0.63099436765863E-2 0.62994582029089E-2 0.62889900779332E-2 0.62785392964091E-2 - 0.62681058529406E-2 0.62576897419858E-2 0.62472909578582E-2 0.62369094947272E-2 0.62265453466189E-2 - 0.62161985074169E-2 0.62058689708633E-2 0.61955567305588E-2 0.61852617799642E-2 0.61749841124008E-2 - 0.61647237210512E-2 0.61544805989601E-2 0.6144254739035E-2 0.61340461340469E-2 0.61238547766313E-2 - 0.61136806592886E-2 0.61035237743852E-2 0.60933841141539E-2 0.60832616706949E-2 0.60731564359765E-2 - 0.60630684018357E-2 0.60529975599792E-2 0.60429439019839E-2 0.60329074192975E-2 0.60228881032398E-2 - 0.60128859450028E-2 0.60029009356519E-2 0.59929330661262E-2 0.59829823272397E-2 0.59730487096816E-2 - 0.59631322040173E-2 0.5953232800689E-2 0.59433504900165E-2 0.59334852621977E-2 0.59236371073096E-2 - 0.59138060153088E-2 0.59039919760323E-2 0.58941949791984E-2 0.58844150144069E-2 0.58746520711404E-2 - 0.58649061387644E-2 0.58551772065287E-2 0.58454652635673E-2 0.58357702988999E-2 0.5826092301432E-2 - 0.58164312599558E-2 0.58067871631509E-2 0.57971599995852E-2 0.57875497577151E-2 0.57779564258865E-2 - 0.57683799923356E-2 0.57588204451893E-2 0.57492777724661E-2 0.57397519620765E-2 0.5730243001824E-2 - 0.57207508794057E-2 0.57112755824128E-2 0.57018170983313E-2 0.56923754145431E-2 
0.56829505183258E-2 - 0.56735423968543E-2 0.56641510372008E-2 0.56547764263359E-2 0.56454185511289E-2 0.56360773983487E-2 - 0.56267529546643E-2 0.56174452066456E-2 0.56081541407641E-2 0.55988797433931E-2 0.55896220008089E-2 - 0.55803808991914E-2 0.55711564246241E-2 0.55619485630957E-2 0.55527573004999E-2 0.55435826226365E-2 - 0.5534424515212E-2 0.55252829638401E-2 0.55161579540424E-2 0.55070494712489E-2 0.54979575007989E-2 - 0.54888820279415E-2 0.54798230378361E-2 0.54707805155531E-2 0.54617544460747E-2 0.54527448142952E-2 - 0.54437516050218E-2 0.54347748029753E-2 0.54258143927906E-2 0.54168703590171E-2 0.54079426861198E-2 - 0.53990313584795E-2 0.53901363603936E-2 0.53812576760765E-2 0.53723952896606E-2 0.53635491851964E-2 - 0.53547193466534E-2 0.53459057579208E-2 0.53371084028077E-2 0.53283272650441E-2 0.53195623282812E-2 - 0.53108135760921E-2 0.53020809919726E-2 0.52933645593412E-2 0.52846642615403E-2 0.52759800818366E-2 - 0.52673120034214E-2 0.52586600094115E-2 0.52500240828497E-2 0.52414042067052E-2 0.52328003638744E-2 - 0.52242125371815E-2 0.52156407093787E-2 0.52070848631471E-2 0.51985449810972E-2 0.51900210457694E-2 - 0.51815130396347E-2 0.51730209450949E-2 0.51645447444837E-2 0.51560844200666E-2 0.51476399540423E-2 - 0.51392113285423E-2 0.51307985256321E-2 0.51224015273115E-2 0.51140203155152E-2 0.51056548721135E-2 - 0.50973051789124E-2 0.50889712176546E-2 0.50806529700199E-2 0.50723504176254E-2 0.50640635420266E-2 - 0.50557923247175E-2 0.50475367471314E-2 0.50392967906413E-2 0.50310724365602E-2 0.50228636661421E-2 - 0.50146704605822E-2 0.50064928010175E-2 0.49983306685272E-2 0.49901840441336E-2 0.4982052908802E-2 - 0.49739372434418E-2 0.49658370289066E-2 0.4957752245995E-2 0.4949682875451E-2 0.49416288979642E-2 - 0.4933590294171E-2 0.49255670446543E-2 0.49175591299447E-2 0.49095665305204E-2 0.49015892268082E-2 - 0.48936271991836E-2 0.48856804279715E-2 0.48777488934469E-2 0.48698325758346E-2 0.48619314553108E-2 - 0.48540455120026E-2 0.4846174725989E-2 
0.48383190773015E-2 0.48304785459239E-2 0.48226531117936E-2 - 0.48148427548016E-2 0.4807047454793E-2 0.47992671915675E-2 0.47915019448801E-2 0.47837516944412E-2 - 0.47760164199173E-2 0.47682961009315E-2 0.47605907170637E-2 0.47529002478514E-2 0.47452246727899E-2 - 0.47375639713328E-2 0.47299181228926E-2 0.4722287106841E-2 0.47146709025094E-2 0.47070694891894E-2 - 0.46994828461331E-2 0.46919109525538E-2 0.46843537876262E-2 0.46768113304868E-2 0.46692835602348E-2 - 0.46617704559318E-2 0.46542719966031E-2 0.46467881612373E-2 0.46393189287874E-2 0.46318642781707E-2 - 0.46244241882696E-2 0.4616998637932E-2 0.46095876059714E-2 0.46021910711678E-2 0.45948090122678E-2 - 0.4587441407985E-2 0.45800882370006E-2 0.45727494779637E-2 0.45654251094919E-2 0.45581151101713E-2 - 0.45508194585575E-2 0.45435381331753E-2 0.45362711125199E-2 0.45290183750566E-2 0.45217798992217E-2 - 0.45145556634226E-2 0.45073456460385E-2 0.45001498254202E-2 0.44929681798914E-2 0.44858006877484E-2 - 0.44786473272606E-2 0.44715080766712E-2 0.44643829141972E-2 0.44572718180302E-2 0.44501747663363E-2 - 0.4443091737257E-2 0.44360227089092E-2 0.44289676593857E-2 0.44219265667558E-2 0.44148994090653E-2 - 0.4407886164337E-2 0.44008868105714E-2 0.43939013257466E-2 0.43869296878189E-2 0.43799718747233E-2 - 0.43730278643735E-2 0.43660976346627E-2 0.43591811634638E-2 0.43522784286294E-2 0.43453894079928E-2 - 0.43385140793679E-2 0.43316524205499E-2 0.43248044093152E-2 0.43179700234221E-2 0.43111492406113E-2 - 0.43043420386058E-2 0.42975483951115E-2 0.42907682878177E-2 0.4284001694397E-2 0.42772485925062E-2 - 0.42705089597862E-2 0.42637827738627E-2 0.42570700123461E-2 0.42503706528324E-2 0.4243684672903E-2 - 0.42370120501254E-2 0.42303527620533E-2 0.42237067862273E-2 0.42170741001745E-2 0.42104546814099E-2 - 0.42038485074357E-2 0.41972555557422E-2 0.4190675803808E-2 0.41841092291004E-2 0.41775558090755E-2 - 0.41710155211787E-2 0.41644883428451E-2 0.41579742514994E-2 0.41514732245569E-2 0.41449852394231E-2 - 
0.41385102734945E-2 0.41320483041589E-2 0.41255993087952E-2 0.41191632647744E-2 0.41127401494594E-2 - 0.41063299402056E-2 0.40999326143611E-2 0.40935481492668E-2 0.40871765222571E-2 0.40808177106599E-2 - 0.40744716917972E-2 0.40681384429848E-2 0.40618179415334E-2 0.40555101647482E-2 0.40492150899296E-2 - 0.40429326943733E-2 0.40366629553708E-2 0.40304058502093E-2 0.40241613561724E-2 0.40179294505402E-2 - 0.40117101105896E-2 0.40055033135945E-2 0.39993090368263E-2 0.39931272575539E-2 0.39869579530442E-2 - 0.39808011005623E-2 0.39746566773716E-2 0.39685246607345E-2 0.39624050279123E-2 0.39562977561655E-2 - 0.39502028227544E-2 0.39441202049387E-2 0.39380498799787E-2 0.39319918251346E-2 0.39259460176675E-2 - 0.39199124348393E-2 0.3913891053913E-2 0.3907881852153E-2 0.39018848068254E-2 0.38958998951982E-2 - 0.38899270945416E-2 0.38839663821281E-2 0.38780177352329E-2 0.38720811311344E-2 0.38661565471137E-2 - 0.38602439604557E-2 0.38543433484488E-2 0.38484546883854E-2 0.38425779575619E-2 0.38367131332793E-2 - 0.3830860192843E-2 0.38250191135636E-2 0.38191898727566E-2 0.38133724477429E-2 0.38075668158489E-2 - 0.3801772954407E-2 0.37959908407556E-2 0.37902204522393E-2 0.37844617662094E-2 0.37787147600237E-2 - 0.37729794110473E-2 0.37672556966521E-2 0.37615435942179E-2 0.37558430811319E-2 0.3750154134789E-2 - 0.37444767325926E-2 0.37388108519541E-2 0.37331564702935E-2 0.37275135650398E-2 0.37218821136273E-2 - 0.37162620935086E-2 0.3710653482148E-2 0.37050562570215E-2 0.36994703955861E-2 0.36938958753116E-2 - 0.36883326736951E-2 0.36827807682528E-2 0.36772401365092E-2 0.367171075599E-2 0.36661926042336E-2 - 0.36606856587916E-2 0.36551898972309E-2 0.36497052971367E-2 0.36442318360833E-2 0.36387694916564E-2 - 0.3633318241454E-2 0.36278780631108E-2 0.36224489342669E-2 0.36170308325711E-2 0.36116237356835E-2 - 0.36062276212769E-2 0.36008424670427E-2 0.35954682506882E-2 0.35901049499287E-2 0.35847525424814E-2 - 0.35794110060754E-2 0.35740803184751E-2 0.35687604574584E-2 
0.35634514008164E-2 0.35581531263468E-2 - 0.35528656118621E-2 0.35475888351976E-2 0.35423227742068E-2 0.35370674067671E-2 0.35318227107388E-2 - 0.35265886640045E-2 0.352136524447E-2 0.35161524300714E-2 0.35109501987542E-2 0.35057585284821E-2 - 0.35005773972382E-2 0.3495406783023E-2 0.34902466638315E-2 0.34850970176772E-2 0.34799578226082E-2 - 0.34748290566964E-2 0.34697106980314E-2 0.34646027247053E-2 0.34595051148308E-2 0.34544178465435E-2 - 0.34493408980039E-2 0.34442742473979E-2 0.34392178729029E-2 0.34341717527163E-2 0.34291358650565E-2 - 0.34241101881831E-2 0.34190947003695E-2 0.34140893799014E-2 0.34090942050822E-2 0.34041091542329E-2 - 0.33991342057074E-2 0.33941693378808E-2 0.33892145291389E-2 0.33842697578743E-2 0.33793350024972E-2 - 0.33744102414644E-2 0.336949545325E-2 0.33645906163474E-2 0.33596957092625E-2 0.33548107105237E-2 - 0.33499355986856E-2 0.33450703523265E-2 0.33402149500502E-2 0.33353693704614E-2 0.33305335921903E-2 - 0.33257075938953E-2 0.33208913542648E-2 0.3316084852007E-2 0.33112880658479E-2 0.33065009745356E-2 - 0.33017235568416E-2 0.32969557915681E-2 0.32921976575453E-2 0.32874491336062E-2 0.32827101986021E-2 - 0.3277980831405E-2 0.32732610109381E-2 0.32685507161387E-2 0.32638499259673E-2 0.32591586194078E-2 - 0.32544767754766E-2 0.32498043731893E-2 0.32451413915874E-2 0.32404878097396E-2 0.32358436067585E-2 - 0.32312087617764E-2 0.32265832539422E-2 0.32219670624277E-2 0.32173601664291E-2 0.32127625451756E-2 - 0.32081741779233E-2 0.32035950439441E-2 0.31990251225263E-2 0.31944643929803E-2 0.31899128346641E-2 - 0.31853704269567E-2 0.31808371492613E-2 0.31763129810017E-2 0.31717979016273E-2 0.31672918906187E-2 - 0.31627949274837E-2 0.31583069917598E-2 0.31538280629886E-2 0.31493581207408E-2 0.31448971446229E-2 - 0.31404451142761E-2 0.31360020093662E-2 0.3131567809578E-2 0.31271424946237E-2 0.31227260442433E-2 - 0.31183184382092E-2 0.31139196563248E-2 0.3109529678404E-2 0.31051484842853E-2 0.31007760538332E-2 - 0.30964123669601E-2 
0.30920574035999E-2 0.30877111437108E-2 0.30833735672769E-2 0.30790446543087E-2 - 0.30747243848507E-2 0.30704127389766E-2 0.30661096967844E-2 0.30618152383871E-2 0.30575293439254E-2 - 0.30532519935824E-2 0.30489831675695E-2 0.30447228461238E-2 0.30404710095137E-2 0.30362276380404E-2 - 0.30319927120221E-2 0.30277662117988E-2 0.30235481177352E-2 0.30193384102477E-2 0.30151370697765E-2 - 0.30109440767883E-2 0.30067594117733E-2 0.30025830552497E-2 0.2998414987773E-2 0.29942551899293E-2 - 0.29901036423361E-2 0.2985960325617E-2 0.29818252204274E-2 0.29776983074644E-2 0.29735795674613E-2 - 0.29694689811788E-2 0.29653665294004E-2 0.29612721929408E-2 0.2957185952645E-2 0.29531077893903E-2 - 0.29490376840894E-2 0.29449756176667E-2 0.29409215710754E-2 0.29368755252978E-2 0.29328374613698E-2 - 0.29288073603526E-2 0.29247852033291E-2 0.2920770971408E-2 0.29167646457254E-2 0.29127662074644E-2 - 0.29087756378374E-2 0.2904792918081E-2 0.29008180294479E-2 0.28968509532218E-2 0.28928916707324E-2 - 0.28889401633388E-2 0.28849964124289E-2 0.28810603994217E-2 0.28771321057662E-2 0.28732115129431E-2 - 0.28692986024639E-2 0.28653933558783E-2 0.28614957547439E-2 0.28576057806528E-2 0.28537234152337E-2 - 0.28498486401585E-2 0.28459814371238E-2 0.28421217878595E-2 0.28382696741286E-2 0.28344250777266E-2 - 0.28305879804575E-2 0.28267583641585E-2 0.2822936210713E-2 0.28191215020413E-2 0.28153142200939E-2 - 0.28115143468429E-2 0.28077218642935E-2 0.28039367544847E-2 0.28001589994911E-2 0.27963885814246E-2 - 0.27926254824073E-2 0.27888696845924E-2 0.27851211701662E-2 0.27813799213678E-2 0.27776459204624E-2 - 0.27739191497406E-2 0.27701995915223E-2 0.27664872281581E-2 0.27627820420396E-2 0.27590840155909E-2 - 0.27553931312619E-2 0.27517093715241E-2 0.27480327188792E-2 0.27443631558784E-2 0.27407006651033E-2 - 0.27370452291658E-2 0.27333968307036E-2 0.27297554523858E-2 0.27261210769201E-2 0.27224936870482E-2 - 0.27188732655482E-2 0.27152597952091E-2 0.27116532588564E-2 0.27080536393525E-2 
0.27044609195998E-2 - 0.270087508253E-2 0.26972961111041E-2 0.26937239883147E-2 0.2690158697187E-2 0.26866002207839E-2 - 0.26830485422039E-2 0.26795036445645E-2 0.26759655110114E-2 0.26724341247209E-2 0.26689094689208E-2 - 0.26653915268642E-2 0.26618802818373E-2 0.265837571716E-2 0.26548778161918E-2 0.26513865623024E-2 - 0.26479019388958E-2 0.26444239294107E-2 0.26409525173334E-2 0.26374876861784E-2 0.26340294194884E-2 - 0.26305777008372E-2 0.26271325138309E-2 0.26236938421132E-2 0.26202616693619E-2 0.26168359792807E-2 - 0.26134167555987E-2 0.26100039820747E-2 0.26065976425181E-2 0.26031977207675E-2 0.25998042006931E-2 - 0.25964170661921E-2 0.25930363011952E-2 0.25896618896687E-2 0.25862938156127E-2 0.25829320630623E-2 - 0.25795766160699E-2 0.25762274587225E-2 0.25728845751457E-2 0.25695479495035E-2 0.25662175659901E-2 - 0.25628934088298E-2 0.25595754622795E-2 0.25562637106289E-2 0.25529581382026E-2 0.25496587293613E-2 - 0.25463654684864E-2 0.25430783399895E-2 0.25397973283134E-2 0.25365224179506E-2 0.25332535934208E-2 - 0.25299908392745E-2 0.25267341400922E-2 0.2523483480487E-2 0.2520238845108E-2 0.25170002186376E-2 - 0.25137675857888E-2 0.25105409312973E-2 0.25073202399308E-2 0.25041054965008E-2 0.25008966858512E-2 - 0.24976937928562E-2 0.2494496802426E-2 0.24913056995069E-2 0.24881204690674E-2 0.24849410961002E-2 - 0.24817675656271E-2 0.24785998627254E-2 0.24754379724999E-2 0.24722818800858E-2 0.24691315706452E-2 - 0.24659870293726E-2 0.24628482415002E-2 0.24597151922942E-2 0.24565878670549E-2 0.24534662510964E-2 - 0.24503503297668E-2 0.24472400884566E-2 0.24441355125956E-2 0.24410365876453E-2 0.24379432990883E-2 - 0.24348556324409E-2 0.24317735732536E-2 0.24286971071172E-2 0.24256262196584E-2 0.24225608965166E-2 - 0.24195011233618E-2 0.24164468858963E-2 0.2413398169873E-2 0.24103549610709E-2 0.24073172452993E-2 - 0.24042850083987E-2 0.24012582362415E-2 0.23982369147339E-2 0.2395221029816E-2 0.23922105674566E-2 - 0.2389205513647E-2 0.2386205854409E-2 
0.23832115758107E-2 0.23802226639516E-2 0.2377239104962E-2 - 0.23742608849999E-2 0.23712879902552E-2 0.23683204069525E-2 0.23653581213488E-2 0.23624011197392E-2 - 0.23594493884298E-2 0.23565029137614E-2 0.23535616821116E-2 0.23506256799017E-2 0.23476948935805E-2 - 0.23447693096252E-2 0.2341848914546E-2 0.23389336948859E-2 0.23360236372065E-2 0.23331187281014E-2 - 0.23302189542049E-2 0.23273243021869E-2 0.23244347587477E-2 0.23215503106135E-2 0.23186709445434E-2 - 0.23157966473283E-2 0.23129274057922E-2 0.23100632067929E-2 0.23072040372102E-2 0.23043498839555E-2 - 0.23015007339714E-2 0.22986565742398E-2 0.22958173917713E-2 0.22929831736062E-2 0.2290153906815E-2 - 0.22873295784974E-2 0.22845101757924E-2 0.22816956858702E-2 0.22788860959274E-2 0.22760813931822E-2 - 0.22732815648841E-2 0.22704865983246E-2 0.22676964808245E-2 0.22649111997347E-2 0.22621307424366E-2 - 0.22593550963416E-2 0.22565842488936E-2 0.22538181875675E-2 0.22510568998734E-2 0.22483003733361E-2 - 0.22455485955131E-2 0.22428015539977E-2 0.22400592364225E-2 0.22373216304493E-2 0.22345887237627E-2 - 0.22318605040781E-2 0.2229136959142E-2 0.22264180767358E-2 0.22237038446732E-2 0.22209942507896E-2 - 0.22182892829479E-2 0.22155889290386E-2 0.22128931769997E-2 0.22102020147944E-2 0.22075154304146E-2 - 0.22048334118801E-2 0.22021559472462E-2 0.21994830245861E-2 0.21968146320026E-2 0.21941507576303E-2 - 0.21914913896471E-2 0.21888365162582E-2 0.21861861256927E-2 0.21835402062083E-2 0.2180898746092E-2 - 0.21782617336666E-2 0.21756291572855E-2 0.21730010053262E-2 0.21703772661896E-2 0.21677579283039E-2 - 0.21651429801422E-2 0.2162532410205E-2 0.21599262070212E-2 0.21573243591436E-2 0.21547268551538E-2 - 0.21521336836695E-2 0.21495448333394E-2 0.21469602928455E-2 0.21443800508777E-2 0.21418040961588E-2 - 0.21392324174477E-2 0.21366650035402E-2 0.21341018432586E-2 0.21315429254508E-2 0.21289882389948E-2 - 0.21264377727974E-2 0.21238915157938E-2 0.21213494569507E-2 0.21188115852577E-2 0.21162778897315E-2 - 
0.21137483594168E-2 0.21112229833973E-2 0.21087017507823E-2 0.21061846507089E-2 0.21036716723423E-2 - 0.21011628048754E-2 0.20986580375329E-2 0.2096157359569E-2 0.20936607602647E-2 0.20911682289204E-2 - 0.20886797548652E-2 0.20861953274649E-2 0.20837149361147E-2 0.2081238570236E-2 0.20787662192797E-2 - 0.20762978727266E-2 0.20738335200813E-2 0.2071373150872E-2 0.2068916754654E-2 0.20664643210204E-2 - 0.20640158395898E-2 0.20615713000085E-2 0.20591306919488E-2 0.205669400511E-2 0.20542612292232E-2 - 0.20518323540482E-2 0.20494073693744E-2 0.20469862650048E-2 0.20445690307722E-2 0.20421556565423E-2 - 0.2039746132213E-2 0.20373404477067E-2 0.20349385929742E-2 0.20325405579935E-2 0.20301463327699E-2 - 0.20277559073345E-2 0.20253692717488E-2 0.20229864160947E-2 0.20206073304804E-2 0.20182320050403E-2 - 0.20158604299472E-2 0.20134925953977E-2 0.20111284916136E-2 0.20087681088427E-2 0.20064114373604E-2 - 0.20040584674683E-2 0.2001709189496E-2 0.19993635937988E-2 0.19970216707553E-2 0.19946834107703E-2 - 0.19923488042801E-2 0.19900178417476E-2 0.19876905136615E-2 0.19853668105379E-2 0.19830467229191E-2 - 0.19807302413735E-2 0.19784173564961E-2 0.19761080589112E-2 0.19738023392594E-2 0.19715001882085E-2 - 0.19692015964552E-2 0.19669065547295E-2 0.19646150537853E-2 0.19623270844E-2 0.19600426373773E-2 - 0.19577617035481E-2 0.19554842737628E-2 0.1953210338898E-2 0.19509398898606E-2 0.19486729175855E-2 - 0.19464094130336E-2 0.19441493671862E-2 0.19418927710503E-2 0.19396396156607E-2 0.19373898920819E-2 - 0.19351435914068E-2 0.19329007047414E-2 0.19306612232175E-2 0.1928425137993E-2 0.19261924402573E-2 - 0.19239631212215E-2 0.19217371721243E-2 0.19195145842309E-2 0.19172953488312E-2 0.19150794572382E-2 - 0.19128669007923E-2 0.19106576708564E-2 0.19084517588122E-2 0.19062491560658E-2 0.19040498540571E-2 - 0.19018538442497E-2 0.18996611181312E-2 0.18974716672142E-2 0.18952854830355E-2 0.18931025571569E-2 - 0.18909228811643E-2 0.18887464466723E-2 0.18865732453072E-2 
0.18844032687216E-2 0.18822365085962E-2 - 0.18800729566428E-2 0.18779126045963E-2 0.18757554442107E-2 0.18736014672647E-2 0.18714506655614E-2 - 0.18693030309305E-2 0.18671585552269E-2 0.1865017230324E-2 0.18628790481173E-2 0.18607440005247E-2 - 0.18586120794968E-2 0.18564832770045E-2 0.1854357585043E-2 0.18522349956324E-2 0.18501155008181E-2 - 0.18479990926621E-2 0.18458857632504E-2 0.18437755046928E-2 0.18416683091263E-2 0.18395641687091E-2 - 0.18374630756241E-2 0.18353650220781E-2 0.18332700003012E-2 0.1831178002545E-2 0.18290890210857E-2 - 0.18270030482211E-2 0.18249200762698E-2 0.18228400975734E-2 0.18207631044994E-2 0.18186890894371E-2 - 0.1816618044799E-2 0.1814549963022E-2 0.18124848365653E-2 0.18104226579106E-2 0.1808363419563E-2 - 0.18063071140531E-2 0.1804253733922E-2 0.18022032717353E-2 0.18001557200849E-2 0.17981110715912E-2 - 0.1796069318895E-2 0.17940304546559E-2 0.17919944715559E-2 0.17899613622996E-2 0.17879311196165E-2 - 0.17859037362608E-2 0.1783879205E-2 0.17818575186219E-2 0.17798386699345E-2 0.17778226517819E-2 - 0.17758094570266E-2 0.17737990785502E-2 0.17717915092525E-2 0.17697867420541E-2 0.17677847699047E-2 - 0.17657855857754E-2 0.17637891826577E-2 0.17617955535551E-2 0.17598046914932E-2 0.17578165895269E-2 - 0.17558312407334E-2 0.17538486382093E-2 0.17518687750753E-2 0.17498916444762E-2 0.17479172395707E-2 - 0.17459455535314E-2 0.17439765795495E-2 0.17420103108555E-2 0.17400467406978E-2 0.17380858623438E-2 - 0.17361276690768E-2 0.17341721542017E-2 0.17322193110472E-2 0.17302691329636E-2 0.17283216133227E-2 - 0.17263767455091E-2 0.17244345229293E-2 0.17224949390132E-2 0.17205579872137E-2 0.17186236610033E-2 - 0.17166919538734E-2 0.17147628593352E-2 0.17128363709212E-2 0.17109124821881E-2 0.17089911867155E-2 - 0.1707072478092E-2 0.17051563499243E-2 0.17032427958388E-2 0.17013318094963E-2 0.16994233845748E-2 - 0.16975175147677E-2 0.16956141937858E-2 0.16937134153589E-2 0.16918151732434E-2 0.16899194612149E-2 - 0.16880262730672E-2 
0.16861356026069E-2 0.16842474436604E-2 0.16823617900803E-2 0.16804786357391E-2 - 0.16785979745276E-2 0.16767198003557E-2 0.16748441071527E-2 0.16729708888672E-2 0.16711001394665E-2 - 0.16692318529393E-2 0.16673660232879E-2 0.16655026445338E-2 0.16636417107195E-2 0.16617832159106E-2 - 0.16599271541902E-2 0.165807351966E-2 0.16562223064414E-2 0.16543735086753E-2 0.1652527120514E-2 - 0.1650683136129E-2 0.1648841549715E-2 0.16470023554876E-2 0.164516554768E-2 0.16433311205427E-2 - 0.16414990683454E-2 0.16396693853759E-2 0.1637842065941E-2 0.16360171043677E-2 0.16341944949958E-2 - 0.16323742321831E-2 0.16305563103057E-2 0.16287407237629E-2 0.16269274669696E-2 0.162511653436E-2 - 0.16233079203873E-2 0.1621501619523E-2 0.1619697626254E-2 0.16178959350867E-2 0.16160965405442E-2 - 0.16142994371648E-2 0.16125046195036E-2 0.16107120821387E-2 0.16089218196653E-2 0.16071338266959E-2 - 0.16053480978593E-2 0.16035646278011E-2 0.16017834111871E-2 0.16000044427022E-2 0.15982277170523E-2 - 0.1596453228946E-2 0.15946809731119E-2 0.15929109442988E-2 0.15911431372793E-2 0.15893775468411E-2 - 0.15876141677865E-2 0.15858529949349E-2 0.1584094023123E-2 0.15823372472065E-2 0.15805826620598E-2 - 0.15788302625684E-2 0.15770800436322E-2 0.15753320001656E-2 0.15735861271127E-2 0.15718424194304E-2 - 0.15701008720917E-2 0.15683614800847E-2 0.15666242384174E-2 0.15648891421073E-2 0.15631561861885E-2 - 0.15614253657118E-2 0.15596966757512E-2 0.15579701113949E-2 0.15562456677464E-2 0.1554523339925E-2 - 0.15528031230655E-2 0.15510850123226E-2 0.15493690028683E-2 0.15476550898866E-2 0.15459432685721E-2 - 0.15442335341341E-2 0.15425258818085E-2 0.15408203068454E-2 0.15391168045105E-2 0.15374153700825E-2 - 0.15357159988566E-2 0.1534018686145E-2 0.15323234272759E-2 0.15306302175949E-2 0.15289390524567E-2 - 0.15272499272331E-2 0.15255628373127E-2 0.15238777781012E-2 0.15221947450185E-2 0.15205137335011E-2 - 0.15188347390009E-2 0.15171577569847E-2 0.15154827829353E-2 0.15138098123527E-2 
0.15121388407467E-2 - 0.15104698636403E-2 0.15088028765703E-2 0.15071378750996E-2 0.15054748548038E-2 0.15038138112708E-2 - 0.15021547401002E-2 0.15004976369055E-2 0.14988424973227E-2 0.14971893170025E-2 0.14955380916094E-2 - 0.14938888168131E-2 0.14922414882981E-2 0.1490596101772E-2 0.14889526529588E-2 0.14873111375962E-2 - 0.14856715514319E-2 0.14840338902305E-2 0.14823981497696E-2 0.14807643258398E-2 0.14791324142454E-2 - 0.1477502410809E-2 0.14758743113668E-2 0.1474248111769E-2 0.14726238078783E-2 0.14710013955703E-2 - 0.14693808707395E-2 0.14677622292961E-2 0.14661454671663E-2 0.14645305802756E-2 0.14629175645654E-2 - 0.14613064159971E-2 0.14596971305513E-2 0.14580897042223E-2 0.14564841330102E-2 0.14548804129294E-2 - 0.14532785400093E-2 0.14516785102988E-2 0.1450080319862E-2 0.14484839647665E-2 0.14468894410921E-2 - 0.14452967449316E-2 0.14437058724018E-2 0.14421168196298E-2 0.1440529582755E-2 0.14389441579293E-2 - 0.1437360541318E-2 0.14357787291007E-2 0.14341987174706E-2 0.14326205026334E-2 0.14310440808038E-2 - 0.14294694482091E-2 0.14278966010949E-2 0.14263255357198E-2 0.14247562483552E-2 0.14231887352831E-2 - 0.1421622992798E-2 0.14200590172098E-2 0.14184968048428E-2 0.14169363520375E-2 0.14153776551342E-2 - 0.14138207104874E-2 0.14122655144667E-2 0.14107120634629E-2 0.14091603538776E-2 0.14076103821203E-2 - 0.1406062144613E-2 0.14045156377915E-2 0.1402970858101E-2 0.14014278019991E-2 0.13998864659582E-2 - 0.13983468464646E-2 0.13968089400165E-2 0.13952727431223E-2 0.13937382523027E-2 0.13922054640911E-2 - 0.13906743750345E-2 0.13891449816929E-2 0.13876172806343E-2 0.1386091268439E-2 0.13845669416995E-2 - 0.13830442970214E-2 0.13815233310212E-2 0.13800040403287E-2 0.1378486421587E-2 0.13769704714507E-2 - 0.13754561865838E-2 0.13739435636632E-2 0.13724325993768E-2 0.13709232904234E-2 0.13694156335136E-2 - 0.13679096253695E-2 0.13664052627242E-2 0.13649025423219E-2 0.13634014609234E-2 0.13619020152997E-2 - 0.13604042022317E-2 0.13589080185109E-2 
0.13574134609429E-2 0.13559205263375E-2 0.13544292115166E-2 - 0.13529395133151E-2 0.13514514285829E-2 0.13499649541781E-2 0.13484800869727E-2 0.13469968238496E-2 - 0.13455151617032E-2 0.13440350974344E-2 0.13425566279574E-2 0.13410797501958E-2 0.13396044610835E-2 - 0.13381307575636E-2 0.13366586365964E-2 0.1335188095151E-2 0.13337191302072E-2 0.13322517387558E-2 - 0.13307859178006E-2 0.1329321664349E-2 0.13278589754192E-2 0.13263978480401E-2 0.13249382792576E-2 - 0.13234802661265E-2 0.13220238057102E-2 0.13205688950817E-2 0.13191155313227E-2 0.13176637115326E-2 - 0.13162134328213E-2 0.13147646923049E-2 0.13133174871022E-2 0.13118718143416E-2 0.1310427671173E-2 - 0.13089850547559E-2 0.13075439622588E-2 0.13061043908561E-2 0.13046663377326E-2 0.13032298000861E-2 - 0.13017947751253E-2 0.13003612600711E-2 0.12989292521443E-2 0.12974987485777E-2 0.12960697466162E-2 - 0.12946422435184E-2 0.12932162365521E-2 0.12917917229917E-2 0.12903687001217E-2 0.12889471652363E-2 - 0.12875271156424E-2 0.12861085486582E-2 0.12846914616052E-2 0.12832758518125E-2 0.12818617166175E-2 - 0.12804490533774E-2 0.12790378594567E-2 0.12776281322271E-2 0.12762198690676E-2 0.12748130673658E-2 - 0.1273407724524E-2 0.12720038379547E-2 0.12706014050788E-2 0.12692004233161E-2 0.12678008900958E-2 - 0.12664028028661E-2 0.12650061590864E-2 0.12636109562257E-2 0.12622171917534E-2 0.12608248631517E-2 - 0.12594339679107E-2 0.12580445035281E-2 0.12566564675088E-2 0.12552698573739E-2 0.1253884670653E-2 - 0.12525009048833E-2 0.12511185576071E-2 0.12497376263755E-2 0.1248358108752E-2 0.12469800023099E-2 - 0.12456033046329E-2 0.12442280133036E-2 0.12428541259151E-2 0.12414816400722E-2 0.12401105533917E-2 - 0.12387408634993E-2 0.12373725680238E-2 0.12360056646029E-2 0.12346401508835E-2 0.12332760245265E-2 - 0.12319132832019E-2 0.12305519245807E-2 0.12291919463414E-2 0.12278333461711E-2 0.12264761217695E-2 - 0.12251202708429E-2 0.12237657911064E-2 0.12224126802838E-2 0.1221060936106E-2 0.12197105563145E-2 - 
0.12183615386597E-2 0.12170138808988E-2 0.12156675807895E-2 0.12143226360971E-2 0.12129790446039E-2 - 0.12116368041013E-2 0.12102959123882E-2 0.1208956367266E-2 0.12076181665458E-2 0.1206281308047E-2 - 0.12049457895973E-2 0.12036116090332E-2 0.12022787641956E-2 0.12009472529337E-2 0.11996170731049E-2 - 0.11982882225758E-2 0.11969606992192E-2 0.1195634500918E-2 0.11943096255633E-2 0.11929860710552E-2 - 0.11916638352917E-2 0.11903429161789E-2 0.1189023311636E-2 0.11877050195945E-2 0.11863880379937E-2 - 0.1185072364771E-2 0.11837579978725E-2 0.11824449352533E-2 0.11811331748794E-2 0.11798227147242E-2 - 0.11785135527644E-2 0.11772056869831E-2 0.11758991153712E-2 0.11745938359302E-2 0.1173289846668E-2 - 0.11719871455987E-2 0.11706857307434E-2 0.11693856001301E-2 0.11680867517945E-2 0.11667891837796E-2 - 0.1165492894135E-2 0.11641978809158E-2 0.11629041421845E-2 0.11616116760115E-2 0.11603204804741E-2 - 0.1159030553656E-2 0.11577418936496E-2 0.11564544985535E-2 0.1155168366473E-2 0.11538834955197E-2 - 0.11525998838145E-2 0.11513175294784E-2 0.11500364306397E-2 0.11487565854351E-2 0.11474779920136E-2 - 0.11462006485302E-2 0.11449245531419E-2 0.11436497040125E-2 0.11423760993126E-2 0.11411037372214E-2 - 0.11398326159254E-2 0.11385627336145E-2 0.11372940884833E-2 0.11360266787306E-2 0.1134760502572E-2 - 0.11334955582274E-2 0.11322318439211E-2 0.11309693578798E-2 0.11297080983384E-2 0.11284480635379E-2 - 0.11271892517253E-2 0.11259316611532E-2 0.11246752900857E-2 0.11234201367928E-2 0.1122166199547E-2 - 0.11209134766253E-2 0.11196619663101E-2 0.11184116668957E-2 0.11171625766817E-2 0.11159146939722E-2 - 0.11146680170744E-2 0.11134225443011E-2 0.11121782739754E-2 0.11109352044261E-2 0.11096933339876E-2 - 0.11084526609987E-2 0.1107213183804E-2 0.11059749007552E-2 0.11047378102097E-2 0.11035019105322E-2 - 0.11022672000886E-2 0.1101033677251E-2 0.1099801340399E-2 0.10985701879212E-2 0.10973402182119E-2 - 0.1096111429667E-2 0.10948838206879E-2 0.10936573896821E-2 
0.1092432135066E-2 0.10912080552622E-2 - 0.10899851486946E-2 0.10887634137914E-2 0.10875428489853E-2 0.10863234527213E-2 0.10851052234476E-2 - 0.1083888159618E-2 0.10826722596914E-2 0.10814575221326E-2 0.10802439454107E-2 0.10790315280001E-2 - 0.10778202683812E-2 0.10766101650376E-2 0.10754012164584E-2 0.10741934211394E-2 0.10729867775816E-2 - 0.10717812842899E-2 0.1070576939779E-2 0.10693737425692E-2 0.10681716911828E-2 0.10669707841436E-2 - 0.10657710199806E-2 0.10645723972323E-2 0.10633749144414E-2 0.10621785701553E-2 0.10609833629284E-2 - 0.10597892913193E-2 0.10585963538916E-2 0.10574045492139E-2 0.10562138758621E-2 0.10550243324083E-2 - 0.10538359174302E-2 0.10526486295147E-2 0.10514624672588E-2 0.10502774292634E-2 0.10490935141293E-2 - 0.10479107204632E-2 0.10467290468767E-2 0.1045548491987E-2 0.10443690544163E-2 0.10431907327902E-2 - 0.10420135257382E-2 0.10408374318945E-2 0.10396624499006E-2 0.1038488578402E-2 0.10373158160482E-2 - 0.10361441614936E-2 0.10349736133961E-2 0.10338041704206E-2 0.10326358312367E-2 0.1031468594518E-2 - 0.10303024589396E-2 0.10291374231814E-2 0.10279734859306E-2 0.10268106458791E-2 0.10256489017234E-2 - 0.10244882521622E-2 0.10233286958988E-2 0.1022170231642E-2 0.10210128581056E-2 0.10198565740088E-2 - 0.1018701378071E-2 0.1017547269016E-2 0.10163942455728E-2 0.10152423064793E-2 0.10140914504768E-2 - 0.10129416763084E-2 0.10117929827212E-2 0.10106453684672E-2 0.10094988323012E-2 0.10083533729825E-2 - 0.10072089892747E-2 0.10060656799457E-2 0.10049234437654E-2 0.10037822795138E-2 0.1002642185974E-2 - 0.10015031619327E-2 0.10003652061774E-2 0.99922831750221E-3 0.99809249470252E-3 0.99695773657736E-3 - 0.9958240419292E-3 0.99469140956999E-3 0.99355983831428E-3 0.99242932697934E-3 0.99129987438552E-3 - 0.9901714793568E-3 0.98904414072337E-3 0.98791785731947E-3 0.98679262798227E-3 0.98566845154933E-3 - 0.98454532686194E-3 0.98342325276926E-3 0.98230222812443E-3 0.98118225178403E-3 0.98006332260686E-3 - 0.97894543945555E-3 
0.97782860119746E-3 0.97671280670431E-3 0.97559805485315E-3 0.97448434451865E-3 - 0.97337167458004E-3 0.97226004392136E-3 0.97114945143354E-3 0.9700398960103E-3 0.96893137654754E-3 - 0.96782389194462E-3 0.96671744110438E-3 0.96561202293524E-3 0.9645076363499E-3 0.96340428026136E-3 - 0.9623019535843E-3 0.96120065523579E-3 0.96010038414312E-3 0.95900113923502E-3 0.95790291944354E-3 - 0.95680572370408E-3 0.95570955095727E-3 0.95461440014232E-3 0.95352027020152E-3 0.95242716008057E-3 - 0.95133506873389E-3 0.95024399511782E-3 0.94915393819082E-3 0.94806489691412E-3 0.94697687025202E-3 - 0.94588985717387E-3 0.94480385665253E-3 0.94371886766292E-3 0.94263488918043E-3 0.94155192018339E-3 - 0.9404699596574E-3 0.93938900659106E-3 0.9383090599758E-3 0.93723011880524E-3 0.93615218207618E-3 - 0.9350752487893E-3 0.9339993179488E-3 0.93292438856366E-3 0.93185045963986E-3 0.93077753018748E-3 - 0.92970559922115E-3 0.92863466576158E-3 0.92756472883173E-3 0.92649578745594E-3 0.92542784066154E-3 - 0.92436088747891E-3 0.92329492694313E-3 0.92222995809312E-3 0.9211659799672E-3 0.92010299160525E-3 - 0.91904099204897E-3 0.91797998034979E-3 0.91691995556022E-3 0.91586091673492E-3 0.9148028629301E-3 - 0.91374579320467E-3 0.91268970662253E-3 0.91163460225073E-3 0.91058047915901E-3 0.90952733641516E-3 - 0.90847517308998E-3 0.90742398826071E-3 0.90637378100847E-3 0.90532455041635E-3 0.90427629556965E-3 - 0.90322901555745E-3 0.90218270947004E-3 0.90113737639805E-3 0.90009301543433E-3 0.89904962567881E-3 - 0.89800720623354E-3 0.89696575620289E-3 0.89592527469283E-3 0.8948857608119E-3 0.89384721367229E-3 - 0.89280963238917E-3 0.89177301608165E-3 0.89073736386548E-3 0.88970267485982E-3 0.88866894818843E-3 - 0.8876361829804E-3 0.88660437836683E-3 0.88557353347895E-3 0.88454364745065E-3 0.88351471941854E-3 - 0.88248674852331E-3 0.88145973390893E-3 0.8804336747183E-3 0.87940857009583E-3 0.87838441918775E-3 - 0.87736122114818E-3 0.87633897513219E-3 0.87531768029648E-3 0.87429733579929E-3 
0.87327794080104E-3 - 0.87225949446622E-3 0.87124199596194E-3 0.87022544445724E-3 0.86920983911965E-3 0.86819517911902E-3 - 0.8671814636312E-3 0.86616869183499E-3 0.86515686291118E-3 0.86414597604092E-3 0.86313603040795E-3 - 0.86212702519885E-3 0.86111895960316E-3 0.86011183281396E-3 0.85910564402163E-3 0.85810039241923E-3 - 0.85709607720264E-3 0.85609269757319E-3 0.85509025273283E-3 0.85408874188623E-3 0.85308816424053E-3 - 0.85208851900614E-3 0.85108980538974E-3 0.85009202260089E-3 0.84909516985369E-3 0.84809924636666E-3 - 0.84710425135993E-3 0.84611018405342E-3 0.84511704366936E-3 0.84412482943235E-3 0.84313354057032E-3 - 0.84214317631416E-3 0.84115373589305E-3 0.84016521853763E-3 0.83917762348024E-3 0.83819094995985E-3 - 0.83720519721599E-3 0.83622036448968E-3 0.83523645102348E-3 0.83425345606181E-3 0.8332713788519E-3 - 0.83229021864328E-3 0.83130997468677E-3 0.83033064623226E-3 0.82935223253131E-3 0.82837473284094E-3 - 0.82739814642034E-3 0.8264224725303E-3 0.82544771043171E-3 0.82447385938748E-3 0.82350091866327E-3 - 0.82252888752723E-3 0.82155776525073E-3 0.82058755110163E-3 0.81961824435034E-3 0.81864984426996E-3 - 0.81768235013839E-3 0.81671576123439E-3 0.81575007683752E-3 0.81478529622903E-3 0.81382141869184E-3 - 0.81285844351168E-3 0.81189636997669E-3 0.81093519737441E-3 0.80997492499254E-3 0.80901555211954E-3 - 0.80805707805113E-3 0.80709950208313E-3 0.80614282351287E-3 0.80518704163911E-3 0.80423215576379E-3 - 0.80327816518669E-3 0.80232506920893E-3 0.80137286713318E-3 0.80042155826811E-3 0.79947114192279E-3 - 0.79852161740702E-3 0.79757298403171E-3 0.79662524110904E-3 0.79567838795454E-3 0.79473242388549E-3 - 0.7937873482197E-3 0.79284316027381E-3 0.79189985936581E-3 0.79095744481887E-3 0.7900159159576E-3 - 0.78907527210778E-3 0.78813551259576E-3 0.78719663674941E-3 0.78625864389878E-3 0.78532153337572E-3 - 0.78438530451514E-3 0.78344995664793E-3 0.7825154891073E-3 0.78158190122922E-3 0.78064919235399E-3 - 0.77971736182268E-3 0.7787864089761E-3 
0.77785633315645E-3 0.77692713370738E-3 0.77599880997538E-3 - 0.77507136130898E-3 0.77414478705539E-3 0.77321908656207E-3 0.77229425917689E-3 0.77137030425449E-3 - 0.7704472211494E-3 0.76952500921698E-3 0.76860366781306E-3 0.7676831962946E-3 0.76676359402144E-3 - 0.76584486035506E-3 0.76492699465824E-3 0.76400999629092E-3 0.76309386461462E-3 0.76217859899507E-3 - 0.76126419880038E-3 0.76035066339917E-3 0.75943799216109E-3 0.75852618445795E-3 0.75761523966147E-3 - 0.75670515714232E-3 0.75579593627201E-3 0.75488757642732E-3 0.75398007698586E-3 0.75307343732618E-3 - 0.75216765682666E-3 0.75126273486688E-3 0.75035867082882E-3 0.74945546409613E-3 0.74855311405485E-3 - 0.74765162008684E-3 0.74675098157606E-3 0.74585119790909E-3 0.7449522684758E-3 0.74405419266658E-3 - 0.74315696987185E-3 0.74226059948316E-3 0.74136508089331E-3 0.74047041349729E-3 0.73957659669211E-3 - 0.73868362987271E-3 0.73779151243425E-3 0.73690024377223E-3 0.73600982328856E-3 0.7351202503848E-3 - 0.73423152446291E-3 0.73334364492493E-3 0.73245661117372E-3 0.73157042261522E-3 0.73068507865667E-3 - 0.72980057870599E-3 0.72891692216816E-3 0.72803410844929E-3 0.72715213695997E-3 0.72627100711255E-3 - 0.72539071832013E-3 0.72451126999488E-3 0.72363266155032E-3 0.72275489240152E-3 0.7218779619652E-3 - 0.72100186966018E-3 0.72012661490211E-3 0.71925219710794E-3 0.71837861569604E-3 0.71750587008881E-3 - 0.71663395970837E-3 0.71576288397781E-3 0.71489264232134E-3 0.71402323416517E-3 0.71315465893177E-3 - 0.71228691604512E-3 0.71142000493203E-3 0.71055392502226E-3 0.70968867574604E-3 0.70882425653268E-3 - 0.70796066681252E-3 0.70709790601704E-3 0.70623597357976E-3 0.70537486893592E-3 0.70451459151831E-3 - 0.70365514075998E-3 0.70279651609443E-3 0.70193871696064E-3 0.70108174279715E-3 0.70022559304265E-3 - 0.69937026713606E-3 0.6985157645169E-3 0.69766208462709E-3 0.69680922690963E-3 0.69595719080774E-3 - 0.69510597576224E-3 0.6942555812147E-3 0.69340600661094E-3 0.69255725139794E-3 0.69170931502316E-3 - 
0.69086219693339E-3 0.69001589657639E-3 0.6891704134013E-3 0.6883257468585E-3 0.68748189640041E-3 - 0.68663886147565E-3 0.68579664153409E-3 0.68495523602715E-3 0.68411464441008E-3 0.68327486613816E-3 - 0.68243590066593E-3 0.68159774744849E-3 0.68076040594159E-3 0.67992387560338E-3 0.67908815589322E-3 - 0.6782532462689E-3 0.67741914618734E-3 0.67658585510528E-3 0.67575337248544E-3 0.67492169778989E-3 - 0.67409083048112E-3 0.67326077002187E-3 0.67243151587688E-3 0.67160306750851E-3 0.6707754243795E-3 - 0.66994858595311E-3 0.66912255169695E-3 0.66829732107814E-3 0.66747289356405E-3 0.66664926862238E-3 - 0.66582644572144E-3 0.66500442433079E-3 0.66418320392097E-3 0.66336278396246E-3 0.66254316392431E-3 - 0.66172434327602E-3 0.66090632149045E-3 0.66008909804106E-3 0.65927267240167E-3 0.65845704404604E-3 - 0.6576422124485E-3 0.65682817708453E-3 0.65601493743057E-3 0.65520249296501E-3 0.65439084316198E-3 - 0.65357998749701E-3 0.65276992544714E-3 0.65196065649248E-3 0.65115218011292E-3 0.65034449578807E-3 - 0.64953760299808E-3 0.64873150122365E-3 0.64792618994677E-3 0.64712166865066E-3 0.64631793681692E-3 - 0.64551499392669E-3 0.64471283946076E-3 0.64391147290515E-3 0.64311089374509E-3 0.64231110146601E-3 - 0.64151209555311E-3 0.64071387549208E-3 0.63991644077007E-3 0.63911979087497E-3 0.63832392529532E-3 - 0.63752884351685E-3 0.63673454502593E-3 0.63594102931184E-3 0.63514829586537E-3 0.63435634417719E-3 - 0.633565173738E-3 0.63277478403985E-3 0.63198517457415E-3 0.63119634483066E-3 0.63040829429918E-3 - 0.6296210224738E-3 0.62883452884883E-3 0.62804881291874E-3 0.62726387417714E-3 0.62647971211827E-3 - 0.62569632623753E-3 0.62491371603103E-3 0.62413188099647E-3 0.6233508206276E-3 0.62257053441928E-3 - 0.62179102186806E-3 0.62101228247312E-3 0.62023431573349E-3 0.61945712114732E-3 0.61868069821323E-3 - 0.61790504643034E-3 0.61713016529898E-3 0.61635605432049E-3 0.61558271299442E-3 0.61481014081997E-3 - 0.61403833729606E-3 0.61326730192631E-3 0.61249703421365E-3 
0.61172753366077E-3 0.61095879976989E-3 - 0.6101908320433E-3 0.60942362998562E-3 0.60865719310203E-3 0.60789152089788E-3 0.60712661287516E-3 - 0.60636246853639E-3 0.60559908738747E-3 0.60483646893543E-3 0.60407461268745E-3 0.60331351814923E-3 - 0.60255318482715E-3 0.60179361222844E-3 0.6010347998614E-3 0.60027674723557E-3 0.59951945385724E-3 - 0.59876291923325E-3 0.59800714287116E-3 0.59725212428148E-3 0.59649786297388E-3 0.59574435845867E-3 - 0.59499161024672E-3 0.59423961785019E-3 0.59348838077723E-3 0.59273789853697E-3 0.59198817064035E-3 - 0.59123919660038E-3 0.59049097592985E-3 0.58974350814081E-3 0.58899679274569E-3 0.58825082925736E-3 - 0.58750561718952E-3 0.58676115605692E-3 0.58601744537223E-3 0.58527448464787E-3 0.5845322733962E-3 - 0.58379081113344E-3 0.58305009737506E-3 0.58231013163643E-3 0.58157091343281E-3 0.58083244227967E-3 - 0.58009471769353E-3 0.57935773919144E-3 0.57862150629038E-3 0.57788601850509E-3 0.57715127535054E-3 - 0.57641727634459E-3 0.5756840210057E-3 0.57495150885235E-3 0.57421973940225E-3 0.57348871217352E-3 - 0.57275842668489E-3 0.57202888245561E-3 0.57130007900619E-3 0.57057201585401E-3 0.56984469251701E-3 - 0.56911810851393E-3 0.56839226336602E-3 0.56766715659407E-3 0.56694278771847E-3 0.56621915625973E-3 - 0.56549626173854E-3 0.56477410367667E-3 0.56405268159663E-3 0.56333199501951E-3 0.56261204346524E-3 - 0.56189282645336E-3 0.56117434350767E-3 0.56045659415115E-3 0.55973957790685E-3 0.55902329429799E-3 - 0.55830774284916E-3 0.55759292308219E-3 0.55687883451873E-3 0.55616547668047E-3 0.55545284909303E-3 - 0.55474095128125E-3 0.55402978276942E-3 0.55331934308146E-3 0.55260963174121E-3 0.55190064827442E-3 - 0.55119239220711E-3 0.5504848630648E-3 0.54977806037066E-3 0.54907198364787E-3 0.548366632423E-3 - 0.54766200622294E-3 0.5469581045745E-3 0.54625492700331E-3 0.54555247303542E-3 0.54485074219756E-3 - 0.54414973401688E-3 0.54344944802162E-3 0.54274988373697E-3 0.54205104068879E-3 0.5413529184037E-3 - 0.5406555164102E-3 
0.53995883423637E-3 0.53926287141002E-3 0.53856762745914E-3 0.53787310191183E-3 - 0.53717929429679E-3 0.53648620414339E-3 0.53579383097963E-3 0.53510217433283E-3 0.53441123372974E-3 - 0.53372100870095E-3 0.53303149877629E-3 0.53234270348539E-3 0.5316546223572E-3 0.53096725492077E-3 - 0.53028060070634E-3 0.52959465924455E-3 0.5289094300663E-3 0.52822491269968E-3 0.52754110667314E-3 - 0.52685801151716E-3 0.5261756267632E-3 0.52549395194231E-3 0.52481298658555E-3 0.5241327302247E-3 - 0.52345318239084E-3 0.52277434261324E-3 0.522096210421E-3 0.52141878534643E-3 0.52074206692183E-3 - 0.52006605467936E-3 0.51939074815029E-3 0.51871614686611E-3 0.518042250359E-3 0.51736905816152E-3 - 0.51669656980732E-3 0.51602478482648E-3 0.5153537027498E-3 0.51468332310912E-3 0.51401364543816E-3 - 0.51334466927035E-3 0.5126763941381E-3 0.51200881957391E-3 0.51134194511041E-3 0.51067577028161E-3 - 0.51001029462212E-3 0.50934551766433E-3 0.50868143893984E-3 0.50801805797973E-3 0.50735537431937E-3 - 0.50669338749306E-3 0.50603209703481E-3 0.50537150247792E-3 0.50471160335576E-3 0.50405239920289E-3 - 0.50339388955423E-3 0.50273607394465E-3 0.50207895190633E-3 0.5014225229716E-3 0.50076678667521E-3 - 0.5001117425527E-3 0.49945739013961E-3 0.49880372896979E-3 0.4981507585775E-3 0.49749847849753E-3 - 0.49684688826539E-3 0.49619598741737E-3 0.49554577548696E-3 0.49489625200786E-3 0.49424741651409E-3 - 0.49359926854198E-3 0.49295180762692E-3 0.49230503330474E-3 0.49165894511151E-3 0.49101354258441E-3 - 0.49036882525653E-3 0.48972479266175E-3 0.48908144433524E-3 0.48843877981395E-3 0.48779679863436E-3 - 0.48715550033214E-3 0.4865148844431E-3 0.4858749505032E-3 0.48523569804887E-3 0.48459712661716E-3 - 0.48395923574341E-3 0.48332202496258E-3 0.48268549380933E-3 0.48204964182124E-3 0.48141446853519E-3 - 0.4807799734877E-3 0.48014615621494E-3 0.47951301625276E-3 0.47888055313868E-3 0.47824876641045E-3 - 0.47761765560549E-3 0.47698722025824E-3 0.4763574599032E-3 0.47572837407785E-3 
0.47509996232029E-3 - 0.47447222416857E-3 0.47384515915865E-3 0.47321876682685E-3 0.47259304671034E-3 0.47196799834722E-3 - 0.47134362127659E-3 0.47071991503353E-3 0.47009687915354E-3 0.46947451317272E-3 0.46885281662991E-3 - 0.46823178906329E-3 0.46761143001012E-3 0.46699173900752E-3 0.46637271559262E-3 0.46575435930364E-3 - 0.46513666967918E-3 0.46451964625649E-3 0.46390328857169E-3 0.46328759616054E-3 0.46267256856183E-3 - 0.46205820531366E-3 0.46144450595405E-3 0.46083147002121E-3 0.46021909705419E-3 0.45960738658987E-3 - 0.45899633816487E-3 0.45838595131575E-3 0.45777622558151E-3 0.45716716050037E-3 0.45655875561049E-3 - 0.45595101044997E-3 0.45534392455691E-3 0.4547374974697E-3 0.45413172872703E-3 0.45352661786729E-3 - 0.45292216442715E-3 0.45231836794322E-3 0.45171522795426E-3 0.4511127439992E-3 0.45051091561679E-3 - 0.44990974234508E-3 0.44930922372226E-3 0.44870935928686E-3 0.44811014857769E-3 0.44751159113449E-3 - 0.4469136864939E-3 0.44631643419293E-3 0.44571983376928E-3 0.44512388476279E-3 0.44452858671292E-3 - 0.44393393915787E-3 0.44333994163574E-3 0.44274659368462E-3 0.4421538948441E-3 0.4415618446541E-3 - 0.44097044265274E-3 0.44037968837706E-3 0.43978958136347E-3 0.43920012115234E-3 0.43861130728314E-3 - 0.43802313929503E-3 0.43743561672628E-3 0.43684873911528E-3 0.43626250600124E-3 0.4356769169236E-3 - 0.43509197142201E-3 0.4345076690336E-3 0.4339240092957E-3 0.43334099174728E-3 0.43275861592818E-3 - 0.43217688137783E-3 0.43159578763527E-3 0.43101533424008E-3 0.43043552073129E-3 0.42985634664651E-3 - 0.42927781152314E-3 0.42869991490087E-3 0.42812265631938E-3 0.4275460353182E-3 0.42697005143602E-3 - 0.42639470421165E-3 0.42581999318439E-3 0.42524591789378E-3 0.42467247788027E-3 0.42409967268118E-3 - 0.42352750183443E-3 0.42295596487859E-3 0.42238506135379E-3 0.42181479079975E-3 0.42124515275552E-3 - 0.42067614676012E-3 0.4201077723526E-3 0.41954002907291E-3 0.4189729164615E-3 0.41840643405695E-3 - 0.41784058139706E-3 0.41727535801903E-3 
0.41671076346375E-3 0.41614679727112E-3 0.41558345898071E-3 - 0.41502074813146E-3 0.41445866426226E-3 0.41389720691302E-3 0.41333637562385E-3 0.41277616993488E-3 - 0.41221658938379E-3 0.41165763350846E-3 0.41109930184849E-3 0.41054159394406E-3 0.40998450933528E-3 - 0.40942804756111E-3 0.40887220816067E-3 0.40831699067348E-3 0.40776239463976E-3 0.40720841960046E-3 - 0.40665506509362E-3 0.40610233065737E-3 0.40555021583008E-3 0.40499872015256E-3 0.40444784316475E-3 - 0.40389758440652E-3 0.40334794341783E-3 0.40279891973955E-3 0.40225051290933E-3 0.40170272246525E-3 - 0.40115554794654E-3 0.40060898889402E-3 0.4000630448482E-3 0.39951771534839E-3 0.39897299993396E-3 - 0.39842889814444E-3 0.39788540952012E-3 0.39734253360178E-3 0.39680026992836E-3 0.39625861803827E-3 - 0.39571757746947E-3 0.39517714776326E-3 0.3946373284601E-3 0.39409811909997E-3 0.39355951922217E-3 - 0.39302152836588E-3 0.39248414607164E-3 0.39194737188011E-3 0.39141120533175E-3 0.39087564596477E-3 - 0.39034069331758E-3 0.3898063469303E-3 0.38927260634342E-3 0.38873947109718E-3 0.38820694073142E-3 - 0.38767501478598E-3 0.38714369280098E-3 0.38661297431686E-3 0.3860828588749E-3 0.38555334601354E-3 - 0.38502443527141E-3 0.38449612618752E-3 0.38396841830316E-3 0.38344131115897E-3 0.382914804295E-3 - 0.38238889725123E-3 0.38186358956757E-3 0.38133888078465E-3 0.38081477044344E-3 0.38029125808382E-3 - 0.37976834324454E-3 0.3792460254638E-3 0.37872430428314E-3 0.37820317924349E-3 0.37768264988555E-3 - 0.37716271574958E-3 0.37664337637669E-3 0.37612463130633E-3 0.37560648007767E-3 0.37508892222965E-3 - 0.37457195730376E-3 0.37405558484074E-3 0.37353980438112E-3 0.37302461546521E-3 0.37251001763326E-3 - 0.37199601042615E-3 0.37148259338502E-3 0.37096976605065E-3 0.37045752796197E-3 0.36994587865782E-3 - 0.36943481767929E-3 0.36892434456766E-3 0.36841445886411E-3 0.36790516010883E-3 0.36739644784216E-3 - 0.36688832160491E-3 0.36638078093829E-3 0.36587382538441E-3 0.36536745448222E-3 0.36486166777104E-3 - 
0.3643564647908E-3 0.36385184508353E-3 0.36334780819083E-3 0.36284435365328E-3 0.36234148101137E-3 - 0.3618391898056E-3 0.36133747957765E-3 0.36083634986955E-3 0.36033580022184E-3 0.35983583017401E-3 - 0.35933643926502E-3 0.35883762703736E-3 0.35833939303275E-3 0.35784173679268E-3 0.35734465785777E-3 - 0.35684815576874E-3 0.35635223006717E-3 0.35585688029491E-3 0.3553621059941E-3 0.35486790670413E-3 - 0.35437428196467E-3 0.35388123131702E-3 0.35338875430354E-3 0.35289685046611E-3 0.35240551934628E-3 - 0.35191476048615E-3 0.35142457342734E-3 0.35093495770982E-3 0.35044591287341E-3 0.34995743846036E-3 - 0.34946953401292E-3 0.3489821990732E-3 0.34849543318254E-3 0.34800923588246E-3 0.3475236067148E-3 - 0.34703854522154E-3 0.34655405094533E-3 0.34607012342668E-3 0.34558676220642E-3 0.34510396682597E-3 - 0.34462173682806E-3 0.34414007175521E-3 0.34365897114919E-3 0.34317843455174E-3 0.34269846150472E-3 - 0.34221905155071E-3 0.34174020423264E-3 0.34126191909225E-3 0.34078419567082E-3 0.34030703350933E-3 - 0.33983043215084E-3 0.33935439113777E-3 0.3388789100126E-3 0.33840398831779E-3 0.33792962559574E-3 - 0.33745582138922E-3 0.33698257524125E-3 0.33650988669497E-3 0.33603775529151E-3 0.33556618057224E-3 - 0.33509516207989E-3 0.33462469935776E-3 0.3341547919489E-3 0.33368543939614E-3 0.33321664124244E-3 - 0.33274839703081E-3 0.33228070630435E-3 0.33181356860677E-3 0.33134698348021E-3 0.33088095046683E-3 - 0.33041546910896E-3 0.32995053895054E-3 0.32948615953491E-3 0.3290223304056E-3 0.3285590511063E-3 - 0.3280963211814E-3 0.32763414017291E-3 0.32717250762331E-3 0.32671142307574E-3 0.32625088607446E-3 - 0.32579089616344E-3 0.32533145288625E-3 0.32487255578647E-3 0.32441420440779E-3 0.32395639829443E-3 - 0.32349913699099E-3 0.32304242004088E-3 0.3225862469872E-3 0.32213061737287E-3 0.32167553074269E-3 - 0.32122098664095E-3 0.32076698461191E-3 0.32031352419986E-3 0.31986060494902E-3 0.31940822640406E-3 - 0.31895638810987E-3 0.31850508961131E-3 0.31805433045168E-3 
0.31760411017439E-3 0.31715442832427E-3 - 0.31670528444654E-3 0.31625667808636E-3 0.3158086087883E-3 0.31536107609706E-3 0.31491407955769E-3 - 0.31446761871565E-3 0.31402169311715E-3 0.31357630230603E-3 0.3131314458263E-3 0.31268712322232E-3 - 0.31224333404049E-3 0.31180007782672E-3 0.31135735412641E-3 0.31091516248486E-3 0.31047350244739E-3 - 0.31003237356014E-3 0.30959177536955E-3 0.30915170742124E-3 0.3087121692598E-3 0.3082731604296E-3 - 0.30783468047751E-3 0.30739672895005E-3 0.30695930539362E-3 0.30652240935458E-3 0.30608604038009E-3 - 0.30565019801568E-3 0.30521488180661E-3 0.30478009129786E-3 0.30434582603731E-3 0.30391208557224E-3 - 0.30347886944943E-3 0.30304617721519E-3 0.30261400841582E-3 0.30218236259871E-3 0.30175123931137E-3 - 0.30132063810113E-3 0.30089055851391E-3 0.30046100009564E-3 0.30003196239397E-3 0.29960344495689E-3 - 0.29917544733239E-3 0.29874796906743E-3 0.29832100970923E-3 0.29789456880553E-3 0.29746864590455E-3 - 0.2970432405553E-3 0.29661835230422E-3 0.29619398069809E-3 0.29577012528421E-3 0.29534678561179E-3 - 0.29492396122964E-3 0.29450165168593E-3 0.29407985652883E-3 0.2936585753066E-3 0.29323780756816E-3 - 0.29281755286279E-3 0.29239781073888E-3 0.29197858074416E-3 0.29155986242596E-3 0.2911416553343E-3 - 0.29072395901878E-3 0.29030677302884E-3 0.28989009691325E-3 0.28947393022088E-3 0.28905827250146E-3 - 0.28864312330499E-3 0.28822848218178E-3 0.2878143486801E-3 0.28740072234855E-3 0.28698760273688E-3 - 0.28657498939572E-3 0.28616288187534E-3 0.28575127972609E-3 0.28534018249874E-3 0.28492958974377E-3 - 0.28451950101031E-3 0.28410991584747E-3 0.28370083380633E-3 0.28329225443814E-3 0.2828841772941E-3 - 0.2824766019247E-3 0.28206952788066E-3 0.28166295471311E-3 0.2812568819735E-3 0.28085130921384E-3 - 0.2804462359844E-3 0.28004166183585E-3 0.2796375863193E-3 0.27923400898691E-3 0.27883092939058E-3 - 0.27842834708216E-3 0.2780262616136E-3 0.2776246725369E-3 0.27722357940448E-3 0.2768229817692E-3 - 0.27642287918299E-3 
0.27602327119732E-3 0.27562415736323E-3 0.27522553723452E-3 0.27482741036448E-3 - 0.27442977630621E-3 0.27403263461218E-3 0.27363598483488E-3 0.27323982652801E-3 0.27284415924551E-3 - 0.27244898254151E-3 0.272054295968E-3 0.27166009907729E-3 0.27126639142316E-3 0.27087317256015E-3 - 0.2704804420429E-3 0.27008819942487E-3 0.26969644425984E-3 0.26930517610204E-3 0.26891439450648E-3 - 0.2685240990288E-3 0.26813428922251E-3 0.2677449646412E-3 0.26735612483867E-3 0.26696776937086E-3 - 0.26657989779316E-3 0.26619250966091E-3 0.26580560452958E-3 0.26541918195542E-3 0.2650332414927E-3 - 0.26464778269619E-3 0.26426280512128E-3 0.26387830832441E-3 0.2634942918619E-3 0.26311075528969E-3 - 0.26272769816379E-3 0.26234512004037E-3 0.26196302047643E-3 0.26158139902943E-3 0.26120025525554E-3 - 0.26081958871054E-3 0.26043939894996E-3 0.26005968553198E-3 0.25968044801426E-3 0.25930168595425E-3 - 0.258923398909E-3 0.25854558643568E-3 0.25816824809232E-3 0.25779138343716E-3 0.25741499202852E-3 - 0.25703907342344E-3 0.25666362717918E-3 0.25628865285416E-3 0.25591415000728E-3 0.2555401181975E-3 - 0.25516655698313E-3 0.2547934659226E-3 0.25442084457493E-3 0.25404869249981E-3 0.25367700925775E-3 - 0.25330579440676E-3 0.2529350475051E-3 0.2525647681114E-3 0.25219495578659E-3 0.25182561009127E-3 - 0.25145673058526E-3 0.2510883168283E-3 0.25072036838015E-3 0.25035288480182E-3 0.24998586565443E-3 - 0.2496193104986E-3 0.2492532188943E-3 0.24888759040154E-3 0.24852242458181E-3 0.24815772099649E-3 - 0.24779347920701E-3 0.24742969877513E-3 0.24706637926316E-3 0.24670352023235E-3 0.24634112124381E-3 - 0.24597918185862E-3 0.24561770163971E-3 0.24525668014964E-3 0.24489611695098E-3 0.24453601160624E-3 - 0.244176363678E-3 0.24381717272949E-3 0.24345843832429E-3 0.24310016002589E-3 0.24274233739633E-3 - 0.24238496999773E-3 0.2420280573941E-3 0.24167159914991E-3 0.24131559482969E-3 0.24096004399696E-3 - 0.2406049462156E-3 0.24025030105001E-3 0.23989610806512E-3 0.23954236682654E-3 0.23918907689793E-3 - 
0.23883623784332E-3 0.23848384922716E-3 0.23813191061547E-3 0.23778042157401E-3 0.23742938166827E-3 - 0.23707879046383E-3 0.23672864752639E-3 0.23637895242227E-3 0.23602970471813E-3 0.23568090398009E-3 - 0.23533254977378E-3 0.2349846416648E-3 0.23463717922045E-3 0.23429016200783E-3 0.23394358959412E-3 - 0.23359746154653E-3 0.2332517774324E-3 0.23290653681945E-3 0.23256173927565E-3 0.23221738436937E-3 - 0.23187347166758E-3 0.23153000073757E-3 0.23118697114756E-3 0.23084438246659E-3 0.2305022342635E-3 - 0.23016052610724E-3 0.22981925756719E-3 0.22947842821262E-3 0.2291380376119E-3 0.2287980853335E-3 - 0.22845857094736E-3 0.22811949402366E-3 0.22778085413267E-3 0.2274426508442E-3 0.22710488372826E-3 - 0.22676755235543E-3 0.22643065629667E-3 0.22609419512371E-3 0.22575816840632E-3 0.22542257571478E-3 - 0.22508741661987E-3 0.22475269069358E-3 0.22441839750769E-3 0.22408453663396E-3 0.22375110764431E-3 - 0.22341811011081E-3 0.22308554360598E-3 0.2227534077028E-3 0.2224217019736E-3 0.22209042599034E-3 - 0.22175957932479E-3 0.22142916155095E-3 0.22109917224253E-3 0.2207696109732E-3 0.22044047731618E-3 - 0.22011177084488E-3 0.21978349113357E-3 0.21945563775682E-3 0.21912821028942E-3 0.21880120830498E-3 - 0.21847463137747E-3 0.21814847908173E-3 0.21782275099307E-3 0.21749744668691E-3 0.2171725657385E-3 - 0.21684810772329E-3 0.21652407221704E-3 0.21620045879599E-3 0.215877267037E-3 0.21555449651555E-3 - 0.21523214680727E-3 0.21491021748794E-3 0.21458870813522E-3 0.21426761832639E-3 0.2139469476388E-3 - 0.21362669564991E-3 0.21330686193805E-3 0.21298744607981E-3 0.21266844765233E-3 0.2123498662333E-3 - 0.21203170140153E-3 0.21171395273563E-3 0.21139661981428E-3 0.21107970221633E-3 0.21076319952085E-3 - 0.2104471113071E-3 0.21013143715486E-3 0.20981617664335E-3 0.20950132935169E-3 0.2091868948589E-3 - 0.20887287274576E-3 0.2085592625928E-3 0.20824606398058E-3 0.20793327648964E-3 0.20762089970061E-3 - 0.20730893319482E-3 0.20699737655393E-3 0.20668622935975E-3 
0.20637549119292E-3 0.2060651616344E-3 - 0.20575524026622E-3 0.20544572667094E-3 0.20513662043108E-3 0.20482792112929E-3 0.20451962834846E-3 - 0.20421174167162E-3 0.20390426068192E-3 0.20359718496304E-3 0.203290514098E-3 0.20298424766996E-3 - 0.20267838526239E-3 0.20237292645995E-3 0.2020678708473E-3 0.20176321800888E-3 0.20145896752919E-3 - 0.20115511899277E-3 0.20085167198525E-3 0.20054862609248E-3 0.20024598089994E-3 0.1999437359925E-3 - 0.19964189095515E-3 0.19934044537439E-3 0.19903939883668E-3 0.1987387509286E-3 0.19843850123713E-3 - 0.19813864934984E-3 0.19783919485339E-3 0.19754013733434E-3 0.19724147637926E-3 0.19694321157654E-3 - 0.19664534251427E-3 0.19634786878065E-3 0.19605078996397E-3 0.19575410565272E-3 0.19545781543576E-3 - 0.19516191890233E-3 0.19486641564174E-3 0.19457130524237E-3 0.19427658729273E-3 0.19398226138282E-3 - 0.1936883271031E-3 0.19339478404419E-3 0.19310163179614E-3 0.1928088699494E-3 0.19251649809479E-3 - 0.1922245158235E-3 0.19193292272746E-3 0.19164171839708E-3 0.19135090242309E-3 0.19106047439667E-3 - 0.19077043391075E-3 0.19048078055821E-3 0.19019151393132E-3 0.18990263362237E-3 0.18961413922386E-3 - 0.18932603032935E-3 0.18903830653261E-3 0.18875096742704E-3 0.18846401260557E-3 0.18817744166122E-3 - 0.18789125418859E-3 0.18760544978227E-3 0.18732002803692E-3 0.18703498854714E-3 0.1867503309076E-3 - 0.18646605471379E-3 0.18618215956159E-3 0.18589864504742E-3 0.1856155107658E-3 0.18533275631171E-3 - 0.18505038128131E-3 0.18476838527183E-3 0.18448676788039E-3 0.18420552870392E-3 0.18392466733986E-3 - 0.1836441833857E-3 0.18336407643808E-3 0.18308434609374E-3 0.18280499195105E-3 0.18252601360876E-3 - 0.18224741066576E-3 0.18196918272029E-3 0.18169132937097E-3 0.18141385021695E-3 0.18113674485779E-3 - 0.18086001289361E-3 0.18058365392341E-3 0.18030766754658E-3 0.18003205336294E-3 0.17975681097349E-3 - 0.17948193997928E-3 0.179207439981E-3 0.17893331057947E-3 0.17865955137573E-3 0.17838616197171E-3 - 0.17811314196972E-3 
0.17784049097148E-3 0.17756820857847E-3 0.17729629439201E-3 0.1770247480156E-3 - 0.17675356905257E-3 0.17648275710626E-3 0.17621231177954E-3 0.1759422326755E-3 0.17567251939825E-3 - 0.17540317155226E-3 0.17513418874234E-3 0.17486557057186E-3 0.17459731664453E-3 0.17432942656529E-3 - 0.17406189993989E-3 0.17379473637424E-3 0.17352793547359E-3 0.17326149684362E-3 0.17299542009034E-3 - 0.17272970482023E-3 0.17246435064021E-3 0.17219935715656E-3 0.17193472397567E-3 0.17167045070419E-3 - 0.17140653695001E-3 0.17114298232081E-3 0.17087978642468E-3 0.17061694887002E-3 0.17035446926607E-3 - 0.17009234722035E-3 0.16983058234101E-3 0.16956917423675E-3 0.16930812251733E-3 0.16904742679248E-3 - 0.16878708667188E-3 0.16852710176538E-3 0.16826747168308E-3 0.16800819603574E-3 0.16774927443453E-3 - 0.16749070649011E-3 0.16723249181307E-3 0.16697463001405E-3 0.16671712070517E-3 0.16645996349837E-3 - 0.16620315800581E-3 0.16594670383983E-3 0.16569060061303E-3 0.16543484793825E-3 0.16517944542868E-3 - 0.16492439269778E-3 0.16466968935841E-3 0.16441533502365E-3 0.16416132930759E-3 0.16390767182486E-3 - 0.16365436219038E-3 0.16340140001848E-3 0.16314878492392E-3 0.16289651652184E-3 0.16264459442796E-3 - 0.16239301825847E-3 0.16214178762871E-3 0.16189090215421E-3 0.16164036145086E-3 0.16139016513582E-3 - 0.16114031282626E-3 0.16089080413922E-3 0.16064163869188E-3 0.16039281610156E-3 0.16014433598644E-3 - 0.15989619796498E-3 0.15964840165549E-3 0.15940094667588E-3 0.15915383264416E-3 0.15890705917979E-3 - 0.15866062590232E-3 0.15841453243144E-3 0.15816877838685E-3 0.15792336338879E-3 0.15767828705727E-3 - 0.15743354901245E-3 0.15718914887456E-3 0.15694508626496E-3 0.15670136080501E-3 0.15645797211622E-3 - 0.1562149198202E-3 0.1559722035387E-3 0.15572982289423E-3 0.15548777750957E-3 0.15524606700772E-3 - 0.15500469101091E-3 0.15476364914177E-3 0.15452294102377E-3 0.15428256628076E-3 0.15404252453666E-3 - 0.1538028154158E-3 0.15356343854268E-3 0.15332439354207E-3 0.15308568003903E-3 
0.15284729765933E-3 - 0.15260924602762E-3 0.15237152476885E-3 0.1521341335084E-3 0.1518970718732E-3 0.15166033949016E-3 - 0.15142393598584E-3 0.15118786098692E-3 0.15095211412029E-3 0.15071669501373E-3 0.15048160329533E-3 - 0.15024683859292E-3 0.15001240053407E-3 0.1497782887464E-3 0.14954450285908E-3 0.14931104250132E-3 - 0.14907790730246E-3 0.14884509689178E-3 0.14861261089885E-3 0.14838044895372E-3 0.14814861068675E-3 - 0.1479170957288E-3 0.14768590370983E-3 0.14745503426023E-3 0.14722448701107E-3 0.14699426159416E-3 - 0.14676435764121E-3 0.14653477478444E-3 0.14630551265643E-3 0.14607657088987E-3 0.14584794911674E-3 - 0.14561964696931E-3 0.14539166408095E-3 0.14516400008533E-3 0.14493665461622E-3 0.14470962730777E-3 - 0.14448291779439E-3 0.14425652571062E-3 0.14403045069116E-3 0.14380469237119E-3 0.14357925038564E-3 - 0.1433541243697E-3 0.14312931395896E-3 0.14290481878964E-3 0.14268063849814E-3 0.14245677272096E-3 - 0.1422332210948E-3 0.14200998325652E-3 0.14178705884392E-3 0.1415644474952E-3 0.14134214884805E-3 - 0.1411201625398E-3 0.14089848820786E-3 0.14067712549146E-3 0.14045607402972E-3 0.14023533346194E-3 - 0.14001490342733E-3 0.13979478356537E-3 0.13957497351614E-3 0.13935547292004E-3 0.13913628141784E-3 - 0.13891739864955E-3 0.13869882425559E-3 0.13848055787711E-3 0.13826259915584E-3 0.13804494773369E-3 - 0.13782760325256E-3 0.13761056535468E-3 0.13739383368256E-3 0.13717740787913E-3 0.13696128758785E-3 - 0.13674547245157E-3 0.13652996211332E-3 0.13631475621634E-3 0.13609985440529E-3 0.1358852563248E-3 - 0.13567096161951E-3 0.13545696993409E-3 0.13524328091371E-3 0.13502989420352E-3 0.134816809449E-3 - 0.13460402629592E-3 0.13439154439056E-3 0.1341793633794E-3 0.1339674829091E-3 0.1337559026266E-3 - 0.13354462217905E-3 0.13333364121409E-3 0.13312295937972E-3 0.13291257632381E-3 0.13270249169425E-3 - 0.13249270513911E-3 0.13228321630743E-3 0.13207402484833E-3 0.13186513041114E-3 0.13165653264542E-3 - 0.13144823120087E-3 0.13124022572788E-3 
0.13103251587719E-3 0.1308251012999E-3 0.13061798164594E-3 - 0.13041115656558E-3 0.13020462571038E-3 0.12999838873257E-3 0.12979244528473E-3 0.12958679501862E-3 - 0.12938143758645E-3 0.12917637264092E-3 0.12897159983555E-3 0.12876711882443E-3 0.12856292926036E-3 - 0.12835903079641E-3 0.12815542308594E-3 0.12795210578411E-3 0.12774907854593E-3 0.1275463410263E-3 - 0.12734389288023E-3 0.12714173376306E-3 0.12693986333057E-3 0.12673828123891E-3 0.12653698714426E-3 - 0.12633598070267E-3 0.1261352615703E-3 0.12593482940451E-3 0.12573468386282E-3 0.12553482460294E-3 - 0.12533525128272E-3 0.12513596356056E-3 0.12493696109457E-3 0.12473824354295E-3 0.12453981056398E-3 - 0.12434166181729E-3 0.12414379696249E-3 0.12394621565935E-3 0.12374891756771E-3 0.12355190234762E-3 - 0.12335516965982E-3 0.12315871916537E-3 0.12296255052559E-3 0.12276666340111E-3 0.12257105745283E-3 - 0.12237573234276E-3 0.12218068773339E-3 0.12198592328755E-3 0.12179143866745E-3 0.1215972335357E-3 - 0.12140330755548E-3 0.12120966039065E-3 0.12101629170559E-3 0.1208232011637E-3 0.12063038842873E-3 - 0.12043785316481E-3 0.12024559503719E-3 0.12005361371118E-3 0.11986190885208E-3 0.11967048012544E-3 - 0.11947932719702E-3 0.11928844973316E-3 0.11909784740055E-3 0.11890751986583E-3 0.11871746679552E-3 - 0.11852768785623E-3 0.11833818271589E-3 0.11814895104259E-3 0.11795999250457E-3 0.11777130676987E-3 - 0.11758289350683E-3 0.11739475238445E-3 0.11720688307211E-3 0.11701928523961E-3 0.11683195855601E-3 - 0.1166449026908E-3 0.1164581173141E-3 0.11627160209669E-3 0.11608535670942E-3 0.11589938082339E-3 - 0.11571367411003E-3 0.11552823624102E-3 0.11534306688789E-3 0.1151581657225E-3 0.11497353241725E-3 - 0.11478916664478E-3 0.11460506807796E-3 0.1144212363901E-3 0.11423767125476E-3 0.11405437234572E-3 - 0.11387133933704E-3 0.11368857190326E-3 0.1135060697186E-3 0.11332383245766E-3 0.11314185979536E-3 - 0.11296015140719E-3 0.1127787069688E-3 0.11259752615614E-3 0.11241660864544E-3 0.11223595411317E-3 - 
0.11205556223617E-3 0.11187543269171E-3 0.11169556515697E-3 0.11151595930908E-3 0.11133661482525E-3 - 0.11115753138403E-3 0.110978708664E-3 0.11080014634397E-3 0.11062184410275E-3 0.11044380161954E-3 - 0.11026601857379E-3 0.11008849464523E-3 0.10991122951393E-3 0.10973422285988E-3 0.10955747436331E-3 - 0.10938098370504E-3 0.10920475056642E-3 0.10902877462906E-3 0.10885305557445E-3 0.10867759308441E-3 - 0.10850238684111E-3 0.10832743652722E-3 0.10815274182593E-3 0.10797830241983E-3 0.10780411799169E-3 - 0.10763018822448E-3 0.10745651280264E-3 0.1072830914105E-3 0.10710992373256E-3 0.10693700945353E-3 - 0.10676434825877E-3 0.10659193983291E-3 0.106419783861E-3 0.10624788002852E-3 0.10607622802177E-3 - 0.10590482752705E-3 0.10573367823103E-3 0.10556277982063E-3 0.10539213198306E-3 0.10522173440573E-3 - 0.10505158677654E-3 0.10488168878326E-3 0.1047120401137E-3 0.10454264045579E-3 0.10437348949861E-3 - 0.10420458693126E-3 0.10403593244309E-3 0.10386752572379E-3 0.10369936646338E-3 0.10353145435195E-3 - 0.10336378907993E-3 0.10319637033812E-3 0.10302919781701E-3 0.10286227120736E-3 0.10269559020068E-3 - 0.10252915448904E-3 0.10236296376473E-3 0.10219701771994E-3 0.10203131604719E-3 0.10186585843936E-3 - 0.1017006445897E-3 0.10153567419194E-3 0.10137094693942E-3 0.10120646252576E-3 0.10104222064485E-3 - 0.10087822099143E-3 0.10071446326027E-3 0.10055094714647E-3 0.10038767234545E-3 0.10022463855286E-3 - 0.10006184546464E-3 0.99899292777104E-4 0.99736980186669E-4 0.99574907389622E-4 0.99413074082448E-4 - 0.99251479962603E-4 0.99090124727749E-4 0.98929008075757E-4 0.98768129704786E-4 0.98607489313479E-4 - 0.98447086600257E-4 0.98286921263619E-4 0.98126993002185E-4 0.97967301515791E-4 0.97807846504289E-4 - 0.97648627667717E-4 0.97489644706223E-4 0.97330897320198E-4 0.97172385210597E-4 0.970141080787E-4 - 0.96856065626052E-4 0.96698257553768E-4 0.96540683563286E-4 0.96383343356824E-4 0.96226236637048E-4 - 0.96069363106845E-4 0.95912722469092E-4 0.95756314426994E-4 
0.95600138684099E-4 0.95444194944334E-4 - 0.95288482912091E-4 0.95133002291363E-4 0.94977752786436E-4 0.94822734101921E-4 0.94667945943289E-4 - 0.9451338801612E-4 0.94359060026123E-4 0.94204961679239E-4 0.94051092681646E-4 0.93897452740041E-4 - 0.93744041561455E-4 0.93590858852942E-4 0.93437904321477E-4 0.93285177674221E-4 0.93132678619323E-4 - 0.92980406865117E-4 0.92828362120154E-4 0.92676544093056E-4 0.92524952492722E-4 0.92373587028501E-4 - 0.92222447410082E-4 0.92071533347602E-4 0.91920844550631E-4 0.91770380729132E-4 0.91620141593641E-4 - 0.91470126855343E-4 0.9132033622553E-4 0.91170769415711E-4 0.91021426137763E-4 0.908723061038E-4 - 0.90723409025651E-4 0.90574734615419E-4 0.90426282586008E-4 0.90278052650695E-4 0.90130044522982E-4 - 0.89982257916401E-4 0.89834692544791E-4 0.89687348122354E-4 0.89540224363655E-4 0.8939332098375E-4 - 0.89246637697209E-4 0.89100174218932E-4 0.88953930264191E-4 0.88807905549111E-4 0.8866209978995E-4 - 0.88516512703011E-4 0.88371144004823E-4 0.88225993412166E-4 0.88081060642388E-4 0.87936345413166E-4 - 0.87791847442137E-4 0.87647566446914E-4 0.87503502145288E-4 0.87359654256038E-4 0.8721602249808E-4 - 0.87072606590549E-4 0.86929406252703E-4 0.8678642120407E-4 0.86643651164609E-4 0.86501095854606E-4 - 0.86358754994732E-4 0.86216628305213E-4 0.86074715506634E-4 0.85933016320174E-4 0.85791530467554E-4 - 0.85650257670692E-4 0.85509197651557E-4 0.85368350132415E-4 0.85227714835835E-4 0.85087291484789E-4 - 0.84947079802669E-4 0.84807079512614E-4 0.84667290337965E-4 0.84527712002289E-4 0.84388344230104E-4 - 0.84249186745955E-4 0.84110239274661E-4 0.83971501541306E-4 0.83832973271507E-4 0.83694654190386E-4 - 0.83556544023432E-4 0.83418642496538E-4 0.83280949336341E-4 0.83143464269629E-4 0.83006187023256E-4 - 0.82869117324325E-4 0.827322549002E-4 0.82595599478743E-4 0.82459150788158E-4 0.8232290855659E-4 - 0.82186872512234E-4 0.82051042383452E-4 0.81915417899562E-4 0.8177999878998E-4 0.81644784784349E-4 - 0.81509775612479E-4 
0.81374971004431E-4 0.81240370690694E-4 0.81105974402076E-4 0.80971781869719E-4 - 0.8083779282438E-4 0.80704006997142E-4 0.8057042411974E-4 0.80437043924402E-4 0.80303866143589E-4 - 0.80170890509718E-4 0.80038116755523E-4 0.79905544614068E-4 0.79773173818836E-4 0.79641004103723E-4 - 0.79509035202333E-4 0.79377266848509E-4 0.79245698776365E-4 0.79114330720852E-4 0.78983162417025E-4 - 0.78852193600108E-4 0.78721424005536E-4 0.78590853368966E-4 0.78460481426567E-4 0.78330307914816E-4 - 0.78200332570285E-4 0.78070555129406E-4 0.77940975328821E-4 0.77811592906072E-4 0.77682407598925E-4 - 0.77553419145354E-4 0.77424627283554E-4 0.77296031752164E-4 0.77167632289726E-4 0.77039428634888E-4 - 0.76911420526436E-4 0.76783607704153E-4 0.76655989907887E-4 0.76528566877693E-4 0.76401338353773E-4 - 0.76274304076576E-4 0.76147463787011E-4 0.76020817226295E-4 0.75894364135917E-4 0.75768104257094E-4 - 0.75642037331348E-4 0.75516163100835E-4 0.75390481308118E-4 0.7526499169599E-4 0.75139694007282E-4 - 0.75014587985128E-4 0.74889673372982E-4 0.74764949914678E-4 0.74640417354463E-4 0.74516075436289E-4 - 0.74391923904378E-4 0.7426796250325E-4 0.7414419097819E-4 0.74020609074603E-4 0.73897216538049E-4 - 0.7377401311431E-4 0.73650998549405E-4 0.73528172589815E-4 0.73405534982332E-4 0.73283085473819E-4 - 0.7316082381109E-4 0.73038749741159E-4 0.72916863011886E-4 0.72795163371337E-4 0.72673650567791E-4 - 0.72552324349621E-4 0.7243118446547E-4 0.72310230664381E-4 0.72189462695711E-4 0.72068880309237E-4 - 0.71948483254305E-4 0.71828271280635E-4 0.7170824413842E-4 0.7158840157842E-4 0.71468743351498E-4 - 0.71349269208807E-4 0.71229978901833E-4 0.71110872182298E-4 0.70991948801676E-4 0.70873208511717E-4 - 0.70754651064872E-4 0.70636276213953E-4 0.70518083712004E-4 0.70400073312079E-4 0.70282244767538E-4 - 0.70164597832071E-4 0.70047132259718E-4 0.6992984780492E-4 0.69812744221851E-4 0.69695821264981E-4 - 0.69579078689098E-4 0.69462516249645E-4 0.69346133702209E-4 0.6922993080254E-4 
0.69113907306622E-4 - 0.68998062970684E-4 0.6888239755137E-4 0.68766910805646E-4 0.68651602490519E-4 0.68536472363006E-4 - 0.68421520180305E-4 0.68306745700462E-4 0.68192148681685E-4 0.68077728882393E-4 0.67963486061118E-4 - 0.67849419976663E-4 0.67735530388201E-4 0.67621817055204E-4 0.67508279737497E-4 0.67394918194625E-4 - 0.67281732186451E-4 0.67168721473338E-4 0.67055885816138E-4 0.66943224975917E-4 0.66830738713771E-4 - 0.6671842679108E-4 0.66606288969512E-4 0.66494325011124E-4 0.66382534678343E-4 0.66270917733433E-4 - 0.66159473938847E-4 0.66048203057251E-4 0.65937104852136E-4 0.65826179087057E-4 0.65715425525811E-4 - 0.65604843932434E-4 0.65494434071406E-4 0.65384195706938E-4 0.65274128603553E-4 0.65164232526115E-4 - 0.65054507240085E-4 0.64944952511085E-4 0.64835568104873E-4 0.64726353787445E-4 0.64617309325038E-4 - 0.64508434484314E-4 0.64399729032255E-4 0.64291192735836E-4 0.64182825362089E-4 0.64074626678214E-4 - 0.63966596452249E-4 0.63858734452348E-4 0.63751040446883E-4 0.63643514204386E-4 0.63536155493639E-4 - 0.63428964083783E-4 0.63321939744253E-4 0.63215082244794E-4 0.63108391354912E-4 0.63001866844414E-4 - 0.62895508483626E-4 0.62789316043298E-4 0.62683289294389E-4 0.62577428007934E-4 0.62471731955247E-4 - 0.62366200907925E-4 0.62260834637916E-4 0.62155632917539E-4 0.62050595518937E-4 0.61945722214474E-4 - 0.61841012776753E-4 0.61736466979106E-4 0.6163208459498E-4 0.61527865397996E-4 0.61423809161978E-4 - 0.61319915660976E-4 0.61216184669454E-4 0.61112615962161E-4 0.61009209313984E-4 0.60905964499746E-4 - 0.60802881294486E-4 0.60699959473942E-4 0.60597198814083E-4 0.60494599091075E-4 0.6039216008133E-4 - 0.60289881561644E-4 0.60187763308763E-4 0.60085805099532E-4 0.59984006710926E-4 0.59882367920822E-4 - 0.59780888507173E-4 0.59679568248128E-4 0.59578406921971E-4 0.5947740430724E-4 0.59376560182835E-4 - 0.59275874327937E-4 0.59175346521992E-4 0.59074976544295E-4 0.58974764174416E-4 0.58874709192441E-4 - 0.58774811378825E-4 0.58675070514233E-4 
0.58575486379432E-4 0.58476058755463E-4 0.58376787423641E-4 - 0.58277672165591E-4 0.58178712763301E-4 0.58079908998605E-4 0.57981260653574E-4 0.57882767510542E-4 - 0.57784429352495E-4 0.57686245962556E-4 0.57588217123993E-4 0.57490342620275E-4 0.5739262223508E-4 - 0.57295055752546E-4 0.57197642957078E-4 0.57100383633172E-4 0.57003277565294E-4 0.56906324538107E-4 - 0.56809524336995E-4 0.56712876747547E-4 0.56616381555561E-4 0.56520038546919E-4 0.5642384750776E-4 - 0.56327808224578E-4 0.56231920484154E-4 0.56136184073646E-4 0.56040598779898E-4 0.55945164390091E-4 - 0.55849880691803E-4 0.55754747473097E-4 0.55659764522154E-4 0.55564931627422E-4 0.55470248577643E-4 - 0.55375715161793E-4 0.55281331168687E-4 0.55187096387399E-4 0.55093010607566E-4 0.54999073619159E-4 - 0.5490528521237E-4 0.5481164517742E-4 0.54718153304803E-4 0.54624809385323E-4 0.54531613210127E-4 - 0.54438564570723E-4 0.54345663258379E-4 0.5425290906463E-4 0.54160301781299E-4 0.54067841200804E-4 - 0.53975527115701E-4 0.53883359318694E-4 0.537913376027E-4 0.53699461760848E-4 0.53607731586689E-4 - 0.53516146874059E-4 0.53424707416836E-4 0.53333413008898E-4 0.53242263444292E-4 0.53151258517838E-4 - 0.5306039802452E-4 0.52969681759518E-4 0.52879109518076E-4 0.52788681095693E-4 0.5269839628823E-4 - 0.52608254891825E-4 0.52518256702936E-4 0.52428401517803E-4 0.52338689132977E-4 0.52249119345402E-4 - 0.52159691952434E-4 0.5207040675161E-4 0.51981263540606E-4 0.51892262117339E-4 0.51803402279977E-4 - 0.51714683826996E-4 0.51626106557203E-4 0.51537670269335E-4 0.51449374762306E-4 0.51361219835223E-4 - 0.51273205287871E-4 0.51185330920119E-4 0.51097596532065E-4 0.51010001924038E-4 0.5092254689676E-4 - 0.5083523125073E-4 0.50748054786717E-4 0.50661017305801E-4 0.50574118609631E-4 0.50487358500015E-4 - 0.5040073677884E-4 0.50314253248204E-4 0.50227907710422E-4 0.50141699968236E-4 0.50055629824654E-4 - 0.4996969708271E-4 0.49883901545489E-4 0.49798243016236E-4 0.49712721298926E-4 0.49627336197668E-4 - 
0.49542087516758E-4 0.49456975060598E-4 0.49371998633815E-4 0.4928715804141E-4 0.49202453088652E-4 - 0.49117883581095E-4 0.49033449324084E-4 0.48949150123247E-4 0.48864985784653E-4 0.48780956114762E-4 - 0.48697060920229E-4 0.48613300007767E-4 0.48529673184341E-4 0.48446180257177E-4 0.48362821033825E-4 - 0.48279595322158E-4 0.48196502929933E-4 0.48113543665096E-4 0.48030717335808E-4 0.47948023750874E-4 - 0.47865462719205E-4 0.4778303404988E-4 0.47700737552168E-4 0.47618573035561E-4 0.47536540309858E-4 - 0.47454639185122E-4 0.47372869471569E-4 0.4729123097942E-4 0.47209723519088E-4 0.47128346901551E-4 - 0.47047100938014E-4 0.46965985439872E-4 0.46885000218702E-4 0.46804145086412E-4 0.46723419854947E-4 - 0.46642824336369E-4 0.46562358342886E-4 0.4648202168737E-4 0.4640181418281E-4 0.4632173564238E-4 - 0.46241785879379E-4 0.46161964707308E-4 0.46082271940064E-4 0.46002707391802E-4 0.45923270876925E-4 - 0.45843962209634E-4 0.45764781204404E-4 0.4568572767617E-4 0.45606801440211E-4 0.45528002311994E-4 - 0.45449330107079E-4 0.45370784641282E-4 0.45292365730654E-4 0.45214073191514E-4 0.45135906840496E-4 - 0.45057866494151E-4 0.44979951969239E-4 0.44902163082747E-4 0.44824499652229E-4 0.44746961495372E-4 - 0.4466954843E-4 0.44592260274124E-4 0.44515096845961E-4 0.44438057964073E-4 0.44361143447262E-4 - 0.4428435311446E-4 0.44207686784623E-4 0.44131144276895E-4 0.44054725410972E-4 0.43978430006751E-4 - 0.43902257884326E-4 0.43826208863877E-4 0.43750282765817E-4 0.43674479410862E-4 0.43598798619986E-4 - 0.43523240214481E-4 0.43447804015454E-4 0.43372489844299E-4 0.43297297522733E-4 0.43222226872876E-4 - 0.43147277716975E-4 0.43072449877507E-4 0.42997743177206E-4 0.42923157439025E-4 0.42848692485826E-4 - 0.42774348140715E-4 0.42700124227228E-4 0.42626020569183E-4 0.42552036990577E-4 0.42478173315573E-4 - 0.42404429368559E-4 0.42330804974154E-4 0.42257299957204E-4 0.42183914142874E-4 0.42110647356246E-4 - 0.42037499422626E-4 0.41964470167558E-4 0.41891559417079E-4 
0.41818766997363E-4 0.41746092734732E-4 - 0.41673536455701E-4 0.41601097986986E-4 0.41528777155627E-4 0.41456573788909E-4 0.41384487714211E-4 - 0.41312518758956E-4 0.41240666750738E-4 0.41168931517714E-4 0.4109731288821E-4 0.41025810690741E-4 - 0.40954424753926E-4 0.40883154906608E-4 0.40812000977921E-4 0.40740962797245E-4 0.40670040194251E-4 - 0.40599232998436E-4 0.40528541039565E-4 0.40457964147754E-4 0.40387502153495E-4 0.40317154887458E-4 - 0.40246922180388E-4 0.40176803863255E-4 0.4010679976725E-4 0.4003690972388E-4 0.39967133564923E-4 - 0.39897471122114E-4 0.39827922227347E-4 0.39758486712694E-4 0.39689164410767E-4 0.39619955154269E-4 - 0.39550858776124E-4 0.39481875109483E-4 0.39413003987822E-4 0.39344245244472E-4 0.39275598713008E-4 - 0.39207064227259E-4 0.39138641621485E-4 0.39070330730089E-4 0.39002131387622E-4 0.38934043428832E-4 - 0.38866066688661E-4 0.38798201002369E-4 0.38730446205464E-4 0.38662802133506E-4 0.38595268622112E-4 - 0.38527845507049E-4 0.38460532624679E-4 0.38393329811495E-4 0.38326236904168E-4 0.3825925373949E-4 - 0.38192380154458E-4 0.38125615986358E-4 0.38058961072704E-4 0.37992415251264E-4 0.37925978359692E-4 - 0.37859650235881E-4 0.37793430718082E-4 0.37727319644877E-4 0.37661316855031E-4 0.3759542218737E-4 - 0.37529635480938E-4 0.37463956575008E-4 0.37398385309154E-4 0.37332921523224E-4 0.37267565056987E-4 - 0.37202315750376E-4 0.37137173443505E-4 0.37072137977065E-4 0.37007209191859E-4 0.36942386928803E-4 - 0.36877671028956E-4 0.36813061333556E-4 0.3674855768421E-4 0.36684159922732E-4 0.36619867891078E-4 - 0.36555681431188E-4 0.36491600385196E-4 0.36427624595692E-4 0.36363753905472E-4 0.36299988157493E-4 - 0.36236327194925E-4 0.3617277086122E-4 0.36109318999848E-4 0.3604597145436E-4 0.35982728068438E-4 - 0.35919588686382E-4 0.35856553152596E-4 0.35793621311646E-4 0.35730793008198E-4 0.35668068087123E-4 - 0.35605446393585E-4 0.35542927772966E-4 0.35480512070869E-4 0.3541819913283E-4 0.35355988804609E-4 - 0.35293880932314E-4 
0.3523187536234E-4 0.35169971941264E-4 0.35108170515736E-4 0.35046470932616E-4 - 0.34984873038993E-4 0.34923376682239E-4 0.34861981709988E-4 0.34800687969793E-4 0.34739495309394E-4 - 0.3467840357673E-4 0.3461741262019E-4 0.34556522288285E-4 0.34495732429679E-4 0.34435042893213E-4 - 0.34374453527897E-4 0.34313964183056E-4 0.34253574708228E-4 0.34193284953063E-4 0.34133094767222E-4 - 0.34073004000536E-4 0.34013012503309E-4 0.33953120126029E-4 0.33893326719356E-4 0.33833632134041E-4 - 0.33774036221042E-4 0.33714538831569E-4 0.33655139817048E-4 0.33595839029188E-4 0.33536636319567E-4 - 0.33477531540015E-4 0.33418524542625E-4 0.33359615179832E-4 0.33300803304186E-4 0.3324208876845E-4 - 0.33183471425607E-4 0.3312495112884E-4 0.33066527731273E-4 0.33008201086248E-4 0.32949971047452E-4 - 0.32891837468819E-4 0.32833800204452E-4 0.32775859108578E-4 0.32718014035615E-4 0.32660264840202E-4 - 0.32602611377215E-4 0.32545053501797E-4 0.32487591069003E-4 0.32430223934079E-4 0.32372951952478E-4 - 0.32315774980095E-4 0.32258692872949E-4 0.32201705487177E-4 0.32144812679084E-4 0.32088014305151E-4 - 0.32031310222145E-4 0.31974700287039E-4 0.31918184356907E-4 0.31861762288882E-4 0.31805433940263E-4 - 0.31749199168764E-4 0.31693057832261E-4 0.31637009788796E-4 0.31581054896548E-4 0.3152519301388E-4 - 0.31469423999395E-4 0.31413747711903E-4 0.31358164010466E-4 0.31302672754041E-4 0.31247273801813E-4 - 0.31191967013248E-4 0.31136752248121E-4 0.31081629366363E-4 0.31026598228E-4 0.30971658693243E-4 - 0.309168106225E-4 0.30862053876456E-4 0.30807388316039E-4 0.30752813802131E-4 0.30698330195737E-4 - 0.30643937358002E-4 0.30589635150619E-4 0.30535423435346E-4 0.30481302074111E-4 0.30427270929011E-4 - 0.30373329862432E-4 0.3031947873666E-4 0.30265717414182E-4 0.30212045757701E-4 0.30158463630319E-4 - 0.30104970895261E-4 0.30051567415871E-4 0.29998253055666E-4 0.29945027678338E-4 0.2989189114782E-4 - 0.29838843328258E-4 0.29785884083881E-4 0.29733013278996E-4 0.29680230778053E-4 
0.29627536445941E-4 - 0.29574930147685E-4 0.29522411748468E-4 0.29469981113593E-4 0.29417638108539E-4 0.29365382599033E-4 - 0.29313214451004E-4 0.2926113353061E-4 0.2920913970389E-4 0.29157232837104E-4 0.29105412796809E-4 - 0.29053679449863E-4 0.29002032663265E-4 0.28950472304108E-4 0.28898998239672E-4 0.28847610337426E-4 - 0.28796308465061E-4 0.28745092490498E-4 0.28693962281651E-4 0.28642917706579E-4 0.285919586335E-4 - 0.2854108493106E-4 0.28490296468007E-4 0.28439593113238E-4 0.28388974735805E-4 0.28338441204936E-4 - 0.28287992390068E-4 0.28237628160833E-4 0.2818734838701E-4 0.28137152938403E-4 0.28087041684973E-4 - 0.28037014497059E-4 0.27987071245187E-4 0.27937211800037E-4 0.27887436032414E-4 0.27837743813365E-4 - 0.27788135014007E-4 0.27738609505562E-4 0.27689167159379E-4 0.27639807847237E-4 0.27590531441036E-4 - 0.27541337812823E-4 0.27492226834751E-4 0.27443198379143E-4 0.27394252318572E-4 0.27345388525795E-4 - 0.27296606873768E-4 0.27247907235397E-4 0.27199289483783E-4 0.27150753492315E-4 0.27102299134631E-4 - 0.27053926284523E-4 0.27005634815865E-4 0.26957424602722E-4 0.26909295519339E-4 0.2686124744017E-4 - 0.26813280239882E-4 0.26765393793155E-4 0.26717587974832E-4 0.26669862659921E-4 0.2662221772376E-4 - 0.26574653041802E-4 0.26527168489657E-4 0.26479763943102E-4 0.26432439278067E-4 0.26385194370698E-4 - 0.26338029097332E-4 0.26290943334425E-4 0.26243936958473E-4 0.26197009846124E-4 0.26150161874382E-4 - 0.26103392920416E-4 0.26056702861544E-4 0.26010091575196E-4 0.25963558938973E-4 0.25917104830678E-4 - 0.258707291283E-4 0.25824431710069E-4 0.25778212454118E-4 0.25732071238792E-4 0.2568600794266E-4 - 0.25640022444588E-4 0.2559411462354E-4 0.25548284358667E-4 0.25502531529312E-4 0.25456856014995E-4 - 0.25411257695155E-4 0.2536573644941E-4 0.25320292157713E-4 0.25274924700252E-4 0.25229633957372E-4 - 0.25184419809451E-4 0.2513928213706E-4 0.25094220820955E-4 0.25049235742102E-4 0.25004326781681E-4 - 0.24959493820842E-4 0.24914736740898E-4 
0.24870055423335E-4 0.24825449749971E-4 0.24780919602734E-4 - 0.24736464863688E-4 0.24692085415048E-4 0.24647781139188E-4 0.24603551918683E-4 0.24559397636293E-4 - 0.2451531817488E-4 0.24471313417375E-4 0.24427383246843E-4 0.243835275467E-4 0.24339746200509E-4 - 0.24296039091979E-4 0.24252406104906E-4 0.2420884712325E-4 0.24165362031188E-4 0.24121950713079E-4 - 0.24078613053499E-4 0.24035348936933E-4 0.23992158248069E-4 0.23949040871824E-4 0.2390599669338E-4 - 0.23863025598049E-4 0.23820127471244E-4 0.23777302198538E-4 0.23734549665672E-4 0.23691869758575E-4 - 0.23649262363369E-4 0.23606727366225E-4 0.23564264653437E-4 0.23521874111435E-4 0.23479555626984E-4 - 0.23437309086938E-4 0.23395134378322E-4 0.23353031388334E-4 0.23311000004405E-4 0.23269040113886E-4 - 0.232271516043E-4 0.23185334363348E-4 0.23143588279064E-4 0.23101913239589E-4 0.23060309133173E-4 - 0.23018775848211E-4 0.22977313273244E-4 0.22935921297038E-4 0.22894599808533E-4 0.22853348696742E-4 - 0.22812167850729E-4 0.22771057159671E-4 0.22730016513163E-4 0.22689045800917E-4 0.22648144912779E-4 - 0.22607313738643E-4 0.22566552168562E-4 0.22525860092827E-4 0.22485237401902E-4 0.22444683986441E-4 - 0.2240419973703E-4 0.22363784544447E-4 0.22323438299693E-4 0.22283160893993E-4 0.2224295221869E-4 - 0.22202812165269E-4 0.22162740625367E-4 0.22122737490774E-4 0.22082802653439E-4 0.22042936005504E-4 - 0.22003137439151E-4 0.21963406846691E-4 0.2192374412057E-4 0.21884149153553E-4 0.21844621838503E-4 - 0.21805162068421E-4 0.21765769736435E-4 0.21726444735818E-4 0.21687186960043E-4 0.21647996302749E-4 - 0.21608872657697E-4 0.21569815918662E-4 0.21530825979563E-4 0.2149190273462E-4 0.21453046078215E-4 - 0.21414255904847E-4 0.21375532109185E-4 0.21336874586094E-4 0.21298283230478E-4 0.21259757937296E-4 - 0.21221298601621E-4 0.21182905118924E-4 0.21144577384772E-4 0.2110631529486E-4 0.21068118744966E-4 - 0.21029987631017E-4 0.20991921849149E-4 0.20953921295662E-4 0.20915985867021E-4 0.20878115459659E-4 - 
0.2084030997018E-4 0.20802569295415E-4 0.20764893332401E-4 0.20727281978311E-4 0.20689735130393E-4 - 0.20652252686047E-4 0.20614834542831E-4 0.20577480598502E-4 0.20540190750995E-4 0.20502964898243E-4 - 0.20465802938309E-4 0.20428704769399E-4 0.20391670290017E-4 0.20354699398767E-4 0.20317791994374E-4 - 0.20280947975696E-4 0.20244167241732E-4 0.20207449691655E-4 0.20170795224796E-4 0.20134203740599E-4 - 0.20097675138563E-4 0.20061209318325E-4 0.20024806179779E-4 0.19988465622966E-4 0.19952187548054E-4 - 0.19915971855331E-4 0.19879818445226E-4 0.19843727218327E-4 0.1980769807537E-4 0.19771730917279E-4 - 0.19735825644962E-4 0.19699982159495E-4 0.19664200362125E-4 0.19628480154313E-4 0.19592821437611E-4 - 0.19557224113759E-4 0.19521688084655E-4 0.19486213252346E-4 0.19450799518841E-4 0.19415446786312E-4 - 0.19380154957162E-4 0.19344923933973E-4 0.19309753619448E-4 0.19274643916404E-4 0.19239594727804E-4 - 0.19204605956752E-4 0.19169677506499E-4 0.19134809280474E-4 0.19100001182128E-4 0.19065253115047E-4 - 0.19030564982957E-4 0.18995936689854E-4 0.18961368139838E-4 0.1892685923711E-4 0.18892409885991E-4 - 0.18858019990928E-4 0.18823689456578E-4 0.18789418187745E-4 0.18755206089319E-4 0.18721053066232E-4 - 0.18686959023535E-4 0.1865292386658E-4 0.18618947500852E-4 0.18585029831954E-4 0.18551170765557E-4 - 0.18517370207479E-4 0.18483628063708E-4 0.18449944240386E-4 0.18416318643832E-4 0.18382751180321E-4 - 0.18349241756293E-4 0.18315790278369E-4 0.18282396653396E-4 0.18249060788332E-4 0.18215782590217E-4 - 0.18182561966229E-4 0.18149398823679E-4 0.18116293070053E-4 0.18083244612993E-4 0.18050253360183E-4 - 0.18017319219409E-4 0.17984442098573E-4 0.17951621905853E-4 0.17918858549502E-4 0.17886151937922E-4 - 0.17853501979676E-4 0.17820908583515E-4 0.17788371658125E-4 0.17755891112336E-4 0.17723466855124E-4 - 0.17691098795743E-4 0.17658786843537E-4 0.17626530907945E-4 0.17594330898533E-4 0.17562186724991E-4 - 0.1753009829718E-4 0.17498065525104E-4 0.17466088318851E-4 
0.17434166588561E-4 0.17402300244475E-4 - 0.17370489197156E-4 0.17338733357283E-4 0.17307032635645E-4 0.17275386943051E-4 0.17243796190444E-4 - 0.17212260288993E-4 0.17180779150021E-4 0.17149352685019E-4 0.1711798080538E-4 0.17086663422672E-4 - 0.17055400448651E-4 0.17024191795278E-4 0.16993037374625E-4 0.16961937098834E-4 0.1693089088018E-4 - 0.16899898631072E-4 0.16868960264096E-4 0.1683807569199E-4 0.16807244827512E-4 0.16776467583521E-4 - 0.16745743873002E-4 0.16715073609182E-4 0.16684456705375E-4 0.16653893075023E-4 0.16623382631703E-4 - 0.16592925289117E-4 0.16562520961082E-4 0.16532169561559E-4 0.16501871004625E-4 0.164716252044E-4 - 0.16441432075128E-4 0.16411291531267E-4 0.16381203487411E-4 0.16351167858256E-4 0.16321184558654E-4 - 0.16291253503611E-4 0.1626137460818E-4 0.1623154778746E-4 0.16201772956653E-4 0.16172050031267E-4 - 0.16142378926896E-4 0.16112759559247E-4 0.16083191844113E-4 0.16053675697404E-4 0.16024211035205E-4 - 0.15994797773734E-4 0.15965435829361E-4 0.15936125118402E-4 0.15906865557322E-4 0.15877657062783E-4 - 0.15848499551631E-4 0.15819392940824E-4 0.15790337147376E-4 0.15761332088438E-4 0.15732377681287E-4 - 0.15703473843352E-4 0.15674620492208E-4 0.15645817545458E-4 0.15617064920815E-4 0.15588362536109E-4 - 0.155597103094E-4 0.15531108158843E-4 0.15502556002691E-4 0.154740537593E-4 0.15445601347132E-4 - 0.15417198684842E-4 0.15388845691209E-4 0.15360542285104E-4 0.15332288385409E-4 0.15304083911121E-4 - 0.1527592878149E-4 0.15247822915891E-4 0.15219766233813E-4 0.15191758654789E-4 0.15163800098479E-4 - 0.15135890484701E-4 0.15108029733416E-4 0.15080217764749E-4 0.15052454498762E-4 0.1502473985566E-4 - 0.14997073755804E-4 0.1496945611977E-4 0.14941886868213E-4 0.14914365921899E-4 0.14886893201723E-4 - 0.14859468628713E-4 0.14832092123883E-4 0.14804763608378E-4 0.14777483003536E-4 0.14750250230858E-4 - 0.14723065211967E-4 0.14695927868503E-4 0.14668838122231E-4 0.14641795895062E-4 0.14614801109094E-4 - 0.14587853686579E-4 
0.14560953549701E-4 0.14534100620751E-4 0.14507294822141E-4 0.14480536076567E-4 - 0.14453824306792E-4 0.14427159435661E-4 0.14400541386115E-4 0.14373970081207E-4 0.14347445444151E-4 - 0.14320967398283E-4 0.14294535867021E-4 0.14268150773832E-4 0.14241812042289E-4 0.14215519596177E-4 - 0.14189273359386E-4 0.14163073255915E-4 0.14136919209853E-4 0.14110811145405E-4 0.14084748986902E-4 - 0.14058732658796E-4 0.14032762085683E-4 0.14006837192154E-4 0.1398095790293E-4 0.13955124142878E-4 - 0.13929335837038E-4 0.13903592910549E-4 0.13877895288625E-4 0.13852242896591E-4 0.13826635659885E-4 - 0.13801073504087E-4 0.137755563549E-4 0.13750084138082E-4 0.13724656779483E-4 0.13699274205054E-4 - 0.13673936340932E-4 0.13648643113329E-4 0.13623394448585E-4 0.13598190273188E-4 0.13573030513772E-4 - 0.13547915096935E-4 0.13522843949393E-4 0.13497816997983E-4 0.13472834169744E-4 0.13447895391796E-4 - 0.13423000591354E-4 0.13398149695736E-4 0.13373342632363E-4 0.13348579328801E-4 0.13323859712738E-4 - 0.13299183711929E-4 0.13274551254172E-4 0.13249962267352E-4 0.13225416679607E-4 0.13200914419166E-4 - 0.13176455414355E-4 0.13152039593564E-4 0.13127666885303E-4 0.13103337218198E-4 0.13079050520989E-4 - 0.1305480672254E-4 0.13030605751743E-4 0.13006447537601E-4 0.12982332009266E-4 0.12958259096048E-4 - 0.12934228727369E-4 0.12910240832673E-4 0.12886295341508E-4 0.12862392183542E-4 0.12838531288632E-4 - 0.12814712586754E-4 0.12790936007877E-4 0.1276720148205E-4 0.12743508939413E-4 0.12719858310357E-4 - 0.12696249525335E-4 0.12672682514893E-4 0.12649157209664E-4 0.12625673540384E-4 0.12602231437925E-4 - 0.12578830833268E-4 0.12555471657493E-4 0.12532153841702E-4 0.12508877317102E-4 0.12485642015077E-4 - 0.12462447867124E-4 0.12439294804819E-4 0.12416182759887E-4 0.12393111664171E-4 0.12370081449557E-4 - 0.12347092047966E-4 0.12324143391409E-4 0.12301235412136E-4 0.12278368042474E-4 0.12255541214845E-4 - 0.12232754861747E-4 0.12210008915784E-4 0.12187303309682E-4 0.12164637976279E-4 
0.12142012848531E-4 - 0.12119427859394E-4 0.12096882941934E-4 0.12074378029369E-4 0.12051913055062E-4 0.12029487952473E-4 - 0.12007102655109E-4 0.11984757096583E-4 0.11962451210616E-4 0.1194018493107E-4 0.11917958191926E-4 - 0.11895770927176E-4 0.11873623070896E-4 0.11851514557252E-4 0.11829445320642E-4 0.11807415295536E-4 - 0.11785424416467E-4 0.11763472618045E-4 0.11741559834963E-4 0.11719686002099E-4 0.11697851054425E-4 - 0.11676054926991E-4 0.11654297554862E-4 0.11632578873211E-4 0.11610898817379E-4 0.11589257322817E-4 - 0.11567654325066E-4 0.11546089759733E-4 0.11524563562519E-4 0.11503075669258E-4 0.11481626015905E-4 - 0.11460214538556E-4 0.11438841173236E-4 0.11417505856091E-4 0.11396208523394E-4 0.11374949111609E-4 - 0.11353727557261E-4 0.11332543796967E-4 0.11311397767453E-4 0.11290289405559E-4 0.11269218648095E-4 - 0.11248185431984E-4 0.11227189694307E-4 0.11206231372284E-4 0.11185310403209E-4 0.11164426724457E-4 - 0.11143580273498E-4 0.11122770987907E-4 0.11101998805366E-4 0.1108126366369E-4 0.11060565500682E-4 - 0.11039904254235E-4 0.11019279862335E-4 0.10998692263181E-4 0.10978141395041E-4 0.10957627196242E-4 - 0.10937149605189E-4 0.10916708560356E-4 0.10896304000418E-4 0.10875935864139E-4 0.10855604090339E-4 - 0.10835308617828E-4 0.1081504938551E-4 0.10794826332506E-4 0.10774639398033E-4 0.10754488521388E-4 - 0.10734373641925E-4 0.10714294699097E-4 0.10694251632468E-4 0.106742443817E-4 0.1065427288659E-4 - 0.1063433708689E-4 0.10614436922469E-4 0.10594572333319E-4 0.10574743259598E-4 0.10554949641537E-4 - 0.10535191419426E-4 0.10515468533653E-4 0.10495780924698E-4 0.1047612853314E-4 0.10456511299665E-4 - 0.10436929165011E-4 0.10417382069992E-4 0.10397869955502E-4 0.10378392762599E-4 0.10358950432397E-4 - 0.10339542906118E-4 0.103201701251E-4 0.10300832030819E-4 0.10281528564685E-4 0.10262259668195E-4 - 0.10243025282947E-4 0.10223825350783E-4 0.10204659813604E-4 0.10185528613338E-4 0.10166431691981E-4 - 0.10147368991613E-4 0.10128340454475E-4 
0.10109346022888E-4 0.10090385639242E-4 0.10071459245973E-4 - 0.10052566785609E-4 0.10033708200815E-4 0.10014883434341E-4 0.99960924290196E-5 0.99773351277725E-5 - 0.99586114736043E-5 0.99399214096218E-5 0.99212648790253E-5 0.99026418251304E-5 0.9884052191236E-5 - 0.98654959207454E-5 0.9846972957182E-5 0.98284832442064E-5 0.98100267255547E-5 0.97916033450194E-5 - 0.9773213046482E-5 0.97548557739142E-5 0.97365314713972E-5 0.97182400831149E-5 0.96999815532801E-5 - 0.96817558261756E-5 0.96635628461587E-5 0.96454025577695E-5 0.96272749056077E-5 0.9609179834349E-5 - 0.95911172887422E-5 0.95730872136189E-5 0.95550895539244E-5 0.95371242546953E-5 0.95191912610485E-5 - 0.95012905181195E-5 0.94834219711304E-5 0.94655855654496E-5 0.94477812465428E-5 0.94300089599495E-5 - 0.94122686512903E-5 0.93945602662892E-5 0.93768837507237E-5 0.9359239050419E-5 0.93416261112741E-5 - 0.93240448793564E-5 0.93064953008047E-5 0.92889773218358E-5 0.92714908887294E-5 0.92540359478488E-5 - 0.92366124456633E-5 0.92192203287331E-5 0.92018595437192E-5 0.91845300372739E-5 0.9167231756147E-5 - 0.91499646472059E-5 0.91327286574372E-5 0.91155237339002E-5 0.9098349823714E-5 0.90812068740825E-5 - 0.90640948322958E-5 0.90470136457441E-5 0.90299632619185E-5 0.90129436283288E-5 0.89959546925552E-5 - 0.89789964022521E-5 0.89620687052491E-5 0.89451715494331E-5 0.89283048827576E-5 0.89114686532434E-5 - 0.88946628089884E-5 0.88778872982078E-5 0.88611420692019E-5 0.88444270703403E-5 0.88277422500115E-5 - 0.88110875566837E-5 0.87944629389743E-5 0.87778683455887E-5 0.87613037253082E-5 0.87447690269617E-5 - 0.87282641994634E-5 0.8711789191822E-5 0.8695343953138E-5 0.86789284326153E-5 0.86625425794515E-5 - 0.86461863429345E-5 0.86298596724467E-5 0.86135625175027E-5 0.85972948276746E-5 0.85810565526158E-5 - 0.85648476420648E-5 0.85486680458521E-5 0.85325177138084E-5 0.8516396595853E-5 0.85003046420232E-5 - 0.84842418024635E-5 0.84682080273918E-5 0.84522032670727E-5 0.84362274718531E-5 0.84202805921648E-5 - 
0.84043625785375E-5 0.83884733815958E-5 0.83726129519768E-5 0.83567812403894E-5 0.83409781976185E-5 - 0.83252037746021E-5 0.83094579223353E-5 0.8293740591877E-5 0.82780517343549E-5 0.82623913009688E-5 - 0.82467592430264E-5 0.82311555119156E-5 0.82155800590859E-5 0.82000328360154E-5 0.81845137942559E-5 - 0.8169022885499E-5 0.81535600615137E-5 0.81381252741406E-5 0.8122718475272E-5 0.8107339616878E-5 - 0.80919886510215E-5 0.80766655298503E-5 0.80613702056136E-5 0.80461026305432E-5 0.80308627569619E-5 - 0.80156505372872E-5 0.80004659240592E-5 0.79853088698802E-5 0.79701793274082E-5 0.79550772493752E-5 - 0.79400025885893E-5 0.79249552979516E-5 0.79099353304491E-5 0.78949426391046E-5 0.78799771769968E-5 - 0.7865038897265E-5 0.78501277532038E-5 0.78352436981556E-5 0.78203866855374E-5 0.78055566688425E-5 - 0.77907536016616E-5 0.77759774375935E-5 0.77612281303096E-5 0.77465056335573E-5 0.77318099012211E-5 - 0.77171408872419E-5 0.77024985456198E-5 0.76878828304219E-5 0.76732936957839E-5 0.76587310959432E-5 - 0.76441949852139E-5 0.76296853179628E-5 0.76152020485889E-5 0.76007451315576E-5 0.75863145214752E-5 - 0.75719101730161E-5 0.75575320409218E-5 0.75431800799821E-5 0.7528854245061E-5 0.75145544911109E-5 - 0.75002807731634E-5 0.74860330463423E-5 0.74718112657596E-5 0.74576153866118E-5 0.74434453641911E-5 - 0.74293011539023E-5 0.74151827112135E-5 0.74010899916338E-5 0.73870229507441E-5 0.73729815441987E-5 - 0.7358965727748E-5 0.73449754572235E-5 0.73310106884811E-5 0.73170713774323E-5 0.73031574800494E-5 - 0.72892689524502E-5 0.72754057508017E-5 0.72615678313342E-5 0.72477551503378E-5 0.72339676641693E-5 - 0.72202053292787E-5 0.72064681021902E-5 0.71927559394947E-5 0.71790687977958E-5 0.7165406633769E-5 - 0.71517694042064E-5 0.71381570659806E-5 0.71245695760236E-5 0.71110068913347E-5 0.70974689689965E-5 - 0.70839557661367E-5 0.70704672399212E-5 0.70570033475767E-5 0.70435640464634E-5 0.70301492940014E-5 - 0.70167590476741E-5 0.7003393265015E-5 0.69900519036248E-5 
0.69767349211919E-5 0.6963442275479E-5 - 0.69501739243323E-5 0.69369298255879E-5 0.6923709937162E-5 0.69105142170654E-5 0.68973426234075E-5 - 0.68841951143563E-5 0.68710716481286E-5 0.68579721830104E-5 0.68448966773567E-5 0.68318450896005E-5 - 0.68188173782538E-5 0.6805813501855E-5 0.67928334190006E-5 0.67798770883481E-5 0.67669444686813E-5 - 0.67540355188337E-5 0.67411501976985E-5 0.67282884642283E-5 0.67154502774393E-5 0.67026355964324E-5 - 0.66898443803793E-5 0.66770765885103E-5 0.66643321800766E-5 0.66516111143961E-5 0.66389133508958E-5 - 0.6626238849074E-5 0.66135875684896E-5 0.66009594687533E-5 0.65883545095427E-5 0.65757726506069E-5 - 0.65632138517659E-5 0.65506780729229E-5 0.65381652739845E-5 0.65256754149283E-5 0.65132084558063E-5 - 0.65007643567772E-5 0.64883430780465E-5 0.64759445798858E-5 0.64635688226351E-5 0.64512157667098E-5 - 0.64388853725272E-5 0.64265776005767E-5 0.64142924114399E-5 0.64020297657848E-5 0.63897896243369E-5 - 0.63775719478671E-5 0.63653766972118E-5 0.63532038332745E-5 0.63410533170337E-5 0.63289251095452E-5 - 0.63168191718802E-5 0.63047354651675E-5 0.62926739505961E-5 0.62806345894774E-5 0.62686173431695E-5 - 0.62566221730831E-5 0.62446490406844E-5 0.62326979074998E-5 0.62207687351378E-5 0.62088614852725E-5 - 0.61969761196303E-5 0.61851125999635E-5 0.61732708880843E-5 0.61614509459128E-5 0.61496527354328E-5 - 0.61378762186861E-5 0.612612135776E-5 0.61143881148059E-5 0.61026764520443E-5 0.60909863317623E-5 - 0.6079317716325E-5 0.60676705681003E-5 0.60560448495257E-5 0.60444405231117E-5 0.60328575514641E-5 - 0.60212958972404E-5 0.60097555231444E-5 0.59982363919394E-5 0.59867384664494E-5 0.59752617095764E-5 - 0.59638060842912E-5 0.59523715535935E-5 0.59409580805264E-5 0.59295656281828E-5 0.59181941597798E-5 - 0.59068436385742E-5 0.58955140278832E-5 0.58842052910866E-5 0.58729173916423E-5 0.5861650293016E-5 - 0.58504039587311E-5 0.58391783523718E-5 0.58279734376315E-5 0.58167891782488E-5 0.58056255380128E-5 - 0.57944824807677E-5 
0.57833599704136E-5 0.57722579709283E-5 0.57611764463521E-5 0.57501153607701E-5 - 0.57390746782949E-5 0.57280543630939E-5 0.57170543794415E-5 0.57060746916686E-5 0.56951152641608E-5 - 0.56841760613452E-5 0.5673257047708E-5 0.56623581878061E-5 0.56514794462605E-5 0.5640620787767E-5 - 0.56297821770132E-5 0.56189635787564E-5 0.56081649578274E-5 0.55973862791447E-5 0.55866275076765E-5 - 0.55758886084329E-5 0.55651695464812E-5 0.5554470286947E-5 0.55437907950277E-5 0.55331310359864E-5 - 0.55224909751107E-5 0.55118705777335E-5 0.55012698092362E-5 0.54906886351143E-5 0.54801270209038E-5 - 0.54695849321928E-5 0.54590623346188E-5 0.54485591938744E-5 0.54380754757217E-5 0.54276111459826E-5 - 0.54171661705349E-5 0.54067405152697E-5 0.53963341461363E-5 0.53859470291737E-5 0.53755791304871E-5 - 0.53652304162286E-5 0.53549008526069E-5 0.53445904058962E-5 0.53342990424089E-5 0.53240267284868E-5 - 0.53137734305222E-5 0.53035391150115E-5 0.52933237485003E-5 0.52831272975859E-5 0.52729497289074E-5 - 0.52627910091591E-5 0.52526511051019E-5 0.52425299835556E-5 0.5232427611407E-5 0.52223439555421E-5 - 0.52122789829097E-5 0.5202232660532E-5 0.51922049555091E-5 0.51821958349901E-5 0.51722052661616E-5 - 0.51622332162658E-5 0.51522796526005E-5 0.51423445425297E-5 0.51324278534803E-5 0.51225295528993E-5 - 0.51126496082789E-5 0.51027879871582E-5 0.50929446571858E-5 0.50831195860494E-5 0.50733127414815E-5 - 0.50635240912582E-5 0.50537536032062E-5 0.50440012452273E-5 0.5034266985279E-5 0.50245507913668E-5 - 0.50148526315084E-5 0.50051724737751E-5 0.49955102863293E-5 0.4985866037392E-5 0.49762396952341E-5 - 0.49666312281616E-5 0.4957040604536E-5 0.49474677927772E-5 0.49379127613632E-5 0.49283754788374E-5 - 0.49188559137489E-5 0.49093540347033E-5 0.48998698103647E-5 0.48904032094802E-5 0.4880954200835E-5 - 0.48715227532696E-5 0.48621088356788E-5 0.48527124170187E-5 0.48433334662486E-5 0.48339719523863E-5 - 0.48246278445204E-5 0.48153011118076E-5 0.48059917234508E-5 0.47966996486925E-5 
0.47874248568274E-5 - 0.4778167317203E-5 0.4768926999227E-5 0.47597038723675E-5 0.47504979061093E-5 0.47413090699829E-5 - 0.47321373335672E-5 0.47229826665343E-5 0.47138450385949E-5 0.47047244195037E-5 0.46956207790604E-5 - 0.46865340871116E-5 0.46774643135728E-5 0.46684114284124E-5 0.46593754016403E-5 0.4650356203283E-5 - 0.46413538034154E-5 0.46323681722019E-5 0.46233992798595E-5 0.46144470966518E-5 0.46055115928743E-5 - 0.45965927388745E-5 0.45876905050568E-5 0.45788048618806E-5 0.45699357798673E-5 0.45610832295396E-5 - 0.45522471814759E-5 0.45434276063122E-5 0.45346244747629E-5 0.45258377575835E-5 0.45170674255662E-5 - 0.45083134495512E-5 0.44995758004268E-5 0.44908544491435E-5 0.44821493667065E-5 0.44734605241459E-5 - 0.44647878925261E-5 0.44561314429501E-5 0.44474911466238E-5 0.44388669747862E-5 0.44302588987224E-5 - 0.44216668897611E-5 0.44130909192935E-5 0.44045309587253E-5 0.43959869795078E-5 0.43874589531391E-5 - 0.43789468512047E-5 0.43704506453271E-5 0.43619703071683E-5 0.43535058084338E-5 0.43450571208747E-5 - 0.43366242163022E-5 0.43282070665776E-5 0.43198056435997E-5 0.43114199192912E-5 0.43030498656185E-5 - 0.42946954546309E-5 0.42863566584238E-5 0.42780334491363E-5 0.42697257989426E-5 0.42614336800645E-5 - 0.42531570647777E-5 0.42448959254077E-5 0.42366502343399E-5 0.42284199639584E-5 0.42202050867019E-5 - 0.42120055750659E-5 0.4203821401616E-5 0.41956525389576E-5 0.41874989597298E-5 0.41793606366174E-5 - 0.41712375423515E-5 0.41631296497215E-5 0.4155036931569E-5 0.41469593607558E-5 0.41388969101792E-5 - 0.41308495527734E-5 0.41228172615681E-5 0.41148000096249E-5 0.41067977700461E-5 0.40988105159703E-5 - 0.40908382205811E-5 0.4082880857119E-5 0.40749383988722E-5 0.40670108191739E-5 0.40590980913695E-5 - 0.40512001888508E-5 0.40433170850794E-5 0.40354487535702E-5 0.40275951678769E-5 0.40197563015949E-5 - 0.40119321283722E-5 0.40041226218895E-5 0.39963277558525E-5 0.39885475040075E-5 0.39807818401801E-5 - 0.39730307382367E-5 0.3965294172085E-5 
0.39575721156679E-5 0.39498645429736E-5 0.39421714280391E-5 - 0.39344927449476E-5 0.39268284678348E-5 0.39191785708437E-5 0.39115430281657E-5 0.39039218140483E-5 - 0.38963149028E-5 0.38887222687681E-5 0.38811438863325E-5 0.38735797299166E-5 0.38660297739882E-5 - 0.38584939930668E-5 0.38509723617215E-5 0.38434648545405E-5 0.38359714461489E-5 0.38284921112097E-5 - 0.38210268244658E-5 0.38135755606919E-5 0.38061382947024E-5 0.37987150013494E-5 0.37913056555257E-5 - 0.37839102321785E-5 0.37765287062999E-5 0.37691610529214E-5 0.37618072470885E-5 0.37544672638893E-5 - 0.37471410784796E-5 0.37398286660623E-5 0.37325300018803E-5 0.37252450612057E-5 0.37179738193545E-5 - 0.37107162516888E-5 0.37034723336183E-5 0.36962420406042E-5 0.36890253481126E-5 0.36818222316539E-5 - 0.36746326667846E-5 0.36674566291314E-5 0.36602940943521E-5 0.3653145038143E-5 0.36460094362425E-5 - 0.36388872644384E-5 0.36317784985259E-5 0.36246831143455E-5 0.36176010877926E-5 0.36105323948181E-5 - 0.36034770114095E-5 0.35964349135853E-5 0.35894060774058E-5 0.35823904789733E-5 0.35753880944362E-5 - 0.35683988999903E-5 0.35614228718491E-5 0.35544599862623E-5 0.35475102195175E-5 0.35405735479748E-5 - 0.35336499480251E-5 0.35267393960955E-5 0.35198418686496E-5 0.35129573421899E-5 0.35060857932673E-5 - 0.34992271984742E-5 0.34923815344394E-5 0.34855487778113E-5 0.34787289052776E-5 0.34719218935877E-5 - 0.3465127719532E-5 0.34583463599386E-5 0.34515777916685E-5 0.34448219916223E-5 0.34380789367441E-5 - 0.34313486040205E-5 0.34246309704876E-5 0.34179260131848E-5 0.34112337091946E-5 0.34045540356451E-5 - 0.33978869697266E-5 0.33912324886618E-5 0.3384590569704E-5 0.33779611901443E-5 0.33713443273126E-5 - 0.33647399585854E-5 0.33581480613823E-5 0.3351568613146E-5 0.33450015913469E-5 0.33384469734869E-5 - 0.33319047371452E-5 0.33253748599284E-5 0.33188573194806E-5 0.33123520934831E-5 0.33058591596672E-5 - 0.32993784957723E-5 0.32929100795726E-5 0.32864538888791E-5 0.32800099015788E-5 0.32735780955871E-5 - 
0.32671584488479E-5 0.32607509393377E-5 0.32543555450691E-5 0.3247972244106E-5 0.32416010145506E-5 - 0.32352418345364E-5 0.3228894682216E-5 0.32225595357782E-5 0.32162363734728E-5 0.3209925173587E-5 - 0.3203625914443E-5 0.31973385743931E-5 0.31910631318274E-5 0.31847995651771E-5 0.31785478529124E-5 - 0.31723079735494E-5 0.31660799056096E-5 0.31598636276563E-5 0.31536591182962E-5 0.314746635619E-5 - 0.31412853200308E-5 0.31351159885391E-5 0.31289583404715E-5 0.31228123546209E-5 0.31166780098276E-5 - 0.31105552849731E-5 0.31044441589552E-5 0.30983446106993E-5 0.30922566191588E-5 0.30861801633656E-5 - 0.30801152223766E-5 0.30740617752805E-5 0.30680198011923E-5 0.30619892792624E-5 0.30559701886899E-5 - 0.30499625087118E-5 0.30439662186007E-5 0.30379812976378E-5 0.30320077251427E-5 0.3026045480487E-5 - 0.30200945430847E-5 0.30141548923789E-5 0.3008226507852E-5 0.30023093690258E-5 0.29964034554486E-5 - 0.29905087466872E-5 0.29846252223408E-5 0.29787528620715E-5 0.29728916455748E-5 0.29670415525791E-5 - 0.2961202562836E-5 0.29553746561324E-5 0.29495578122991E-5 0.29437520112057E-5 0.29379572327645E-5 - 0.29321734568861E-5 0.29264006635222E-5 0.29206388326681E-5 0.29148879443701E-5 0.29091479787049E-5 - 0.29034189157731E-5 0.289770073571E-5 0.28919934186857E-5 0.2886296944913E-5 0.28806112946435E-5 - 0.28749364481444E-5 0.28692723857112E-5 0.28636190876692E-5 0.28579765344085E-5 0.28523447063444E-5 - 0.28467235839231E-5 0.284111314762E-5 0.28355133779431E-5 0.28299242554432E-5 0.28243457607067E-5 - 0.28187778743521E-5 0.28132205770066E-5 0.2807673849331E-5 0.28021376720423E-5 0.27966120258965E-5 - 0.27910968916822E-5 0.27855922502048E-5 0.27800980823062E-5 0.27746143688655E-5 0.27691410908001E-5 - 0.27636782290668E-5 0.27582257646301E-5 0.2752783678489E-5 0.27473519516782E-5 0.27419305652837E-5 - 0.2736519500417E-5 0.27311187382226E-5 0.27257282598783E-5 0.27203480466009E-5 0.27149780796143E-5 - 0.2709618340179E-5 0.27042688095963E-5 0.26989294692101E-5 0.26936003003928E-5 
0.26882812845451E-5 - 0.26829724030997E-5 0.26776736375225E-5 0.26723849693155E-5 0.26671063800187E-5 0.26618378511856E-5 - 0.26565793643978E-5 0.26513309012665E-5 0.26460924434607E-5 0.26408639726737E-5 0.26356454706277E-5 - 0.26304369190743E-5 0.26252382997946E-5 0.26200495946107E-5 0.26148707853784E-5 0.26097018539812E-5 - 0.26045427823126E-5 0.25993935522972E-5 0.25942541459131E-5 0.25891245451725E-5 0.25840047321167E-5 - 0.25788946888097E-5 0.25737943973481E-5 0.25687038398625E-5 0.25636229985174E-5 0.25585518555154E-5 - 0.25534903930642E-5 0.25484385934049E-5 0.25433964388137E-5 0.25383639116164E-5 0.25333409941644E-5 - 0.25283276688329E-5 0.25233239180271E-5 0.25183297241821E-5 0.25133450697709E-5 0.25083699372996E-5 - 0.2503404309294E-5 0.24984481683024E-5 0.24935014968992E-5 0.24885642777146E-5 0.24836364934027E-5 - 0.24787181266468E-5 0.247380916016E-5 0.24689095766934E-5 0.24640193590085E-5 0.24591384898953E-5 - 0.24542669521725E-5 0.2449404728711E-5 0.24445518024049E-5 0.24397081561758E-5 0.24348737729732E-5 - 0.24300486357763E-5 0.24252327275979E-5 0.24204260314825E-5 0.24156285305006E-5 0.24108402077377E-5 - 0.2406061046308E-5 0.24012910293736E-5 0.23965301401265E-5 0.23917783617863E-5 0.23870356775967E-5 - 0.23823020708315E-5 0.23775775247959E-5 0.23728620228253E-5 0.23681555482915E-5 0.23634580845717E-5 - 0.23587696150759E-5 0.23540901232476E-5 0.23494195925723E-5 0.23447580065603E-5 0.23401053487472E-5 - 0.23354616026976E-5 0.23308267520051E-5 0.23262007802932E-5 0.23215836712176E-5 0.23169754084534E-5 - 0.23123759756995E-5 0.2307785356678E-5 0.2303203535164E-5 0.22986304949554E-5 0.22940662198753E-5 - 0.22895106937685E-5 0.22849639005061E-5 0.22804258240002E-5 0.22758964481923E-5 0.22713757570529E-5 - 0.2266863734557E-5 0.22623603647106E-5 0.22578656315604E-5 0.22533795191868E-5 0.22489020116943E-5 - 0.22444330932153E-5 0.22399727479138E-5 0.22355209599761E-5 0.22310777136042E-5 0.2226642993026E-5 - 0.22222167825162E-5 0.22177990663764E-5 
0.22133898289342E-5 0.22089890545369E-5 0.22045967275598E-5 - 0.22002128324111E-5 0.21958373535288E-5 0.21914702753844E-5 0.21871115824517E-5 0.21827612592362E-5 - 0.21784192902766E-5 0.21740856601492E-5 0.21697603534546E-5 0.2165443354815E-5 0.21611346488795E-5 - 0.21568342203244E-5 0.21525420538595E-5 0.21482581342259E-5 0.21439824461757E-5 0.21397149744818E-5 - 0.2135455703939E-5 0.21312046193999E-5 0.2126961705736E-5 0.21227269478413E-5 0.21185003306296E-5 - 0.21142818390414E-5 0.21100714580522E-5 0.21058691726655E-5 0.210167496791E-5 0.2097488828823E-5 - 0.20933107404687E-5 0.20891406879521E-5 0.20849786564083E-5 0.20808246309974E-5 0.2076678596898E-5 - 0.20725405393161E-5 0.20684104434861E-5 0.20642882946713E-5 0.2060174078167E-5 0.20560677792736E-5 - 0.20519693833179E-5 0.20478788756547E-5 0.20437962416828E-5 0.20397214668201E-5 0.20356545365094E-5 - 0.20315954362195E-5 0.20275441514511E-5 0.20235006677071E-5 0.2019464970519E-5 0.20154370454523E-5 - 0.2011416878108E-5 0.2007404454109E-5 0.20033997590992E-5 0.19994027787486E-5 0.19954134987527E-5 - 0.19914319048333E-5 0.1987457982741E-5 0.19834917182414E-5 0.19795330971228E-5 0.19755821051961E-5 - 0.19716387283156E-5 0.19677029523557E-5 0.19637747632131E-5 0.1959854146806E-5 0.19559410890765E-5 - 0.19520355759995E-5 0.19481375935758E-5 0.1944247127829E-5 0.19403641647913E-5 0.1936488690519E-5 - 0.19326206911101E-5 0.19287601526906E-5 0.19249070614104E-5 0.19210614034309E-5 0.191722316494E-5 - 0.19133923321547E-5 0.19095688913209E-5 0.19057528287126E-5 0.19019441306098E-5 0.18981427833188E-5 - 0.18943487731723E-5 0.18905620865371E-5 0.18867827098E-5 0.1883010629372E-5 0.18792458316881E-5 - 0.18754883032064E-5 0.18717380304126E-5 0.18679949998189E-5 0.18642591979536E-5 0.18605306113618E-5 - 0.18568092266093E-5 0.18530950303055E-5 0.18493880090788E-5 0.18456881495806E-5 0.1841995438485E-5 - 0.1838309862495E-5 0.18346314083249E-5 0.1830960062711E-5 0.18272958124121E-5 0.18236386442252E-5 - 0.18199885449659E-5 
0.18163455014723E-5 0.18127095006049E-5 0.18090805292463E-5 0.18054585743076E-5 - 0.18018436227246E-5 0.17982356614532E-5 0.1794634677459E-5 0.17910406577301E-5 0.17874535892937E-5 - 0.17838734592004E-5 0.17803002545226E-5 0.17767339623508E-5 0.17731745697984E-5 0.17696220640051E-5 - 0.1766076432135E-5 0.17625376613815E-5 0.17590057389387E-5 0.17554806520266E-5 0.17519623878919E-5 - 0.17484509338171E-5 0.17449462771045E-5 0.17414484050721E-5 0.173795730506E-5 0.17344729644308E-5 - 0.17309953705761E-5 0.17275245109115E-5 0.17240603728665E-5 0.17206029438873E-5 0.17171522114397E-5 - 0.17137081630303E-5 0.17102707861837E-5 0.17068400684449E-5 0.17034159973778E-5 0.16999985605675E-5 - 0.16965877456276E-5 0.16931835401955E-5 0.16897859319322E-5 0.16863949084998E-5 0.16830104575842E-5 - 0.1679632566906E-5 0.16762612242142E-5 0.16728964172767E-5 0.16695381338795E-5 0.16661863618342E-5 - 0.16628410889705E-5 0.165950230313E-5 0.16561699921748E-5 0.16528441440032E-5 0.16495247465342E-5 - 0.16462117877076E-5 0.16429052554797E-5 0.16396051378282E-5 0.16363114227568E-5 0.16330240982921E-5 - 0.16297431524893E-5 0.16264685734001E-5 0.16232003491023E-5 0.16199384677002E-5 0.16166829173314E-5 - 0.16134336861522E-5 0.16101907623325E-5 0.16069541340633E-5 0.16037237895568E-5 0.16004997170527E-5 - 0.15972819048144E-5 0.15940703411147E-5 0.15908650142426E-5 0.15876659125042E-5 0.15844730242495E-5 - 0.15812863378432E-5 0.15781058416686E-5 0.15749315241268E-5 0.15717633736406E-5 0.15686013786544E-5 - 0.15654455276342E-5 0.15622958090666E-5 0.15591522114509E-5 0.15560147233067E-5 0.15528833331816E-5 - 0.15497580296467E-5 0.15466388012931E-5 0.15435256367267E-5 0.15404185245743E-5 0.15373174534852E-5 - 0.15342224121319E-5 0.15311333892116E-5 0.15280503734257E-5 0.15249733534962E-5 0.15219023181664E-5 - 0.15188372562149E-5 0.1515778156436E-5 0.15127250076414E-5 0.15096777986629E-5 0.15066365183567E-5 - 0.15036011555844E-5 0.15005716992294E-5 0.14975481382002E-5 0.14945304614322E-5 
0.1491518657879E-5 - 0.1488512716509E-5 0.14855126263107E-5 0.14825183762924E-5 0.14795299554878E-5 0.1476547352953E-5 - 0.14735705577521E-5 0.14705995589661E-5 0.14676343456938E-5 0.14646749070695E-5 0.14617212322428E-5 - 0.14587733103809E-5 0.14558311306685E-5 0.14528946823091E-5 0.14499639545304E-5 0.14470389365799E-5 - 0.14441196177238E-5 0.14412059872365E-5 0.14382980344122E-5 0.14353957485734E-5 0.14324991190633E-5 - 0.14296081352427E-5 0.14267227864907E-5 0.1423843062206E-5 0.14209689518062E-5 0.14181004447277E-5 - 0.14152375304296E-5 0.14123801983797E-5 0.1409528438065E-5 0.14066822389933E-5 0.14038415906996E-5 - 0.14010064827353E-5 0.13981769046686E-5 0.13953528460861E-5 0.13925342965928E-5 0.13897212458156E-5 - 0.13869136834016E-5 0.13841115990115E-5 0.13813149823194E-5 0.1378523823017E-5 0.13757381108255E-5 - 0.13729578354816E-5 0.13701829867404E-5 0.13674135543786E-5 0.13646495281957E-5 0.13618908979953E-5 - 0.13591376535969E-5 0.13563897848367E-5 0.13536472815899E-5 0.13509101337449E-5 0.13481783312039E-5 - 0.13454518638833E-5 0.13427307217181E-5 0.13400148946668E-5 0.13373043727061E-5 0.13345991458295E-5 - 0.13318992040408E-5 0.13292045373615E-5 0.13265151358397E-5 0.13238309895423E-5 0.13211520885534E-5 - 0.13184784229713E-5 0.13158099829122E-5 0.13131467585129E-5 0.131048873993E-5 0.1307835917342E-5 - 0.13051882809296E-5 0.13025458208939E-5 0.12999085274556E-5 0.12972763908607E-5 0.12946494013698E-5 - 0.12920275492611E-5 0.12894108248301E-5 0.12867992183903E-5 0.12841927202718E-5 0.12815913208241E-5 - 0.12789950104106E-5 0.12764037794093E-5 0.12738176182141E-5 0.12712365172457E-5 0.12686604669397E-5 - 0.12660894577486E-5 0.12635234801417E-5 0.12609625246053E-5 0.12584065816438E-5 0.12558556417798E-5 - 0.1253309695554E-5 0.1250768733514E-5 0.12482327462251E-5 0.12457017242764E-5 0.12431756582784E-5 - 0.12406545388561E-5 0.12381383566512E-5 0.12356271023247E-5 0.12331207665517E-5 0.12306193400144E-5 - 0.12281228134106E-5 0.12256311774697E-5 
0.12231444229378E-5 0.12206625405772E-5 0.12181855211598E-5 - 0.12157133554758E-5 0.12132460343345E-5 0.12107835485631E-5 0.12083258890093E-5 0.12058730465229E-5 - 0.12034250119721E-5 0.12009817762455E-5 0.11985433302569E-5 0.11961096649349E-5 0.11936807712189E-5 - 0.11912566400644E-5 0.11888372624438E-5 0.11864226293501E-5 0.11840127317933E-5 0.11816075607944E-5 - 0.11792071073885E-5 0.11768113626246E-5 0.11744203175793E-5 0.11720339633425E-5 0.11696522910189E-5 - 0.11672752917269E-5 0.11649029566004E-5 0.1162535276794E-5 0.11601722434789E-5 0.11578138478425E-5 - 0.11554600810793E-5 0.11531109344006E-5 0.11507663990394E-5 0.1148426466247E-5 0.11460911272898E-5 - 0.1143760373447E-5 0.11414341960139E-5 0.11391125863028E-5 0.11367955356441E-5 0.11344830353874E-5 - 0.11321750768854E-5 0.11298716515066E-5 0.11275727506357E-5 0.11252783656861E-5 0.11229884880829E-5 - 0.11207031092648E-5 0.11184222206858E-5 0.11161458138195E-5 0.11138738801428E-5 0.11116064111502E-5 - 0.1109343398355E-5 0.11070848332917E-5 0.11048307075079E-5 0.11025810125657E-5 0.11003357400425E-5 - 0.10980948815316E-5 0.1095858428641E-5 0.10936263729968E-5 0.10913987062344E-5 0.10891754200028E-5 - 0.10869565059645E-5 0.1084741955808E-5 0.10825317612336E-5 0.1080325913956E-5 0.10781244057043E-5 - 0.10759272282214E-5 0.10737343732696E-5 0.10715458326273E-5 0.10693615980873E-5 0.1067181661445E-5 - 0.10650060145112E-5 0.1062834649122E-5 0.10606675571312E-5 0.10585047304065E-5 0.10563461608243E-5 - 0.10541918402771E-5 0.10520417606737E-5 0.10498959139401E-5 0.10477542920194E-5 0.10456168868591E-5 - 0.10434836904221E-5 0.10413546946872E-5 0.10392298916553E-5 0.10371092733397E-5 0.10349928317666E-5 - 0.10328805589761E-5 0.1030772447023E-5 0.10286684879785E-5 0.10265686739299E-5 0.10244729969749E-5 - 0.1022381449221E-5 0.10202940227884E-5 0.1018210709824E-5 0.10161315024869E-5 0.10140563929498E-5 - 0.10119853733984E-5 0.10099184360365E-5 0.1007855573074E-5 0.1005796776733E-5 0.10037420392489E-5 - 
0.10016913528837E-5 0.99964470991014E-6 0.99760210261345E-6 0.99556352329182E-6 0.9935289642574E-6 - 0.99149841783872E-6 0.9894718763792E-6 0.98744933223509E-6 0.98543077776963E-6 0.98341620535974E-6 - 0.98140560740418E-6 0.97939897631665E-6 0.97739630452412E-6 0.97539758446372E-6 0.97340280858701E-6 - 0.97141196936086E-6 0.96942505926727E-6 0.96744207080483E-6 0.96546299647562E-6 0.96348782879657E-6 - 0.9615165603E-6 0.95954918353868E-6 0.95758569107732E-6 0.95562607549164E-6 0.95367032937069E-6 - 0.95171844531692E-6 0.9497704159495E-6 0.94782623390222E-6 0.9458858918179E-6 0.94394938234948E-6 - 0.94201669816173E-6 0.9400878319433E-6 0.93816277639399E-6 0.93624152422622E-6 0.93432406816363E-6 - 0.93241040094323E-6 0.93050051531773E-6 0.92859440405381E-6 0.92669205993251E-6 0.92479347573937E-6 - 0.92289864427407E-6 0.92100755835418E-6 0.91912021081354E-6 0.91723659449733E-6 0.91535670226398E-6 - 0.91348052698648E-6 0.91160806154919E-6 0.9097392988435E-6 0.90787423177352E-6 0.90601285326406E-6 - 0.90415515625307E-6 0.90230113369086E-6 0.90045077853819E-6 0.8986040837691E-6 0.89676104237179E-6 - 0.89492164734817E-6 0.89308589171578E-6 0.89125376849526E-6 0.88942527072154E-6 0.88760039144438E-6 - 0.88577912373193E-6 0.88396146066365E-6 0.8821473953292E-6 0.88033692083088E-6 0.87853003028372E-6 - 0.87672671681779E-6 0.87492697357702E-6 0.87313079371343E-6 0.87133817038932E-6 0.86954909677785E-6 - 0.86776356607451E-6 0.86598157148475E-6 0.86420310622582E-6 0.86242816352577E-6 0.86065673662496E-6 - 0.8588888187787E-6 0.85712440325535E-6 0.85536348333603E-6 0.85360605230661E-6 0.85185210346593E-6 - 0.85010163013019E-6 0.8483546256302E-6 0.84661108330873E-6 0.84487099651716E-6 0.84313435861974E-6 - 0.84140116299384E-6 0.83967140303091E-6 0.83794507213669E-6 0.83622216372116E-6 0.83450267120635E-6 - 0.83278658802681E-6 0.83107390763662E-6 0.82936462349921E-6 0.82765872909005E-6 0.82595621789675E-6 - 0.82425708342192E-6 0.82256131917062E-6 0.82086891866155E-6 
0.81917987542777E-6 0.81749418301879E-6 - 0.81581183499482E-6 0.81413282492565E-6 0.8124571463929E-6 0.81078479299024E-6 0.8091157583255E-6 - 0.80745003601982E-6 0.80578761970065E-6 0.80412850300526E-6 0.80247267958119E-6 0.8008201430977E-6 - 0.79917088723306E-6 0.7975249056762E-6 0.79588219212604E-6 0.79424274029283E-6 0.79260654390157E-6 - 0.79097359668938E-6 0.78934389240469E-6 0.78771742480037E-6 0.78609418764121E-6 0.78447417470913E-6 - 0.78285737979912E-6 0.78124379671723E-6 0.77963341927786E-6 0.77802624130733E-6 0.77642225664442E-6 - 0.7748214591407E-6 0.77322384266131E-6 0.77162940107477E-6 0.77003812826136E-6 0.76845001811344E-6 - 0.76686506454098E-6 0.76528326146337E-6 0.76370460280987E-6 0.76212908252057E-6 0.76055669454653E-6 - 0.75898743285235E-6 0.75742129141463E-6 0.75585826421825E-6 0.75429834525568E-6 0.75274152852946E-6 - 0.75118780806124E-6 0.74963717788234E-6 0.74808963203467E-6 0.74654516457082E-6 0.74500376955664E-6 - 0.74346544106392E-6 0.74193017317424E-6 0.74039795997927E-6 0.73886879558969E-6 0.73734267412496E-6 - 0.7358195897143E-6 0.73429953649649E-6 0.73278250862079E-6 0.73126850025043E-6 0.7297575055599E-6 - 0.72824951873366E-6 0.72674453396115E-6 0.72524254544264E-6 0.72374354739483E-6 0.72224753404606E-6 - 0.72075449963495E-6 0.71926443840811E-6 0.7177773446232E-6 0.71629321254966E-6 0.71481203646856E-6 - 0.71333381067371E-6 0.71185852946194E-6 0.71038618714134E-6 0.70891677803178E-6 0.70745029646908E-6 - 0.70598673679833E-6 0.70452609337289E-6 0.7030683605563E-6 0.70161353272238E-6 0.70016160425825E-6 - 0.69871256956215E-6 0.69726642303945E-6 0.69582315910308E-6 0.69438277217538E-6 0.69294525669649E-6 - 0.69151060711534E-6 0.69007881789068E-6 0.68864988349027E-6 0.68722379839214E-6 0.68580055708606E-6 - 0.68438015407256E-6 0.68296258386333E-6 0.68154784097377E-6 0.68013591993019E-6 0.67872681527236E-6 - 0.67732052155275E-6 0.67591703333269E-6 0.67451634518329E-6 0.67311845168685E-6 0.67172334743474E-6 - 0.67033102702416E-6 
0.66894148506217E-6 0.66755471617104E-6 0.66617071498334E-6 0.66478947614126E-6 - 0.66341099429491E-6 0.66203526410455E-6 0.66066228024162E-6 0.65929203738834E-6 0.65792453023899E-6 - 0.65655975348995E-6 0.65519770184865E-6 0.65383837003382E-6 0.65248175277837E-6 0.65112784482394E-6 - 0.64977664092009E-6 0.64842813582596E-6 0.64708232431041E-6 0.64573920115463E-6 0.64439876115044E-6 - 0.6430609990955E-6 0.64172590979488E-6 0.6403934880621E-6 0.63906372872838E-6 0.63773662663253E-6 - 0.63641217662245E-6 0.63509037355439E-6 0.63377121229417E-6 0.6324546877188E-6 0.63114079471533E-6 - 0.62982952818079E-6 0.62852088301593E-6 0.62721485413152E-6 0.62591143645144E-6 0.62461062491091E-6 - 0.62331241445422E-6 0.62201680003272E-6 0.62072377660764E-6 0.61943333915011E-6 0.61814548264195E-6 - 0.61686020207597E-6 0.61557749244852E-6 0.61429734876506E-6 0.61301976604054E-6 0.61174473930529E-6 - 0.61047226359677E-6 0.6092023339616E-6 0.60793494545565E-6 0.60667009314651E-6 0.60540777210349E-6 - 0.60414797740641E-6 0.60289070414608E-6 0.6016359474263E-6 0.60038370235914E-6 0.59913396406367E-6 - 0.59788672766803E-6 0.59664198830961E-6 0.59539974113701E-6 0.59415998130888E-6 0.59292270398891E-6 - 0.59168790434829E-6 0.59045557756597E-6 0.58922571883771E-6 0.58799832336619E-6 0.58677338636231E-6 - 0.58555090304447E-6 0.58433086863981E-6 0.58311327838688E-6 0.58189812753349E-6 0.58068541133632E-6 - 0.57947512505552E-6 0.57826726396045E-6 0.57706182333335E-6 0.57585879846662E-6 0.57465818466121E-6 - 0.5734599772244E-6 0.57226417147274E-6 0.57107076273221E-6 0.56987974633863E-6 0.56869111763808E-6 - 0.56750487197984E-6 0.56632100472209E-6 0.56513951123218E-6 0.56396038689071E-6 0.56278362708558E-6 - 0.56160922721246E-6 0.56043718267541E-6 0.5592674888869E-6 0.55810014126955E-6 0.55693513525512E-6 - 0.55577246628211E-6 0.55461212979495E-6 0.55345412124601E-6 0.55229843610152E-6 0.55114506983534E-6 - 0.54999401792947E-6 0.54884527587458E-6 0.5476988391713E-6 0.54655470332497E-6 
0.54541286384839E-6 - 0.5442733162622E-6 0.54313605610098E-6 0.54200107890613E-6 0.54086838022693E-6 0.53973795562041E-6 - 0.53860980065166E-6 0.5374839108958E-6 0.53636028193669E-6 0.53523890936604E-6 0.53411978877943E-6 - 0.53300291578069E-6 0.53188828598628E-6 0.5307758950218E-6 0.52966573852083E-6 0.52855781212248E-6 - 0.52745211147448E-6 0.52634863223373E-6 0.52524737006641E-6 0.52414832064843E-6 0.52305147965776E-6 - 0.52195684278105E-6 0.52086440571384E-6 0.51977416416397E-6 0.51868611384612E-6 0.51760025048201E-6 - 0.51651656980124E-6 0.51543506754138E-6 0.51435573944923E-6 0.5132785812803E-6 0.51220358879596E-6 - 0.51113075776352E-6 0.5100600839574E-6 0.50899156316559E-6 0.50792519118295E-6 0.50686096381189E-6 - 0.50579887686162E-6 0.50473892614937E-6 0.50368110750109E-6 0.50262541675092E-6 0.50157184974165E-6 - 0.50052040231933E-6 0.49947107033835E-6 0.49842384966313E-6 0.4973787361678E-6 0.49633572573331E-6 - 0.49529481424824E-6 0.49425599760968E-6 0.49321927172179E-6 0.4921846324931E-6 0.49115207583974E-6 - 0.49012159768943E-6 0.48909319397791E-6 0.48806686064829E-6 0.4870425936497E-6 0.48602038893907E-6 - 0.48500024248187E-6 0.48398215025181E-6 0.4829661082318E-6 0.48195211240663E-6 0.48094015876939E-6 - 0.47993024332176E-6 0.47892236207646E-6 0.47791651105288E-6 0.47691268627643E-6 0.47591088377988E-6 - 0.47491109960344E-6 0.4739133297966E-6 0.47291757041683E-6 0.47192381752662E-6 0.47093206719421E-6 - 0.46994231549446E-6 0.46895455851508E-6 0.46796879234989E-6 0.46698501309972E-6 0.46600321687161E-6 - 0.46502339977987E-6 0.46404555794771E-6 0.46306968750604E-6 0.4620957845935E-6 0.4611238453514E-6 - 0.46015386592891E-6 0.45918584248494E-6 0.45821977118712E-6 0.45725564820994E-6 0.4562934697337E-6 - 0.45533323194613E-6 0.45437493104252E-6 0.45341856322613E-6 0.45246412470854E-6 0.45151161170463E-6 - 0.45056102043623E-6 0.44961234713234E-6 0.44866558803332E-6 0.44772073938509E-6 0.44677779744073E-6 - 0.44583675846044E-6 0.44489761871334E-6 
0.44396037447007E-6 0.44302502200926E-6 0.4420915576178E-6 - 0.44115997759254E-6 0.44023027823663E-6 0.43930245585873E-6 0.43837650677449E-6 0.43745242730657E-6 - 0.43653021378606E-6 0.43560986255171E-6 0.43469136994642E-6 0.43377473231879E-6 0.43285994602327E-6 - 0.43194700742718E-6 0.43103591290313E-6 0.43012665883007E-6 0.42921924159265E-6 0.4283136575823E-6 - 0.42740990319898E-6 0.42650797484975E-6 0.42560786894854E-6 0.42470958191223E-6 0.42381311016482E-6 - 0.42291845013967E-6 0.42202559827794E-6 0.42113455102723E-6 0.42024530484051E-6 0.41935785617771E-6 - 0.41847220150586E-6 0.41758833729949E-6 0.41670626004104E-6 0.41582596621529E-6 0.41494745231373E-6 - 0.41407071483477E-6 0.41319575028769E-6 0.41232255518734E-6 0.41145112605398E-6 0.41058145941408E-6 - 0.40971355180049E-6 0.40884739975446E-6 0.40798299982402E-6 0.40712034856243E-6 0.40625944252733E-6 - 0.4054002782825E-6 0.40454285240222E-6 0.40368716146668E-6 0.4028332020623E-6 0.40198097078222E-6 - 0.40113046422733E-6 0.40028167900182E-6 0.39943461171537E-6 0.39858925898335E-6 0.39774561743305E-6 - 0.39690368369675E-6 0.39606345441241E-6 0.39522492622342E-6 0.39438809577932E-6 0.39355295973805E-6 - 0.39271951476416E-6 0.39188775752825E-6 0.39105768470338E-6 0.39022929296902E-6 0.3894025790144E-6 - 0.38857753953581E-6 0.38775417123571E-6 0.3869324708206E-6 0.38611243500359E-6 0.38529406050482E-6 - 0.3844773440517E-6 0.38366228237906E-6 0.38284887222317E-6 0.38203711032685E-6 0.38122699343969E-6 - 0.38041851832104E-6 0.37961168173542E-6 0.37880648045254E-6 0.37800291124804E-6 0.37720097090372E-6 - 0.37640065620841E-6 0.37560196395745E-6 0.37480489095101E-6 0.37400943399393E-6 0.37321558989666E-6 - 0.37242335547925E-6 0.37163272756723E-6 0.37084370299192E-6 0.37005627858991E-6 0.36927045120379E-6 - 0.36848621768302E-6 0.36770357488338E-6 0.36692251966727E-6 0.36614304889945E-6 0.36536515945109E-6 - 0.36458884820096E-6 0.36381411203526E-6 0.3630409478455E-6 0.36226935252876E-6 0.36149932298858E-6 - 
0.36073085613408E-6 0.35996394887806E-6 0.35919859813917E-6 0.35843480084438E-6 0.35767255392678E-6 - 0.35691185432514E-6 0.35615269898305E-6 0.35539508484999E-6 0.35463900888191E-6 0.35388446804109E-6 - 0.35313145929677E-6 0.35237997961986E-6 0.35163002598753E-6 0.35088159538342E-6 0.35013468479954E-6 - 0.34938929123304E-6 0.3486454116857E-6 0.34790304316489E-6 0.34716218268362E-6 0.34642282726199E-6 - 0.34568497392613E-6 0.34494861970618E-6 0.34421376163674E-6 0.3434803967575E-6 0.34274852211759E-6 - 0.34201813477094E-6 0.34128923177686E-6 0.34056181019954E-6 0.33983586710873E-6 0.33911139958077E-6 - 0.33838840469783E-6 0.33766687954809E-6 0.33694682122187E-6 0.33622822681543E-6 0.33551109343243E-6 - 0.33479541818326E-6 0.33408119818366E-6 0.33336843055344E-6 0.33265711241815E-6 0.33194724090912E-6 - 0.33123881316411E-6 0.3305318263272E-6 0.32982627754469E-6 0.32912216396809E-6 0.32841948275428E-6 - 0.32771823106933E-6 0.32701840608346E-6 0.32632000497218E-6 0.32562302491634E-6 0.32492746310342E-6 - 0.32423331672278E-6 0.32354058296976E-6 0.32284925904575E-6 0.32215934215923E-6 0.32147082952352E-6 - 0.32078371835674E-6 0.32009800588229E-6 0.31941368932885E-6 0.31873076593144E-6 0.31804923293095E-6 - 0.31736908757153E-6 0.3166903271016E-6 0.31601294877416E-6 0.31533694985163E-6 0.31466232760059E-6 - 0.31398907929252E-6 0.31331720220348E-6 0.31264669361465E-6 0.3119775508135E-6 0.31130977109293E-6 - 0.31064335175113E-6 0.30997829008866E-6 0.30931458341149E-6 0.30865222903262E-6 0.30799122427099E-6 - 0.30733156645058E-6 0.30667325289922E-6 0.30601628095006E-6 0.30536064794173E-6 0.30470635121879E-6 - 0.30405338813167E-6 0.30340175603279E-6 0.3027514522796E-6 0.30210247423482E-6 0.30145481926906E-6 - 0.30080848475722E-6 0.30016346807861E-6 0.29951976661736E-6 0.2988773777624E-6 0.29823629890897E-6 - 0.29759652745754E-6 0.29695806081258E-6 0.29632089638172E-6 0.29568503157724E-6 0.2950504638197E-6 - 0.29441719053426E-6 0.2937852091508E-6 0.29315451710374E-6 
0.2925251118333E-6 0.29189699078295E-6 - 0.29127015140046E-6 0.29064459113801E-6 0.290020307456E-6 0.28939729781881E-6 0.28877555969539E-6 - 0.28815509055901E-6 0.28753588788778E-6 0.28691794916548E-6 0.28630127188096E-6 0.28568585352777E-6 - 0.2850716916022E-6 0.28445878360542E-6 0.2838471270452E-6 0.28323671943457E-6 0.28262755829126E-6 - 0.28201964113671E-6 0.28141296549723E-6 0.28080752890437E-6 0.28020332889507E-6 0.27960036301186E-6 - 0.27899862879883E-6 0.278398123805E-6 0.27779884558442E-6 0.27720079169827E-6 0.27660395971181E-6 - 0.27600834719427E-6 0.27541395171936E-6 0.27482077086526E-6 0.27422880221625E-6 0.27363804336153E-6 - 0.27304849189362E-6 0.27246014540813E-6 0.27187300150488E-6 0.27128705779177E-6 0.27070231188079E-6 - 0.27011876138829E-6 0.26953640393431E-6 0.26895523714357E-6 0.26837525864607E-6 0.26779646607665E-6 - 0.26721885707531E-6 0.26664242928351E-6 0.26606718034767E-6 0.26549310792001E-6 0.26492020965862E-6 - 0.26434848322552E-6 0.26377792628701E-6 0.26320853651431E-6 0.26264031158293E-6 0.26207324917083E-6 - 0.26150734696039E-6 0.26094260264063E-6 0.26037901390535E-6 0.2598165784526E-6 0.25925529398402E-6 - 0.2586951582058E-6 0.25813616882892E-6 0.25757832356903E-6 0.25702162014691E-6 0.25646605628534E-6 - 0.25591162971177E-6 0.25535833815838E-6 0.25480617936326E-6 0.25425515106831E-6 0.25370525101968E-6 - 0.25315647696784E-6 0.25260882666763E-6 0.25206229787843E-6 0.25151688836441E-6 0.25097259589304E-6 - 0.25042941823525E-6 0.24988735316581E-6 0.24934639846659E-6 0.24880655192319E-6 0.24826781132527E-6 - 0.24773017446607E-6 0.24719363914303E-6 0.2466582031588E-6 0.24612386432049E-6 0.2455906204398E-6 - 0.24505846932993E-6 0.24452740880872E-6 0.24399743669942E-6 0.24346855083041E-6 0.24294074903399E-6 - 0.242414029146E-6 0.24188838900653E-6 0.24136382646002E-6 0.24084033935553E-6 0.24031792554684E-6 - 0.23979658288999E-6 0.23927630924497E-6 0.23875710247583E-6 0.2382389604531E-6 0.23772188105056E-6 - 0.23720586214618E-6 
0.23669090162205E-6 0.23617699736534E-6 0.23566414726441E-6 0.23515234921207E-6 - 0.23464160110585E-6 0.23413190084919E-6 0.23362324634927E-6 0.23311563551618E-6 0.23260906626395E-6 - 0.23210353651067E-6 0.23159904417979E-6 0.23109558719904E-6 0.23059316349859E-6 0.23009177101166E-6 - 0.22959140767487E-6 0.22909207143248E-6 0.22859376023188E-6 0.22809647202411E-6 0.22760020476329E-6 - 0.22710495640757E-6 0.22661072491997E-6 0.22611750826763E-6 0.22562530442175E-6 0.22513411135532E-6 - 0.22464392704548E-6 0.22415474947459E-6 0.22366657662961E-6 0.22317940650123E-6 0.2226932370833E-6 - 0.22220806637375E-6 0.22172389237449E-6 0.22124071309157E-6 0.22075852653538E-6 0.2202773307186E-6 - 0.21979712365771E-6 0.21931790337306E-6 0.21883966789061E-6 0.21836241523969E-6 0.21788614345305E-6 - 0.21741085056711E-6 0.21693653462179E-6 0.21646319366217E-6 0.21599082573715E-6 0.21551942889874E-6 - 0.21504900120119E-6 0.21457954070242E-6 0.21411104546632E-6 0.2136435135603E-6 0.21317694305535E-6 - 0.21271133202631E-6 0.21224667855233E-6 0.21178298071495E-6 0.21132023659896E-6 0.21085844429258E-6 - 0.21039760188988E-6 0.20993770748795E-6 0.20947875918757E-6 0.20902075509313E-6 0.20856369331273E-6 - 0.20810757195825E-6 0.2076523891454E-6 0.20719814299354E-6 0.20674483162449E-6 0.20629245316373E-6 - 0.2058410057414E-6 0.20539048749163E-6 0.20494089655196E-6 0.20449223106335E-6 0.20404448917035E-6 - 0.20359766902132E-6 0.2031517687685E-6 0.20270678656837E-6 0.20226272057886E-6 0.20181956896149E-6 - 0.20137732988159E-6 0.20093600151011E-6 0.2004955820211E-6 0.20005606959141E-6 0.19961746240122E-6 - 0.19917975863406E-6 0.19874295647839E-6 0.19830705412626E-6 0.19787204977233E-6 0.19743794161359E-6 - 0.19700472785044E-6 0.19657240668878E-6 0.19614097633772E-6 0.19571043500974E-6 0.19528078092058E-6 - 0.19485201228948E-6 0.19442412733933E-6 0.19399712429661E-6 0.19357100139172E-6 0.19314575685652E-6 - 0.19272138892652E-6 0.19229789584156E-6 0.19187527584599E-6 0.19145352718724E-6 
0.19103264811563E-6 - 0.1906126368852E-6 0.19019349175333E-6 0.18977521097965E-6 0.18935779282713E-6 0.18894123556341E-6 - 0.18852553745972E-6 0.18811069679058E-6 0.18769671183334E-6 0.18728358086882E-6 0.18687130218138E-6 - 0.18645987405891E-6 0.18604929479314E-6 0.1856395626775E-6 0.18523067600884E-6 0.18482263308763E-6 - 0.18441543221889E-6 0.18400907171071E-6 0.1836035498739E-6 0.18319886502248E-6 0.18279501547361E-6 - 0.1823919995489E-6 0.18198981557335E-6 0.18158846187426E-6 0.18118793678126E-6 0.18078823862711E-6 - 0.18038936574971E-6 0.17999131648978E-6 0.17959408919123E-6 0.17919768220117E-6 0.17880209386993E-6 - 0.17840732255115E-6 0.1780133666018E-6 0.17762022438238E-6 0.17722789425491E-6 0.17683637458477E-6 - 0.17644566374143E-6 0.17605576009826E-6 0.17566666203165E-6 0.17527836792053E-6 0.17489087614703E-6 - 0.17450418509662E-6 0.17411829315844E-6 0.17373319872523E-6 0.17334890019121E-6 0.17296539595357E-6 - 0.17258268441254E-6 0.17220076397341E-6 0.17181963304391E-6 0.17143929003494E-6 0.17105973336058E-6 - 0.17068096143873E-6 0.17030297268818E-6 0.16992576553108E-6 0.16954933839311E-6 0.1691736897044E-6 - 0.16879881789783E-6 0.16842472140875E-6 0.16805139867552E-6 0.16767884813958E-6 0.16730706824598E-6 - 0.16693605744302E-6 0.16656581418121E-6 0.1661963369136E-6 0.16582762409594E-6 0.16545967418885E-6 - 0.16509248565548E-6 0.16472605696191E-6 0.16436038657702E-6 0.16399547297266E-6 0.163631314624E-6 - 0.16326791000935E-6 0.16290525761015E-6 0.16254335590929E-6 0.16218220339278E-6 0.16182179855053E-6 - 0.16146213987598E-6 0.16110322586548E-6 0.16074505501745E-6 0.16038762583333E-6 0.16003093681778E-6 - 0.159674986479E-6 0.15931977332855E-6 0.15896529587913E-6 0.15861155264628E-6 0.1582585421485E-6 - 0.15790626290888E-6 0.15755471345297E-6 0.15720389230884E-6 0.15685379800725E-6 0.15650442908161E-6 - 0.15615578406936E-6 0.15580786151081E-6 0.15546065994863E-6 0.15511417792702E-6 0.15476841399292E-6 - 0.15442336669805E-6 0.15407903459686E-6 
0.1537354162465E-6 0.15339251020663E-6 0.1530503150402E-6 - 0.15270882931226E-6 0.15236805159039E-6 0.15202798044477E-6 0.15168861444971E-6 0.15134995218192E-6 - 0.15101199222088E-6 0.15067473314885E-6 0.15033817355085E-6 0.15000231201483E-6 0.14966714713164E-6 - 0.14933267749494E-6 0.1489989017E-6 0.14866581834479E-6 0.14833342603116E-6 0.14800172336408E-6 - 0.14767070895131E-6 0.1473403814023E-6 0.14701073932935E-6 0.14668178134786E-6 0.14635350607651E-6 - 0.14602591213714E-6 0.14569899815242E-6 0.14537276274778E-6 0.14504720455148E-6 0.1447223221961E-6 - 0.14439811431649E-6 0.14407457954974E-6 0.14375171653547E-6 0.1434295239158E-6 0.14310800033651E-6 - 0.14278714444609E-6 0.14246695489506E-6 0.14214743033566E-6 0.14182856942267E-6 0.14151037081509E-6 - 0.14119283317438E-6 0.14087595516451E-6 0.14055973545189E-6 0.14024417270556E-6 0.13992926559739E-6 - 0.13961501280197E-6 0.13930141299686E-6 0.13898846486072E-6 0.13867616707508E-6 0.13836451832451E-6 - 0.13805351729678E-6 0.13774316268186E-6 0.13743345317262E-6 0.13712438746462E-6 0.13681596425595E-6 - 0.13650818224605E-6 0.13620104013693E-6 0.13589453663427E-6 0.1355886704466E-6 0.13528344028494E-6 - 0.13497884486204E-6 0.13467488289334E-6 0.13437155309707E-6 0.13406885419424E-6 0.13376678490864E-6 - 0.13346534396553E-6 0.13316453009279E-6 0.1328643420209E-6 0.13256477848358E-6 0.13226583821681E-6 - 0.13196751995896E-6 0.13166982245086E-6 0.13137274443571E-6 0.13107628465974E-6 0.13078044187175E-6 - 0.13048521482248E-6 0.13019060226453E-6 0.12989660295272E-6 0.12960321564589E-6 0.12931043910511E-6 - 0.12901827209375E-6 0.12872671337701E-6 0.12843576172244E-6 0.12814541590091E-6 0.12785567468595E-6 - 0.12756653685381E-6 0.12727800118097E-6 0.12699006644661E-6 0.12670273143323E-6 0.12641599492658E-6 - 0.12612985571465E-6 0.12584431258701E-6 0.12555936433569E-6 0.12527500975527E-6 0.12499124764326E-6 - 0.1247080767999E-6 0.12442549602638E-6 0.12414350412606E-6 0.12386209990456E-6 0.12358128217163E-6 - 
0.12330104973874E-6 0.12302140141974E-6 0.12274233603082E-6 0.12246385239098E-6 0.12218594932018E-6 - 0.12190862564088E-6 0.1216318801781E-6 0.12135571176004E-6 0.12108011921699E-6 0.12080510138127E-6 - 0.12053065708746E-6 0.12025678517237E-6 0.11998348447574E-6 0.11971075383976E-6 0.11943859210815E-6 - 0.1191669981264E-6 0.11889597074201E-6 0.11862550880645E-6 0.11835561117306E-6 0.11808627669737E-6 - 0.11781750423694E-6 0.11754929265162E-6 0.11728164080369E-6 0.11701454755774E-6 0.11674801178071E-6 - 0.11648203234102E-6 0.11621660810943E-6 0.11595173795933E-6 0.1156874207666E-6 0.11542365540923E-6 - 0.11516044076738E-6 0.11489777572337E-6 0.11463565916182E-6 0.11437408996988E-6 0.11411306703719E-6 - 0.1138525892545E-6 0.11359265551466E-6 0.11333326471275E-6 0.11307441574712E-6 0.11281610751802E-6 - 0.11255833892769E-6 0.11230110888046E-6 0.11204441628262E-6 0.11178826004345E-6 0.11153263907444E-6 - 0.11127755228887E-6 0.1110229986012E-6 0.11076897692801E-6 0.11051548618927E-6 0.11026252530704E-6 - 0.1100100932054E-6 0.10975818881052E-6 0.10950681105101E-6 0.10925595885695E-6 0.10900563116019E-6 - 0.10875582689434E-6 0.10850654499696E-6 0.10825778440732E-6 0.10800954406655E-6 0.10776182291743E-6 - 0.10751461990481E-6 0.10726793397625E-6 0.10702176408145E-6 0.10677610917225E-6 0.10653096820136E-6 - 0.10628634012363E-6 0.10604222389691E-6 0.1057986184815E-6 0.10555552283979E-6 0.1053129359354E-6 - 0.10507085673421E-6 0.10482928420429E-6 0.10458821731603E-6 0.10434765504208E-6 0.1041075963562E-6 - 0.10386804023415E-6 0.10362898565379E-6 0.10339043159608E-6 0.10315237704371E-6 0.10291482098116E-6 - 0.10267776239483E-6 0.10244120027311E-6 0.10220513360673E-6 0.10196956138854E-6 0.10173448261306E-6 - 0.10149989627625E-6 0.1012658013759E-6 0.10103219691302E-6 0.10079908189053E-6 0.10056645531325E-6 - 0.10033431618752E-6 0.10010266352173E-6 0.99871496326507E-7 0.99640813614546E-7 0.99410614400777E-7 - 0.99180897700947E-7 0.98951662532931E-7 0.98722907916934E-7 
0.98494632875696E-7 0.98266836433621E-7 - 0.98039517617095E-7 0.97812675454566E-7 0.97586308976399E-7 0.97360417214004E-7 0.97134999200744E-7 - 0.96910053972677E-7 0.96685580567987E-7 0.96461578026692E-7 0.96238045390396E-7 0.9601498170266E-7 - 0.95792386009076E-7 0.95570257357299E-7 0.95348594797169E-7 0.95127397379411E-7 0.94906664156685E-7 - 0.94686394183654E-7 0.94466586517759E-7 0.94247240218073E-7 0.94028354345321E-7 0.93809927962043E-7 - 0.93591960132604E-7 0.93374449923632E-7 0.93157396403705E-7 0.92940798642864E-7 0.9272465571253E-7 - 0.92508966685816E-7 0.9229373063883E-7 0.92078946649351E-7 0.91864613796915E-7 0.91650731162641E-7 - 0.91437297829488E-7 0.91224312882497E-7 0.91011775408616E-7 0.90799684496804E-7 0.90588039236844E-7 - 0.90376838720462E-7 0.90166082041619E-7 0.89955768296528E-7 0.89745896583095E-7 0.89536466000724E-7 - 0.89327475650639E-7 0.89118924635911E-7 0.88910812061683E-7 0.88703137035072E-7 0.88495898664247E-7 - 0.8828909605902E-7 0.88082728330889E-7 0.87876794594206E-7 0.87671293964718E-7 0.87466225559956E-7 - 0.87261588499227E-7 0.87057381903948E-7 0.86853604896281E-7 0.8665025660024E-7 0.86447336141755E-7 - 0.86244842649236E-7 0.86042775252633E-7 0.85841133083409E-7 0.85639915274728E-7 0.85439120961455E-7 - 0.85238749280571E-7 0.85038799370872E-7 0.84839270372438E-7 0.84640161426701E-7 0.84441471676648E-7 - 0.8424320026807E-7 0.84045346348233E-7 0.83847909066043E-7 0.83650887571928E-7 0.83454281017991E-7 - 0.83258088558302E-7 0.83062309348705E-7 0.82866942546851E-7 0.82671987311152E-7 0.82477442801796E-7 - 0.82283308181153E-7 0.82089582613639E-7 0.81896265265242E-7 0.81703355303318E-7 0.81510851896922E-7 - 0.81318754216844E-7 0.81127061435752E-7 0.80935772728172E-7 0.80744887269542E-7 0.80554404236874E-7 - 0.80364322808795E-7 0.80174642166532E-7 0.79985361492682E-7 0.79796479971345E-7 0.79607996788167E-7 - 0.79419911130371E-7 0.79232222187153E-7 0.7904492914938E-7 0.78858031209365E-7 0.78671527560507E-7 - 0.78485417397764E-7 
0.7829969991848E-7 0.78114374321595E-7 0.77929439807554E-7 0.77744895578472E-7 - 0.77560740838264E-7 0.77376974791968E-7 0.77193596645954E-7 0.77010605607997E-7 0.76828000888533E-7 - 0.76645781699299E-7 0.76463947253552E-7 0.76282496765991E-7 0.76101429452866E-7 0.75920744532244E-7 - 0.75740441223817E-7 0.75560518748891E-7 0.75380976329625E-7 0.75201813189776E-7 0.75023028555162E-7 - 0.74844621653408E-7 0.74666591713636E-7 0.74488937966195E-7 0.74311659643027E-7 0.74134755977708E-7 - 0.7395822620557E-7 0.73782069563675E-7 0.73606285289852E-7 0.73430872623436E-7 0.73255830805311E-7 - 0.73081159078728E-7 0.72906856688222E-7 0.72732922879686E-7 0.7255935690046E-7 0.72386157999326E-7 - 0.72213325426961E-7 0.72040858435579E-7 0.71868756278666E-7 0.71697018210748E-7 0.7152564348775E-7 - 0.71354631367953E-7 0.71183981111075E-7 0.71013691978257E-7 0.70843763231878E-7 0.70674194135815E-7 - 0.70504983955624E-7 0.70336131958418E-7 0.70167637413008E-7 0.69999499588791E-7 0.69831717756769E-7 - 0.69664291189679E-7 0.6949721916219E-7 0.69330500950222E-7 0.6916413583115E-7 0.68998123083884E-7 - 0.68832461988799E-7 0.6866715182711E-7 0.68502191881489E-7 0.68337581436582E-7 0.68173319778639E-7 - 0.68009406195293E-7 0.67845839975369E-7 0.67682620409148E-7 0.67519746788449E-7 0.67357218406679E-7 - 0.6719503455887E-7 0.67033194540714E-7 0.66871697649342E-7 0.66710543183372E-7 0.66549730443547E-7 - 0.66389258731826E-7 0.662291273514E-7 0.66069335606812E-7 0.65909882803943E-7 0.65750768250479E-7 - 0.65591991255538E-7 0.65433551129334E-7 0.65275447183076E-7 0.65117678729244E-7 0.64960245082593E-7 - 0.64803145559138E-7 0.64646379476214E-7 0.64489946152329E-7 0.64333844907361E-7 0.64178075062775E-7 - 0.64022635941473E-7 0.63867526867877E-7 0.63712747167002E-7 0.63558296165324E-7 0.6340417319099E-7 - 0.63250377573872E-7 0.63096908645088E-7 0.62943765736904E-7 0.62790948182945E-7 0.62638455318211E-7 - 0.6248628647924E-7 0.62334441004023E-7 0.62182918231426E-7 0.62031717501545E-7 
0.6188083815572E-7 - 0.61730279537413E-7 0.61580040991134E-7 0.61430121862737E-7 0.6128052149943E-7 0.61131239249975E-7 - 0.60982274463737E-7 0.60833626491453E-7 0.60685294685278E-7 0.60537278399248E-7 0.60389576988543E-7 - 0.60242189809491E-7 0.6009511621969E-7 0.59948355577999E-7 0.59801907244899E-7 0.59655770582215E-7 - 0.59509944952752E-7 0.59364429720315E-7 0.59219224249857E-7 0.59074327908477E-7 0.5892974006439E-7 - 0.58785460087026E-7 0.58641487346918E-7 0.58497821215849E-7 0.58354461067123E-7 0.58211406275375E-7 - 0.580686562166E-7 0.57926210267366E-7 0.57784067805572E-7 0.57642228210734E-7 0.57500690863915E-7 - 0.57359455147353E-7 0.57218520444313E-7 0.57077886139325E-7 0.5693755161822E-7 0.56797516268279E-7 - 0.56657779478149E-7 0.56518340637188E-7 0.56379199135924E-7 0.56240354366077E-7 0.56101805721328E-7 - 0.55963552596382E-7 0.55825594387072E-7 0.55687930490393E-7 0.55550560304497E-7 0.55413483229071E-7 - 0.5527669866504E-7 0.55140206014423E-7 0.55004004680027E-7 0.54868094065833E-7 0.54732473577607E-7 - 0.54597142622323E-7 0.5446210060807E-7 0.54327346944227E-7 0.54192881041503E-7 0.54058702311463E-7 - 0.53924810166657E-7 0.5379120402069E-7 0.53657883289156E-7 0.53524847388636E-7 0.53392095736848E-7 - 0.53259627752593E-7 0.53127442855814E-7 0.52995540467853E-7 0.52863920011271E-7 0.52732580909847E-7 - 0.52601522587961E-7 0.52470744471192E-7 0.52340245986674E-7 0.52210026562921E-7 0.52080085629547E-7 - 0.5195042261711E-7 0.51821036957342E-7 0.51691928083204E-7 0.51563095428999E-7 0.51434538430312E-7 - 0.51306256523309E-7 0.51178249145265E-7 0.51050515734602E-7 0.50923055731565E-7 0.50795868577347E-7 - 0.50668953714155E-7 0.50542310585274E-7 0.50415938635059E-7 0.5028983730931E-7 0.50164006054954E-7 - 0.50038444319902E-7 0.49913151552871E-7 0.49788127203623E-7 0.49663370723632E-7 0.4953888156546E-7 - 0.49414659182737E-7 0.49290703030051E-7 0.49167012563088E-7 0.49043587238806E-7 0.48920426515339E-7 - 0.48797529852085E-7 0.48674896708895E-7 
0.48552526546818E-7 0.48430418828183E-7 0.48308573016779E-7 - 0.48186988577307E-7 0.48065664975614E-7 0.47944601678687E-7 0.47823798154616E-7 0.47703253872089E-7 - 0.47582968300881E-7 0.47462940912263E-7 0.4734317117873E-7 0.47223658573793E-7 0.47104402571875E-7 - 0.46985402648485E-7 0.46866658280272E-7 0.46748168945076E-7 0.46629934121932E-7 0.46511953290432E-7 - 0.46394225931221E-7 0.46276751526046E-7 0.46159529558261E-7 0.46042559512117E-7 0.45925840872807E-7 - 0.45809373126541E-7 0.45693155760526E-7 0.45577188263299E-7 0.45461470124447E-7 0.45346000834433E-7 - 0.45230779884495E-7 0.45115806766831E-7 0.45001080975257E-7 0.44886602004548E-7 0.44772369350471E-7 - 0.44658382509735E-7 0.44544640980051E-7 0.44431144260302E-7 0.44317891850449E-7 0.44204883251589E-7 - 0.44092117965285E-7 0.4397959549418E-7 0.4386731534216E-7 0.43755277014431E-7 0.43643480017106E-7 - 0.43531923857188E-7 0.43420608042684E-7 0.43309532082627E-7 0.43198695487214E-7 0.43088097767726E-7 - 0.42977738436101E-7 0.42867617005186E-7 0.42757732988726E-7 0.42648085902156E-7 0.42538675261668E-7 - 0.42429500584444E-7 0.4232056138865E-7 0.42211857193603E-7 0.42103387519146E-7 0.41995151886119E-7 - 0.41887149816414E-7 0.41779380833354E-7 0.41671844461109E-7 0.4156454022471E-7 0.41457467650132E-7 - 0.41350626264279E-7 0.41244015595311E-7 0.41137635172365E-7 0.41031484525336E-7 0.40925563184876E-7 - 0.40819870682494E-7 0.4071440655127E-7 0.40609170325122E-7 0.40504161538889E-7 0.40399379728292E-7 - 0.40294824429981E-7 0.40190495181692E-7 0.40086391522147E-7 0.39982512991086E-7 0.39878859128749E-7 - 0.39775429476348E-7 0.3967222357629E-7 0.39569240972162E-7 0.39466481208422E-7 0.39363943830288E-7 - 0.39261628383916E-7 0.39159534416433E-7 0.39057661476095E-7 0.38956009112144E-7 0.38854576874395E-7 - 0.38753364313525E-7 0.3865237098109E-7 0.38551596430126E-7 0.38451040214418E-7 0.38350701888615E-7 - 0.38250581008231E-7 0.38150677129631E-7 0.38050989810362E-7 0.37951518608881E-7 0.37852263084475E-7 - 
0.37753222797027E-7 0.37654397307277E-7 0.37555786177322E-7 0.37457388970177E-7 0.37359205249683E-7 - 0.37261234580624E-7 0.37163476528768E-7 0.37065930660561E-7 0.36968596543183E-7 0.3687147374458E-7 - 0.36774561834229E-7 0.36677860382333E-7 0.36581368959949E-7 0.36485087138936E-7 0.36389014491997E-7 - 0.36293150592903E-7 0.36197495016336E-7 0.36102047337891E-7 0.36006807133612E-7 0.35911773980429E-7 - 0.35816947456446E-7 0.35722327140826E-7 0.3562791261354E-7 0.35533703455266E-7 0.35439699247556E-7 - 0.35345899572871E-7 0.35252304014681E-7 0.35158912157371E-7 0.35065723585867E-7 0.3497273788592E-7 - 0.34879954644124E-7 0.34787373448369E-7 0.34694993887244E-7 0.34602815550167E-7 0.34510838027391E-7 - 0.34419060909965E-7 0.34327483789958E-7 0.3423610626028E-7 0.34144927914608E-7 0.34053948347261E-7 - 0.33963167153336E-7 0.33872583929156E-7 0.33782198271866E-7 0.336920097794E-7 0.33602018050488E-7 - 0.33512222684665E-7 0.33422623282381E-7 0.33333219444957E-7 0.33244010774636E-7 0.33154996874098E-7 - 0.33066177346887E-7 0.32977551797487E-7 0.32889119831471E-7 0.32800881055083E-7 0.32712835075452E-7 - 0.32624981500546E-7 0.32537319939159E-7 0.32449850000581E-7 0.32362571294907E-7 0.32275483433312E-7 - 0.32188586027886E-7 0.32101878691464E-7 0.32015361037618E-7 0.31929032680702E-7 0.31842893235932E-7 - 0.31756942319438E-7 0.3167117954822E-7 0.31585604539723E-7 0.31500216912171E-7 0.31415016284595E-7 - 0.3133000227724E-7 0.31245174511002E-7 0.31160532607509E-7 0.31076076189152E-7 0.3099180487904E-7 - 0.30907718301327E-7 0.30823816080935E-7 0.3074009784344E-7 0.3065656321499E-7 0.3057321182242E-7 - 0.30490043293842E-7 0.30407057258089E-7 0.30324253344724E-7 0.30241631183977E-7 0.3015919040682E-7 - 0.30076930645143E-7 0.29994851531652E-7 0.29912952699893E-7 0.29831233783768E-7 0.29749694417975E-7 - 0.29668334238138E-7 0.29587152880892E-7 0.29506149983531E-7 0.29425325184032E-7 0.29344678121113E-7 - 0.29264208434254E-7 0.29183915763835E-7 0.29103799751018E-7 
0.29023860037491E-7 0.2894409626562E-7 - 0.28864508078426E-7 0.28785095120192E-7 0.28705857035755E-7 0.2862679347071E-7 0.28547904071408E-7 - 0.28469188485037E-7 0.28390646359207E-7 0.28312277342256E-7 0.28234081083297E-7 0.28156057232554E-7 - 0.28078205440856E-7 0.28000525359697E-7 0.27923016641275E-7 0.27845678938467E-7 0.27768511905106E-7 - 0.27691515195736E-7 0.27614688465493E-7 0.27538031370075E-7 0.27461543565797E-7 0.273852247102E-7 - 0.2730907446145E-7 0.27233092478393E-7 0.27157278420504E-7 0.27081631947934E-7 0.27006152721705E-7 - 0.26930840403585E-7 0.2685569465609E-7 0.26780715142111E-7 0.26705901525256E-7 0.2663125347002E-7 - 0.26556770641793E-7 0.26482452706576E-7 0.26408299331027E-7 0.26334310182483E-7 0.26260484928996E-7 - 0.26186823239458E-7 0.26113324783497E-7 0.26039989231174E-7 0.25966816253183E-7 0.25893805520846E-7 - 0.25820956706683E-7 0.25748269483743E-7 0.25675743525717E-7 0.25603378506947E-7 0.25531174102395E-7 - 0.25459129987924E-7 0.25387245840064E-7 0.25315521335976E-7 0.25243956153282E-7 0.25172549970231E-7 - 0.25101302466087E-7 0.2503021332081E-7 0.24959282214954E-7 0.24888508829828E-7 0.24817892847446E-7 - 0.24747433950348E-7 0.24677131821627E-7 0.24606986144936E-7 0.24536996605075E-7 0.24467162887388E-7 - 0.24397484677858E-7 0.24327961663087E-7 0.24258593530299E-7 0.24189379967509E-7 0.24120320663405E-7 - 0.24051415307347E-7 0.2398266358912E-7 0.23914065199147E-7 0.23845619828691E-7 0.23777327169798E-7 - 0.23709186915087E-7 0.23641198757807E-7 0.23573362391823E-7 0.23505677511678E-7 0.23438143812667E-7 - 0.23370760990751E-7 0.23303528742338E-7 0.23236446764442E-7 0.23169514754687E-7 0.23102732411696E-7 - 0.23036099434599E-7 0.22969615523149E-7 0.22903280377726E-7 0.22837093699262E-7 0.22771055189557E-7 - 0.22705164551016E-7 0.22639421486606E-7 0.22573825699728E-7 0.22508376894346E-7 0.22443074775422E-7 - 0.22377919048532E-7 0.22312909419831E-7 0.22248045596053E-7 0.2218332728452E-7 0.22118754193265E-7 - 0.22054326030992E-7 
0.21990042507067E-7 0.21925903331212E-7 0.21861908213776E-7 0.21798056865806E-7 - 0.21734348999187E-7 0.21670784326277E-7 0.21607362560131E-7 0.21544083414426E-7 0.21480946603455E-7 - 0.21417951841881E-7 0.21355098844945E-7 0.21292387328728E-7 0.21229817010022E-7 0.21167387606155E-7 - 0.21105098835008E-7 0.21042950415045E-7 0.20980942065363E-7 0.20919073505746E-7 0.20857344456585E-7 - 0.2079575463871E-7 0.20734303773519E-7 0.20672991582998E-7 0.2061181779001E-7 0.20550782117898E-7 - 0.20489884290589E-7 0.20429124032592E-7 0.20368501068927E-7 0.20308015125424E-7 0.2024766592846E-7 - 0.20187453204922E-7 0.20127376682133E-7 0.2006743608791E-7 0.20007631151046E-7 0.19947961600867E-7 - 0.1988842716724E-7 0.19829027580565E-7 0.1976976257178E-7 0.197106318725E-7 0.19651635214949E-7 - 0.19592772331955E-7 0.19534042956687E-7 0.1947544682288E-7 0.19416983664954E-7 0.1935865321811E-7 - 0.19300455218029E-7 0.19242389400894E-7 0.19184455503428E-7 0.19126653262917E-7 0.19068982417406E-7 - 0.19011442705489E-7 0.18954033866149E-7 0.18896755638865E-7 0.18839607763584E-7 0.18782589981252E-7 - 0.18725702033198E-7 0.18668943661327E-7 0.18612314608131E-7 0.18555814616692E-7 0.18499443430465E-7 - 0.18443200793429E-7 0.18387086450131E-7 0.18331100145966E-7 0.18275241626769E-7 0.18219510638877E-7 - 0.1816390692915E-7 0.18108430244927E-7 0.18053080334336E-7 0.17997856945994E-7 0.17942759828984E-7 - 0.17887788732835E-7 0.17832943407528E-7 0.17778223603922E-7 0.17723629073347E-7 0.17669159567637E-7 - 0.17614814839161E-7 0.17560594640761E-7 0.1750649872593E-7 0.1745252684871E-7 0.1739867876368E-7 - 0.17344954225797E-7 0.17291352990521E-7 0.17237874813959E-7 0.17184519452875E-7 0.17131286664475E-7 - 0.1707817620651E-7 0.17025187837224E-7 0.16972321315385E-7 0.16919576400405E-7 0.16866952852209E-7 - 0.1681445043111E-7 0.16762068897894E-7 0.16709808013795E-7 0.1665766754095E-7 0.16605647241874E-7 - 0.1655374687958E-7 0.16501966217574E-7 0.16450305019803E-7 0.16398763050931E-7 0.163473400761E-7 - 
0.1629603586093E-7 0.16244850171416E-7 0.16193782773999E-7 0.16142833435885E-7 0.16092001924801E-7 - 0.1604128800891E-7 0.15990691456958E-7 0.15940212038191E-7 0.1588984952228E-7 0.15839603679326E-7 - 0.15789474279824E-7 0.15739461095143E-7 0.15689563897045E-7 0.15639782457773E-7 0.15590116550075E-7 - 0.15540565947138E-7 0.15491130422743E-7 0.15441809751168E-7 0.15392603707174E-7 0.1534351206592E-7 - 0.15294534603011E-7 0.15245671094668E-7 0.15196921317702E-7 0.1514828504933E-7 0.15099762067288E-7 - 0.15051352149751E-7 0.15003055075397E-7 0.14954870623508E-7 0.14906798573836E-7 0.14858838706483E-7 - 0.14810990801993E-7 0.14763254641343E-7 0.14715630006336E-7 0.14668116679116E-7 0.14620714442301E-7 - 0.14573423078975E-7 0.1452624237262E-7 0.14479172107393E-7 0.14432212067886E-7 0.1438536203912E-7 - 0.14338621806474E-7 0.14291991155716E-7 0.14245469873388E-7 0.14199057746493E-7 0.14152754562454E-7 - 0.14106560109139E-7 0.14060474174828E-7 0.14014496548367E-7 0.13968627019124E-7 0.13922865376932E-7 - 0.13877211411928E-7 0.13831664914693E-7 0.13786225676329E-7 0.13740893488592E-7 0.13695668143568E-7 - 0.13650549433881E-7 0.13605537152612E-7 0.13560631093292E-7 0.13515831049792E-7 0.13471136816385E-7 - 0.13426548187971E-7 0.13382064959992E-7 0.1333768692826E-7 0.13293413889061E-7 0.1324924563908E-7 - 0.13205181975485E-7 0.13161222695984E-7 0.13117367598717E-7 0.13073616482166E-7 0.13029969145229E-7 - 0.12986425387221E-7 0.12942985008202E-7 0.12899647808558E-7 0.128564135891E-7 0.12813282151065E-7 - 0.12770253296039E-7 0.12727326826306E-7 0.1268450254451E-7 0.12641780253698E-7 0.12599159757279E-7 - 0.12556640859029E-7 0.12514223363394E-7 0.12471907075215E-7 0.12429691799729E-7 0.12387577342686E-7 - 0.12345563510185E-7 0.12303650108839E-7 0.12261836945723E-7 0.12220123828341E-7 0.12178510564499E-7 - 0.12136996962402E-7 0.12095582830761E-7 0.12054267978878E-7 0.12013052216364E-7 0.11971935353319E-7 - 0.11930917200233E-7 0.11889997568001E-7 0.11849176268049E-7 
0.11808453112187E-7 0.11767827912599E-7 - 0.11727300481866E-7 0.11686870632887E-7 0.11646538179298E-7 0.11606302935008E-7 0.11566164714376E-7 - 0.11526123332242E-7 0.1148617860386E-7 0.11446330344769E-7 0.11406578370891E-7 0.11366922498559E-7 - 0.113273625448E-7 0.11287898326927E-7 0.11248529662668E-7 0.11209256370146E-7 0.11170078267827E-7 - 0.11130995174773E-7 0.11092006910385E-7 0.11053113294443E-7 0.110143141471E-7 0.10975609288805E-7 - 0.10936998540702E-7 0.10898481724276E-7 0.10860058661385E-7 0.10821729174296E-7 0.107834930856E-7 - 0.10745350218417E-7 0.1070730039629E-7 0.10669343443128E-7 0.10631479183189E-7 0.10593707441084E-7 - 0.10556028041895E-7 0.10518440811205E-7 0.10480945574885E-7 0.10443542159303E-7 0.10406230391172E-7 - 0.10369010097587E-7 0.10331881106144E-7 0.10294843244792E-7 0.10257896341815E-7 0.10221040225854E-7 - 0.10184274725861E-7 0.10147599671488E-7 0.10111014892635E-7 0.10074520219609E-7 0.10038115483127E-7 - 0.100018005142E-7 0.99655751443898E-8 0.99294392056006E-8 0.98933925300963E-8 0.985743495047E-8 - 0.98215662996129E-8 0.97857864110068E-8 0.97500951185305E-8 0.97144922563608E-8 0.96789776591658E-8 - 0.9643551161947E-8 0.96082126000538E-8 0.95729618091827E-8 0.95377986252994E-8 0.95027228850362E-8 - 0.9467734425311E-8 0.94328330833988E-8 0.93980186969912E-8 0.93632911040666E-8 0.93286501430946E-8 - 0.92940956529207E-8 0.92596274727343E-8 0.92252454420422E-8 0.91909494006551E-8 0.9156739188873E-8 - 0.9122614647471E-8 0.90885756174901E-8 0.90546219403983E-8 0.90207534579727E-8 0.89869700123648E-8 - 0.8953271446209E-8 0.89196576024495E-8 0.88861283243508E-8 0.88526834555079E-8 0.88193228398138E-8 - 0.87860463218106E-8 0.87528537462664E-8 0.87197449583285E-8 0.8686719803517E-8 0.86537781276128E-8 - 0.86209197769475E-8 0.85881445981489E-8 0.85554524381767E-8 0.85228431443106E-8 0.8490316564089E-8 - 0.84578725456319E-8 0.84255109374042E-8 0.83932315881645E-8 0.83610343470801E-8 0.83289190635861E-8 - 0.82968855875519E-8 
0.82649337692506E-8 0.8233063459264E-8 0.82012745084779E-8 0.81695667680759E-8 - 0.813794008963E-8 0.81063943252388E-8 0.80749293272095E-8 0.80435449482952E-8 0.80122410415829E-8 - 0.79810174604789E-8 0.79498740586968E-8 0.79188106902161E-8 0.78878272095E-8 0.78569234714343E-8 - 0.78260993311451E-8 0.7795354644177E-8 0.77646892663426E-8 0.7734103053819E-8 0.77035958632275E-8 - 0.76731675514659E-8 0.7642817975747E-8 0.76125469935837E-8 0.75823544627826E-8 0.75522402417361E-8 - 0.75222041890383E-8 0.74922461636556E-8 0.74623660249011E-8 0.74325636323199E-8 0.74028388459867E-8 - 0.73731915262281E-8 0.73436215336865E-8 0.73141287293207E-8 0.72847129743136E-8 0.72553741304106E-8 - 0.7226112059649E-8 0.71969266243459E-8 0.71678176872172E-8 0.71387851112108E-8 0.71098287596969E-8 - 0.7080948496408E-8 0.70521441853541E-8 0.70234156908522E-8 0.69947628774783E-8 0.6966185610193E-8 - 0.69376837544306E-8 0.69092571758276E-8 0.68809057404139E-8 0.68526293145E-8 0.68244277646837E-8 - 0.67963009580373E-8 0.67682487618737E-8 0.67402710438114E-8 0.67123676717705E-8 0.66845385138676E-8 - 0.6656783438858E-8 0.66291023156615E-8 0.66014950135383E-8 0.65739614021255E-8 0.65465013513193E-8 - 0.65191147313305E-8 0.64918014126383E-8 0.64645612660106E-8 0.64373941627507E-8 0.64102999743402E-8 - 0.63832785726234E-8 0.63563298297599E-8 0.6329453618122E-8 0.63026498105914E-8 0.62759182802669E-8 - 0.62492589005472E-8 0.62226715451349E-8 0.61961560879232E-8 0.6169712403362E-8 0.61433403661441E-8 - 0.61170398512311E-8 0.60908107339634E-8 0.60646528898825E-8 0.6038566194944E-8 0.60125505254259E-8 - 0.59866057578533E-8 0.5960731769071E-8 0.59349284361434E-8 0.59091956365143E-8 0.58835332480497E-8 - 0.58579411487942E-8 0.58324192171867E-8 0.5806967331908E-8 0.5781585371919E-8 0.5756273216621E-8 - 0.57310307456277E-8 0.57058578388515E-8 0.56807543764812E-8 0.56557202388992E-8 0.56307553070759E-8 - 0.56058594621314E-8 0.55810325855055E-8 0.55562745589688E-8 0.55315852644732E-8 0.55069645844413E-8 - 
0.54824124015303E-8 0.54579285986628E-8 0.54335130590686E-8 0.54091656661592E-8 0.53848863038127E-8 - 0.53606748562106E-8 0.53365312077246E-8 0.53124552431695E-8 0.52884468475635E-8 0.52645059062109E-8 - 0.52406323047038E-8 0.5216825928795E-8 0.51930866647906E-8 0.51694143991906E-8 0.51458090187582E-8 - 0.51222704106173E-8 0.50987984620638E-8 0.50753930608021E-8 0.50520540948208E-8 0.5028781452334E-8 - 0.50055750218742E-8 0.49824346921586E-8 0.49593603522836E-8 0.49363518917244E-8 0.49134092001122E-8 - 0.48905321674716E-8 0.48677206840311E-8 0.48449746402952E-8 0.48222939271773E-8 0.47996784357756E-8 - 0.47771280574848E-8 0.47546426839479E-8 0.47322222070002E-8 0.47098665190211E-8 0.46875755125178E-8 - 0.46653490803112E-8 0.46431871155291E-8 0.46210895114531E-8 0.45990561618354E-8 0.45770869606212E-8 - 0.45551818020094E-8 0.45333405804963E-8 0.45115631907291E-8 0.44898495278205E-8 0.44681994871419E-8 - 0.44466129642565E-8 0.44250898551148E-8 0.44036300558215E-8 0.43822334628276E-8 0.43608999729092E-8 - 0.43396294830253E-8 0.43184218904511E-8 0.42972770926485E-8 0.42761949873693E-8 0.42551754728037E-8 - 0.42342184472559E-8 0.42133238094112E-8 0.41924914582004E-8 0.41717212927681E-8 0.41510132125688E-8 - 0.41303671172098E-8 0.41097829066822E-8 0.40892604813165E-8 0.4068799741578E-8 0.40484005883244E-8 - 0.40280629225782E-8 0.40077866456387E-8 0.39875716591844E-8 0.39674178650508E-8 0.39473251653742E-8 - 0.3927293462509E-8 0.39073226590029E-8 0.38874126579061E-8 0.38675633623753E-8 0.3847774675879E-8 - 0.38280465021665E-8 0.38083787451145E-8 0.37887713090768E-8 0.37692240985563E-8 0.37497370183043E-8 - 0.3730309973356E-8 0.37109428688681E-8 0.36916356104686E-8 0.36723881039987E-8 0.36532002554872E-8 - 0.3634071971331E-8 0.36150031580542E-8 0.35959937225289E-8 0.35770435719186E-8 0.35581526135426E-8 - 0.35393207550515E-8 0.35205479042438E-8 0.35018339692119E-8 0.34831788584444E-8 0.34645824805339E-8 - 0.34460447444312E-8 0.34275655592855E-8 0.34091448344409E-8 
0.33907824796619E-8 0.33724784048405E-8 - 0.33542325201495E-8 0.33360447360114E-8 0.33179149629486E-8 0.32998431120282E-8 0.32818290943994E-8 - 0.32638728214839E-8 0.32459742050344E-8 0.32281331569438E-8 0.32103495894117E-8 0.31926234148233E-8 - 0.31749545457604E-8 0.31573428952726E-8 0.31397883764948E-8 0.31222909028734E-8 0.31048503881079E-8 - 0.30874667460069E-8 0.30701398908559E-8 0.30528697370518E-8 0.30356561992369E-8 0.30184991923239E-8 - 0.30013986313201E-8 0.29843544317099E-8 0.29673665091461E-8 0.29504347794679E-8 0.29335591588659E-8 - 0.29167395636315E-8 0.28999759104088E-8 0.28832681160994E-8 0.28666160977425E-8 0.28500197727131E-8 - 0.28334790585015E-8 0.28169938728966E-8 0.28005641340447E-8 0.27841897601741E-8 0.27678706698845E-8 - 0.27516067819375E-8 0.27353980152904E-8 0.2719244289291E-8 0.27031455233915E-8 0.26871016373294E-8 - 0.26711125510648E-8 0.26551781846571E-8 0.26392984586838E-8 0.26234732937881E-8 0.26077026108763E-8 - 0.2591986331144E-8 0.25763243758705E-8 0.25607166667617E-8 0.25451631256764E-8 0.2529663674664E-8 - 0.25142182360752E-8 0.24988267323451E-8 0.24834890863008E-8 0.24682052210085E-8 0.24529750596347E-8 - 0.24377985257669E-8 0.24226755430905E-8 0.24076060355377E-8 0.23925899272998E-8 0.23776271426374E-8 - 0.23627176062907E-8 0.23478612431236E-8 0.23330579781899E-8 0.23183077368829E-8 0.23036104446702E-8 - 0.22889660273772E-8 0.22743744110503E-8 0.22598355218628E-8 0.22453492863257E-8 0.22309156310343E-8 - 0.22165344828913E-8 0.2202205769121E-8 0.21879294170118E-8 0.21737053542293E-8 0.21595335085626E-8 - 0.21454138080025E-8 0.21313461809102E-8 0.21173305557257E-8 0.21033668611824E-8 0.20894550262061E-8 - 0.20755949798272E-8 0.20617866515616E-8 0.20480299709706E-8 0.20343248678838E-8 0.2020671272398E-8 - 0.20070691146709E-8 0.19935183252967E-8 0.19800188349802E-8 0.19665705746185E-8 0.19531734754039E-8 - 0.19398274685874E-8 0.1926532485817E-8 0.19132884589379E-8 0.19000953199027E-8 0.18869530010503E-8 - 0.1873861434779E-8 
0.18608205537782E-8 0.1847830291015E-8 0.18348905795335E-8 0.18220013527211E-8 - 0.18091625440664E-8 0.17963740872728E-8 0.17836359164294E-8 0.17709479656568E-8 0.17583101694258E-8 - 0.17457224623899E-8 0.17331847793267E-8 0.17206970553519E-8 0.17082592256377E-8 0.16958712256711E-8 - 0.16835329912228E-8 0.16712444581072E-8 0.16590055625388E-8 0.16468162408136E-8 0.1634676429438E-8 - 0.16225860652608E-8 0.16105450851877E-8 0.15985534264313E-8 0.15866110263652E-8 0.15747178224734E-8 - 0.15628737526991E-8 0.15510787550106E-8 0.1539332767651E-8 0.15276357291067E-8 0.15159875779031E-8 - 0.15043882530168E-8 0.14928376934916E-8 0.14813358385703E-8 0.14698826277814E-8 0.14584780006873E-8 - 0.14471218972575E-8 0.14358142576161E-8 0.14245550219986E-8 0.14133441310169E-8 0.14021815253175E-8 - 0.13910671458468E-8 0.1380000933798E-8 0.1368982830417E-8 0.13580127773235E-8 0.13470907161958E-8 - 0.13362165889394E-8 0.13253903378091E-8 0.13146119050805E-8 0.13038812333771E-8 0.12931982654627E-8 - 0.12825629442184E-8 0.12719752129299E-8 0.1261435014915E-8 0.12509422937578E-8 0.12404969932603E-8 - 0.1230099057243E-8 0.12197484300268E-8 0.12094450559491E-8 0.1199188879566E-8 0.11889798457411E-8 - 0.11788178993743E-8 0.11687029856965E-8 0.11586350500641E-8 0.1148614037955E-8 0.11386398952695E-8 - 0.11287125679142E-8 0.11188320020871E-8 0.11089981442051E-8 0.10992109407043E-8 0.10894703384876E-8 - 0.10797762844802E-8 0.10701287258193E-8 0.10605276099147E-8 0.10509728841874E-8 0.10414644964778E-8 - 0.10320023947388E-8 0.10225865270434E-8 0.10132168418255E-8 0.10038932875308E-8 0.99461581291927E-9 - 0.98538436696771E-9 0.97619889868979E-9 0.96705935748975E-9 0.9579656927937E-9 0.94891785426876E-9 - 0.93991579189863E-9 0.93095945566753E-9 0.92204879593196E-9 0.91318376314949E-9 0.90436430790552E-9 - 0.89559038116497E-9 0.88686193390299E-9 0.87817891737919E-9 0.86954128304455E-9 0.86094898236697E-9 - 0.85240196730295E-9 0.8439001898006E-9 0.83544360203504E-9 0.82703215645755E-9 
0.81866580551804E-9 - 0.81034450207307E-9 0.80206819906904E-9 0.79383684958206E-9 0.7856504070162E-9 0.77750882477299E-9 - 0.76941205659796E-9 0.76136005643338E-9 0.7533527782413E-9 0.74539017641903E-9 0.73747220536915E-9 - 0.72959881971711E-9 0.72176997434501E-9 0.71398562412332E-9 0.70624572436187E-9 0.69855023043884E-9 - 0.69089909786481E-9 0.68329228249571E-9 0.67572974017645E-9 0.66821142708396E-9 0.66073729957538E-9 - 0.65330731404453E-9 0.64592142727766E-9 0.63857959605665E-9 0.63128177741818E-9 0.62402792868402E-9 - 0.61681800715775E-9 0.60965197053985E-9 0.60252977659042E-9 0.59545138321117E-9 0.58841674866232E-9 - 0.58142583119155E-9 0.57447858935321E-9 0.5675749818608E-9 0.56071496745478E-9 0.55389850533995E-9 - 0.54712555469291E-9 0.54039607493279E-9 0.53371002573334E-9 0.5270673667464E-9 0.52046805805195E-9 - 0.513912059776E-9 0.50739933218727E-9 0.50092983587373E-9 0.49450353139729E-9 0.48812037968166E-9 - 0.48178034180677E-9 0.47548337888466E-9 0.46922945243971E-9 0.46301852397244E-9 0.45685055524039E-9 - 0.45072550825645E-9 0.44464334501494E-9 0.4386040279051E-9 0.43260751934817E-9 0.42665378192188E-9 - 0.42074277856306E-9 0.41487447216691E-9 0.40904882597325E-9 0.40326580336268E-9 0.39752536775921E-9 - 0.39182748297861E-9 0.38617211280688E-9 0.38055922130181E-9 0.37498877277488E-9 0.36946073150145E-9 - 0.36397506218172E-9 0.35853172953115E-9 0.35313069842504E-9 0.3477719340757E-9 0.34245540166062E-9 - 0.3371810666906E-9 0.33194889480009E-9 0.32675885166346E-9 0.32161090339652E-9 0.31650501606728E-9 - 0.31144115600553E-9 0.30641928977184E-9 0.30143938388553E-9 0.29650140531945E-9 0.29160532104631E-9 - 0.28675109820007E-9 0.28193870422214E-9 0.27716810650584E-9 0.27243927282697E-9 0.2677521710755E-9 - 0.26310676918588E-9 0.25850303549529E-9 0.25394093829395E-9 0.24942044614898E-9 0.24494152785499E-9 - 0.24050415216685E-9 0.23610828827002E-9 0.23175390533593E-9 0.22744097271348E-9 0.22316946008327E-9 - 0.21893933706796E-9 0.21475057365543E-9 
0.21060313993236E-9 0.20649700603633E-9 0.20243214251796E-9 - 0.19840851987489E-9 0.19442610887509E-9 0.19048488049615E-9 0.18658480565926E-9 0.18272585577367E-9 - 0.17890800220841E-9 0.17513121651304E-9 0.17139547054702E-9 0.16770073611467E-9 0.16404698538605E-9 - 0.16043419061536E-9 0.15686232411332E-9 0.15333135860924E-9 0.14984126676494E-9 0.14639202152787E-9 - 0.14298359605017E-9 0.13961596342417E-9 0.13628909722518E-9 0.13300297097923E-9 0.12975755839786E-9 - 0.12655283348588E-9 0.12338877017912E-9 0.12026534281907E-9 0.11718252581754E-9 0.11414029364777E-9 - 0.11113862117488E-9 0.10817748319439E-9 0.10525685480233E-9 0.1023767112925E-9 0.99537027899513E-10 - 0.96737780327785E-10 0.93978944218651E-10 0.9126049541571E-10 0.88582410066305E-10 0.85944664241151E-10 - 0.83347234408258E-10 0.80790097091558E-10 0.78273228884193E-10 0.7579660677513E-10 0.73360207677622E-10 - 0.7096400880429E-10 0.68607987549117E-10 0.66292121243707E-10 0.64016387717586E-10 0.61780764722758E-10 - 0.59585230215763E-10 0.57429762438993E-10 0.5531433955358E-10 0.53238940136073E-10 0.51203542804478E-10 - 0.49208126252848E-10 0.47252669554284E-10 0.45337151697468E-10 0.43461551993433E-10 0.41625849926816E-10 - 0.39830024924883E-10 0.38074056895759E-10 0.36357925667986E-10 0.34681611281053E-10 0.33045094050383E-10 - 0.31448354202707E-10 0.29891372396657E-10 0.28374129315374E-10 0.26896605724987E-10 0.25458782771274E-10 - 0.24060641508354E-10 0.22702163317708E-10 0.2138332973998E-10 0.20104122265926E-10 0.18864522858568E-10 - 0.17664513392887E-10 0.16504075967777E-10 0.15383192957389E-10 0.14301846640821E-10 0.13260019730933E-10 - 0.12257694950099E-10 0.11294855112714E-10 0.10371483410682E-10 0.94875629374466E-11 0.86430771202916E-11 - 0.78380095310327E-11 0.70723436946325E-11 0.63460636168343E-11 0.56591532053074E-11 0.50115965977311E-11 - 0.44033781949971E-11 0.38344822972775E-11 0.33048936536186E-11 0.28145970062726E-11 0.23635771978531E-11 - 0.19518194401425E-11 0.15793088385718E-11 
0.12460308477349E-11 0.95197105416845E-12 0.69711500364319E-12 - 0.48144871423007E-12 0.30495809990602E-12 0.16762931441037E-12 0.69448766761193E-13 0.10402885315998E-13 - 0. 0.69359921501914E-1 0.13853998370442E0 0.20754052459671E0 0.27636188161334E0 - 0.34500439163036E0 0.41346839097836E0 0.48175421541995E0 0.54986220018139E0 0.61779267993589E0 - 0.68554598878954E0 0.75312246031861E0 0.82052242754039E0 0.88774622292626E0 0.95479417840045E0 - 0.10216666253409E1 0.108836389458E1 0.11548863164056E1 0.12212342205616E1 0.12874079362492E1 - 0.13534077921271E1 0.14192341163129E1 0.14848872363837E1 0.1550367479377E1 0.16156751717912E1 - 0.1680810639587E1 0.17457742081879E1 0.18105662024807E1 0.18751869468171E1 0.19396367650138E1 - 0.20039159803538E1 0.20680249155867E1 0.21319638929302E1 0.21957332340705E1 0.22593332601628E1 - 0.2322764291833E1 0.23860266491776E1 0.24491206517651E1 0.25120466186365E1 0.25748048683063E1 - 0.26373957187633E1 0.26998194874711E1 0.27620764913694E1 0.28241670468744E1 0.28860914698797E1 - 0.29478500757573E1 0.30094431793581E1 0.3070871095013E1 0.31321341365334E1 0.31932326172122E1 - 0.32541668498245E1 0.33149371466286E1 0.33755438193663E1 0.34359871792644E1 0.34962675370347E1 - 0.35563852028756E1 0.36163404864722E1 0.36761336969975E1 0.3735765143113E1 0.37952351329696E1 - 0.38545439742083E1 0.39136919739609E1 0.39726794388512E1 0.40315066749951E1 0.4090173988002E1 - 0.41486816829753E1 0.42070300645131E1 0.42652194367091E1 0.43232501031535E1 0.43811223669335E1 - 0.44388365306342E1 0.44963928963395E1 0.45537917656326E1 0.4611033439597E1 0.46681182188171E1 - 0.47250464033792E1 0.4781818292872E1 0.48384341863875E1 0.48948943825217E1 0.49511991793756E1 - 0.50073488745554E1 0.5063343765174E1 0.51191841478512E1 0.51748703187147E1 0.52304025734007E1 - 0.52857812070549E1 0.53410065143331E1 0.53960787894019E1 0.54509983259394E1 0.55057654171363E1 - 0.55603803556963E1 0.5614843433837E1 0.56691549432906E1 0.57233151753047E1 0.57773244206428E1 - 
0.58311829695856E1 0.58848911119312E1 0.5938449136996E1 0.59918573336156E1 0.60451159901452E1 - 0.60982253944609E1 0.61511858339599E1 0.62039975955614E1 0.62566609657073E1 0.63091762303634E1 - 0.63615436750192E1 0.64137635846896E1 0.6465836243915E1 0.65177619367623E1 0.65695409468254E1 - 0.66211735572264E1 0.66726600506158E1 0.67240007091735E1 0.67751958146095E1 0.68262456481644E1 - 0.68771504906107E1 0.69279106222528E1 0.69785263229282E1 0.70289978720081E1 0.7079325548398E1 - 0.71295096305386E1 0.71795503964065E1 0.72294481235146E1 0.72792030889134E1 0.7328815569191E1 - 0.73782858404747E1 0.74276141784306E1 0.74768008582654E1 0.75258461547264E1 0.75747503421024E1 - 0.76235136942246E1 0.76721364844669E1 0.77206189857472E1 0.77689614705274E1 0.78171642108146E1 - 0.78652274781617E1 0.79131515436681E1 0.79609366779802E1 0.80085831512923E1 0.80560912333474E1 - 0.81034611934376E1 0.81506933004051E1 0.81977878226424E1 0.82447450280938E1 0.82915651842554E1 - 0.83382485581759E1 0.83847954164577E1 0.8431206025257E1 0.84774806502851E1 0.85236195568085E1 - 0.85696230096502E1 0.86154912731896E1 0.8661224611364E1 0.87068232876687E1 0.87522875651582E1 - 0.87976177064463E1 0.88428139737071E1 0.88878766286758E1 0.8932805932649E1 0.89776021464859E1 - 0.90222655306084E1 0.90667963450022E1 0.91111948492172E1 0.91554613023686E1 0.91995959631369E1 - 0.92435990897692E1 0.92874709400795E1 0.93312117714495E1 0.93748218408294E1 0.94183014047383E1 - 0.94616507192648E1 0.95048700400683E1 0.95479596223789E1 0.95909197209984E1 0.96337505903011E1 - 0.96764524842342E1 0.97190256563185E1 0.97614703596494E1 0.9803786846897E1 0.98459753703073E1 - 0.98880361817023E1 0.99299695324812E1 0.99717756736209E1 0.10013454855676E2 0.10055007328781E2 - 0.10096433342649E2 0.10137733146574E2 0.10178906989431E2 0.10219955119675E2 0.10260877785346E2 - 0.10301675234064E2 0.10342347713033E2 0.10382895469043E2 0.10423318748467E2 0.10463617797264E2 - 0.10503792860979E2 0.10543844184744E2 0.10583772013277E2 
0.10623576590886E2 0.10663258161466E2 - 0.10702816968501E2 0.10742253255067E2 0.10781567263829E2 0.10820759237043E2 0.10859829416557E2 - 0.10898778043811E2 0.1093760535984E2 0.1097631160527E2 0.11014897020324E2 0.11053361844817E2 - 0.11091706318162E2 0.11129930679368E2 0.11168035167038E2 0.11206020019377E2 0.11243885474184E2 - 0.11281631768859E2 0.113192591404E2 0.11356767825407E2 0.11394158060078E2 0.11431430080215E2 - 0.11468584121218E2 0.11505620418093E2 0.11542539205448E2 0.11579340717494E2 0.11616025188045E2 - 0.11652592850524E2 0.11689043937956E2 0.11725378682973E2 0.11761597317813E2 0.11797700074324E2 - 0.11833687183959E2 0.11869558877781E2 0.11905315386462E2 0.11940956940283E2 0.11976483769137E2 - 0.12011896102527E2 0.12047194169567E2 0.12082378198983E2 0.12117448419117E2 0.1215240505792E2 - 0.1218724834296E2 0.12221978501418E2 0.12256595760091E2 0.12291100345392E2 0.1232549248335E2 - 0.12359772399611E2 0.12393940319438E2 0.12427996467714E2 0.12461941068938E2 0.12495774347232E2 - 0.12529496526334E2 0.12563107829606E2 0.12596608480029E2 0.12629998700206E2 0.12663278712364E2 - 0.1269644873835E2 0.12729508999637E2 0.12762459717321E2 0.12795301112124E2 0.1282803340439E2 - 0.12860656814093E2 0.12893171560831E2 0.12925577863829E2 0.1295787594194E2 0.12990066013646E2 - 0.13022148297057E2 0.13054123009911E2 0.13085990369578E2 0.13117750593057E2 0.13149403896981E2 - 0.1318095049761E2 0.13212390610839E2 0.13243724452197E2 0.13274952236843E2 0.13306074179572E2 - 0.13337090494814E2 0.13368001396632E2 0.13398807098727E2 0.13429507814434E2 0.13460103756727E2 - 0.13490595138216E2 0.13520982171148E2 0.13551265067411E2 0.13581444038529E2 0.13611519295669E2 - 0.13641491049635E2 0.13671359510874E2 0.13701124889473E2 0.13730787395161E2 0.1376034723731E2 - 0.13789804624934E2 0.13819159766692E2 0.13848412870886E2 0.13877564145462E2 0.13906613798012E2 - 0.13935562035775E2 0.13964409065634E2 0.1399315509412E2 0.14021800327412E2 0.14050344971335E2 - 0.14078789231364E2 
0.14107133312624E2 0.14135377419887E2 0.14163521757577E2 0.14191566529769E2 - 0.14219511940187E2 0.14247358192209E2 0.14275105488865E2 0.14302754032837E2 0.1433030402646E2 - 0.14357755671725E2 0.14385109170276E2 0.14412364723412E2 0.14439522532088E2 0.14466582796915E2 - 0.14493545718162E2 0.14520411495752E2 0.14547180329268E2 0.14573852417952E2 0.14600427960702E2 - 0.14626907156079E2 0.146532902023E2 0.14679577297245E2 0.14705768638455E2 0.1473186442313E2 - 0.14757864848135E2 0.14783770109996E2 0.14809580404902E2 0.14835295928705E2 0.14860916876923E2 - 0.14886443444738E2 0.14911875826995E2 0.14937214218207E2 0.14962458812554E2 0.14987609803879E2 - 0.15012667385696E2 0.15037631751185E2 0.15062503093195E2 0.15087281604243E2 0.15111967476515E2 - 0.15136560901869E2 0.15161062071831E2 0.151854711776E2 0.15209788410043E2 0.15234013959704E2 - 0.15258148016794E2 0.15282190771202E2 0.15306142412486E2 0.1533000312988E2 0.15353773112294E2 - 0.1537745254831E2 0.15401041626188E2 0.15424540533862E2 0.15447949458944E2 0.15471268588723E2 - 0.15494498110163E2 0.15517638209911E2 0.15540689074287E2 0.15563650889294E2 0.15586523840612E2 - 0.15609308113604E2 0.15632003893311E2 0.15654611364456E2 0.15677130711443E2 0.15699562118358E2 - 0.15721905768972E2 0.15744161846735E2 0.15766330534784E2 0.15788412015939E2 0.15810406472703E2 - 0.15832314087266E2 0.15854135041503E2 0.15875869516975E2 0.15897517694929E2 0.159190797563E2 - 0.15940555881709E2 0.15961946251467E2 0.15983251045573E2 0.16004470443713E2 0.16025604625264E2 - 0.16046653769294E2 0.1606761805456E2 0.1608849765951E2 0.16109292762284E2 0.16130003540714E2 - 0.16150630172323E2 0.16171172834329E2 0.16191631703641E2 0.16212006956865E2 0.16232298770298E2 - 0.16252507319933E2 0.1627263278146E2 0.16292675330262E2 0.1631263514142E2 0.16332512389712E2 - 0.16352307249612E2 0.16372019895292E2 0.16391650500622E2 0.16411199239171E2 0.16430666284209E2 - 0.16450051808701E2 0.16469355985316E2 0.16488578986422E2 0.16507720984089E2 
0.16526782150087E2 - 0.16545762655889E2 0.16564662672669E2 0.16583482371306E2 0.16602221922382E2 0.16620881496181E2 - 0.16639461262692E2 0.1665796139161E2 0.16676382052334E2 0.1669472341397E2 0.16712985645328E2 - 0.16731168914926E2 0.16749273390989E2 0.1676729924145E2 0.16785246633949E2 0.16803115735834E2 - 0.16820906714164E2 0.16838619735706E2 0.16856254966937E2 0.16873812574044E2 0.16891292722924E2 - 0.16908695579188E2 0.16926021308156E2 0.16943270074862E2 0.1696044204405E2 0.1697753738018E2 - 0.16994556247423E2 0.17011498809666E2 0.17028365230509E2 0.17045155673267E2 0.17061870300971E2 - 0.17078509276367E2 0.17095072761918E2 0.17111560919802E2 0.17127973911916E2 0.17144311899874E2 - 0.17160575045007E2 0.17176763508366E2 0.17192877450718E2 0.17208917032554E2 0.1722488241408E2 - 0.17240773755225E2 0.17256591215638E2 0.17272334954689E2 0.17288005131469E2 0.17303601904792E2 - 0.17319125433193E2 0.17334575874932E2 0.17349953387991E2 0.17365258130074E2 0.17380490258613E2 - 0.1739564993076E2 0.17410737303397E2 0.17425752533128E2 0.17440695776284E2 0.17455567188921E2 - 0.17470366926825E2 0.17485095145506E2 0.17499752000203E2 0.17514337645883E2 0.17528852237241E2 - 0.17543295928702E2 0.17557668874419E2 0.17571971228276E2 0.17586203143886E2 0.17600364774594E2 - 0.17614456273475E2 0.17628477793335E2 0.17642429486715E2 0.17656311505885E2 0.17670124002848E2 - 0.17683867129342E2 0.17697541036838E2 0.1771114587654E2 0.17724681799387E2 0.17738148956054E2 - 0.17751547496951E2 0.17764877572221E2 0.17778139331747E2 0.17791332925146E2 0.17804458501774E2 - 0.17817516210721E2 0.17830506200819E2 0.17843428620635E2 0.17856283618477E2 0.1786907134239E2 - 0.1788179194016E2 0.17894445559312E2 0.17907032347112E2 0.17919552450566E2 0.17932006016422E2 - 0.17944393191168E2 0.17956714121036E2 0.17968968951998E2 0.17981157829772E2 0.17993280899815E2 - 0.18005338307331E2 0.18017330197268E2 0.18029256714315E2 0.18041118002909E2 0.18052914207232E2 - 0.18064645471209E2 0.18076311938515E2 
0.18087913752568E2 0.18099451056535E2 0.18110923993328E2 - 0.18122332705609E2 0.18133677335788E2 0.1814495802602E2 0.18156174918213E2 0.18167328154023E2 - 0.18178417874853E2 0.18189444221861E2 0.1820040733595E2 0.18211307357779E2 0.18222144427753E2 - 0.18232918686034E2 0.18243630272532E2 0.1825427932691E2 0.18264865988586E2 0.18275390396729E2 - 0.18285852690262E2 0.18296253007864E2 0.18306591487964E2 0.18316868268751E2 0.18327083488166E2 - 0.18337237283906E2 0.18347329793423E2 0.18357361153928E2 0.18367331502387E2 0.18377240975523E2 - 0.18387089709816E2 0.18396877841506E2 0.1840660550659E2 0.18416272840822E2 0.18425879979718E2 - 0.18435427058551E2 0.18444914212356E2 0.18454341575927E2 0.18463709283817E2 0.18473017470343E2 - 0.18482266269582E2 0.18491455815372E2 0.18500586241314E2 0.18509657680771E2 0.18518670266869E2 - 0.18527624132497E2 0.18536519410308E2 0.1854535623272E2 0.18554134731913E2 0.18562855039833E2 - 0.18571517288192E2 0.18580121608466E2 0.18588668131897E2 0.18597156989494E2 0.18605588312033E2 - 0.18613962230056E2 0.18622278873872E2 0.18630538373559E2 0.18638740858963E2 0.18646886459697E2 - 0.18654975305144E2 0.18663007524456E2 0.18670983246556E2 0.18678902600133E2 0.18686765713651E2 - 0.18694572715343E2 0.1870232373321E2 0.1871001889503E2 0.18717658328348E2 0.18725242160484E2 - 0.18732770518528E2 0.18740243529347E2 0.18747661319576E2 0.18755024015628E2 0.18762331743688E2 - 0.18769584629715E2 0.18776782799443E2 0.18783926378383E2 0.18791015491818E2 0.18798050264809E2 - 0.18805030822193E2 0.18811957288583E2 0.18818829788369E2 0.18825648445718E2 0.18832413384575E2 - 0.18839124728663E2 0.18845782601482E2 0.18852387126312E2 0.18858938426213E2 0.1886543662402E2 - 0.18871881842354E2 0.1887827420361E2 0.18884613829967E2 0.18890900843385E2 0.18897135365602E2 - 0.18903317518141E2 0.18909447422305E2 0.18915525199178E2 0.18921550969629E2 0.18927524854309E2 - 0.18933446973652E2 0.18939317447874E2 0.18945136396979E2 0.1895090394075E2 0.18956620198759E2 - 
0.1896228529036E2 0.18967899334695E2 0.18973462450689E2 0.18978974757054E2 0.1898443637229E2 - 0.1898984741468E2 0.18995208002297E2 0.19000518253001E2 0.19005778284438E2 0.19010988214044E2 - 0.19016148159042E2 0.19021258236445E2 0.19026318563054E2 0.19031329255459E2 0.19036290430041E2 - 0.19041202202971E2 0.19046064690209E2 0.19050878007508E2 0.19055642270409E2 0.19060357594247E2 - 0.19065024094147E2 0.19069641885029E2 0.19074211081602E2 0.19078731798369E2 0.19083204149627E2 - 0.19087628249465E2 0.19092004211767E2 0.1909633215021E2 0.19100612178267E2 0.19104844409203E2 - 0.19109028956081E2 0.19113165931758E2 0.19117255448887E2 0.19121297619917E2 0.19125292557092E2 - 0.19129240372457E2 0.19133141177848E2 0.19136995084904E2 0.19140802205058E2 0.19144562649543E2 - 0.19148276529389E2 0.19151943955426E2 0.19155565038282E2 0.19159139888384E2 0.19162668615961E2 - 0.19166151331039E2 0.19169588143446E2 0.1917297916281E2 0.19176324498561E2 0.19179624259929E2 - 0.19182878555945E2 0.19186087495445E2 0.19189251187064E2 0.19192369739241E2 0.19195443260218E2 - 0.19198471858039E2 0.19201455640553E2 0.19204394715412E2 0.19207289190072E2 0.19210139171793E2 - 0.19212944767643E2 0.1921570608449E2 0.19218423229011E2 0.19221096307688E2 0.19223725426808E2 - 0.19226310692466E2 0.19228852210561E2 0.19231350086803E2 0.19233804426706E2 0.19236215335592E2 - 0.19238582918592E2 0.19240907280645E2 0.19243188526498E2 0.19245426760707E2 0.19247622087637E2 - 0.19249774611462E2 0.19251884436168E2 0.19253951665549E2 0.19255976403209E2 0.19257958752564E2 - 0.19259898816841E2 0.19261796699077E2 0.19263652502123E2 0.19265466328638E2 0.19267238281098E2 - 0.19268968461787E2 0.19270656972805E2 0.19272303916064E2 0.19273909393289E2 0.19275473506019E2 - 0.19276996355608E2 0.19278478043223E2 0.19279918669846E2 0.19281318336275E2 0.19282677143121E2 - 0.19283995190813E2 0.19285272579594E2 0.19286509409524E2 0.19287705780479E2 0.19288861792153E2 - 0.19289977544055E2 0.19291053135512E2 0.1929208866567E2 
0.19293084233491E2 0.19294039937757E2 - 0.19294955877067E2 0.19295832149839E2 0.1929666885431E2 0.19297466088539E2 0.192982239504E2 - 0.19298942537592E2 0.19299621947629E2 0.1930026227785E2 0.19300863625413E2 0.19301426087298E2 - 0.19301949760304E2 0.19302434741054E2 0.19302881125993E2 0.19303289011388E2 0.19303658493328E2 - 0.19303989667725E2 0.19304282630314E2 0.19304537476655E2 0.1930475430213E2 0.19304933201945E2 - 0.19305074271132E2 0.19305177604546E2 0.19305243296867E2 0.19305271442601E2 0.19305262136079E2 - 0.19305215471458E2 0.19305131542721E2 0.19305010443676E2 0.19304852267959E2 0.19304657109034E2 - 0.19304425060188E2 0.19304156214541E2 0.19303850665036E2 0.19303508504447E2 0.19303129825373E2 - 0.19302714720247E2 0.19302263281324E2 0.19301775600694E2 0.19301251770273E2 0.19300691881809E2 - 0.19300096026876E2 0.19299464296883E2 0.19298796783067E2 0.19298093576496E2 0.19297354768069E2 - 0.19296580448516E2 0.192957707084E2 0.19294925638115E2 0.19294045327886E2 0.19293129867773E2 - 0.19292179347667E2 0.19291193857291E2 0.19290173486204E2 0.19289118323796E2 0.19288028459293E2 - 0.19286903981752E2 0.19285744980068E2 0.19284551542968E2 0.19283323759015E2 0.19282061716606E2 - 0.19280765503975E2 0.19279435209191E2 0.19278070920158E2 0.19276672724618E2 0.19275240710148E2 - 0.19273774964163E2 0.19272275573913E2 0.19270742626489E2 0.19269176208814E2 0.19267576407655E2 - 0.19265943309612E2 0.19264277001127E2 0.19262577568478E2 0.19260845097783E2 0.19259079674999E2 - 0.19257281385923E2 0.19255450316191E2 0.19253586551279E2 0.19251690176503E2 0.1924976127702E2 - 0.19247799937828E2 0.19245806243764E2 0.19243780279508E2 0.19241722129582E2 0.19239631878348E2 - 0.19237509610012E2 0.1923535540862E2 0.19233169358062E2 0.1923095154207E2 0.19228702044221E2 - 0.19226420947933E2 0.19224108336469E2 0.19221764292934E2 0.19219388900279E2 0.19216982241299E2 - 0.19214544398632E2 0.19212075454764E2 0.19209575492022E2 0.19207044592582E2 0.19204482838464E2 - 0.19201890311533E2 
0.19199267093502E2 0.19196613265929E2 0.1919392891022E2 0.19191214107625E2 - 0.19188468939245E2 0.19185693486025E2 0.1918288782876E2 0.19180052048091E2 0.19177186224508E2 - 0.19174290438351E2 0.19171364769805E2 0.19168409298907E2 0.19165424105542E2 0.19162409269443E2 - 0.19159364870196E2 0.19156290987233E2 0.19153187699839E2 0.19150055087148E2 0.19146893228145E2 - 0.19143702201666E2 0.19140482086397E2 0.19137232960876E2 0.19133954903493E2 0.19130647992491E2 - 0.19127312305961E2 0.19123947921851E2 0.19120554917958E2 0.19117133371935E2 0.19113683361284E2 - 0.19110204963365E2 0.19106698255387E2 0.19103163314417E2 0.19099600217372E2 0.19096009041025E2 - 0.19092389862006E2 0.19088742756795E2 0.19085067801729E2 0.19081365073003E2 0.19077634646663E2 - 0.19073876598612E2 0.19070091004612E2 0.19066277940277E2 0.19062437481079E2 0.19058569702347E2 - 0.19054674679267E2 0.19050752486881E2 0.19046803200089E2 0.1904282689365E2 0.19038823642177E2 - 0.19034793520146E2 0.19030736601887E2 0.19026652961591E2 0.19022542673306E2 0.19018405810941E2 - 0.19014242448263E2 0.19010052658899E2 0.19005836516334E2 0.19001594093915E2 0.18997325464849E2 - 0.18993030702202E2 0.189887098789E2 0.18984363067734E2 0.18979990341351E2 0.18975591772262E2 - 0.1897116743284E2 0.18966717395318E2 0.18962241731791E2 0.18957740514218E2 0.18953213814419E2 - 0.18948661704078E2 0.18944084254739E2 0.18939481537813E2 0.18934853624571E2 0.1893020058615E2 - 0.18925522493549E2 0.18920819417632E2 0.18916091429128E2 0.18911338598628E2 0.1890656099659E2 - 0.18901758693337E2 0.18896931759054E2 0.18892080263796E2 0.1888720427748E2 0.1888230386989E2 - 0.18877379110676E2 0.18872430069355E2 0.18867456815309E2 0.18862459417789E2 0.1885743794591E2 - 0.18852392468656E2 0.18847323054878E2 0.18842229773294E2 0.18837112692492E2 0.18831971880924E2 - 0.18826807406914E2 0.18821619338653E2 0.18816407744201E2 0.18811172691486E2 0.18805914248306E2 - 0.18800632482328E2 0.18795327461089E2 0.18789999251994E2 0.18784647922321E2 
0.18779273539216E2 - 0.18773876169696E2 0.18768455880649E2 0.18763012738832E2 0.18757546810876E2 0.18752058163282E2 - 0.1874654686242E2 0.18741012974537E2 0.18735456565747E2 0.18729877702038E2 0.18724276449271E2 - 0.18718652873179E2 0.18713007039367E2 0.18707339013314E2 0.18701648860372E2 0.18695936645766E2 - 0.18690202434594E2 0.1868444629183E2 0.18678668282321E2 0.18672868470786E2 0.18667046921822E2 - 0.18661203699899E2 0.1865533886936E2 0.18649452494427E2 0.18643544639195E2 0.18637615367634E2 - 0.18631664743591E2 0.18625692830788E2 0.18619699692823E2 0.18613685393172E2 0.18607649995185E2 - 0.18601593562091E2 0.18595516156995E2 0.18589417842879E2 0.18583298682601E2 0.185771587389E2 - 0.1857099807439E2 0.18564816751564E2 0.18558614832792E2 0.18552392380324E2 0.18546149456288E2 - 0.1853988612269E2 0.18533602441415E2 0.1852729847423E2 0.18520974282777E2 0.18514629928581E2 - 0.18508265473044E2 0.18501880977452E2 0.18495476502966E2 0.18489052110631E2 0.18482607861372E2 - 0.18476143815995E2 0.18469660035185E2 0.1846315657951E2 0.18456633509419E2 0.18450090885243E2 - 0.18443528767194E2 0.18436947215367E2 0.18430346289737E2 0.18423726050164E2 0.18417086556389E2 - 0.18410427868036E2 0.18403750044613E2 0.18397053145509E2 0.18390337229998E2 0.18383602357238E2 - 0.1837684858627E2 0.18370075976017E2 0.1836328458529E2 0.18356474472781E2 0.18349645697069E2 - 0.18342798316616E2 0.18335932389769E2 0.18329047974762E2 0.18322145129711E2 0.1831522391262E2 - 0.18308284381379E2 0.18301326593762E2 0.1829435060743E2 0.1828735647993E2 0.18280344268696E2 - 0.18273314031048E2 0.18266265824193E2 0.18259199705226E2 0.18252115731127E2 0.18245013958766E2 - 0.18237894444899E2 0.18230757246169E2 0.18223602419109E2 0.18216430020139E2 0.18209240105567E2 - 0.18202032731591E2 0.18194807954296E2 0.18187565829656E2 0.18180306413536E2 0.18173029761689E2 - 0.18165735929756E2 0.1815842497327E2 0.18151096947654E2 0.18143751908218E2 0.18136389910165E2 - 0.18129011008587E2 0.18121615258468E2 
0.18114202714681E2 0.18106773431991E2 0.18099327465055E2 - 0.18091864868418E2 0.1808438569652E2 0.18076890003692E2 0.18069377844155E2 0.18061849272023E2 - 0.18054304341304E2 0.18046743105895E2 0.18039165619589E2 0.18031571936069E2 0.18023962108913E2 - 0.1801633619159E2 0.18008694237465E2 0.18001036299795E2 0.1799336243173E2 0.17985672686314E2 - 0.17977967116486E2 0.1797024577508E2 0.17962508714821E2 0.17954755988332E2 0.1794698764813E2 - 0.17939203746625E2 0.17931404336123E2 0.17923589468828E2 0.17915759196835E2 0.17907913572138E2 - 0.17900052646624E2 0.17892176472079E2 0.17884285100184E2 0.17876378582514E2 0.17868456970545E2 - 0.17860520315645E2 0.17852568669082E2 0.17844602082019E2 0.17836620605519E2 0.1782862429054E2 - 0.17820613187938E2 0.17812587348466E2 0.17804546822777E2 0.1779649166142E2 0.17788421914843E2 - 0.17780337633394E2 0.17772238867316E2 0.17764125666754E2 0.17755998081751E2 0.17747856162247E2 - 0.17739699958085E2 0.17731529519005E2 0.17723344894647E2 0.17715146134551E2 0.17706933288157E2 - 0.17698706404805E2 0.17690465533735E2 0.17682210724087E2 0.17673942024904E2 0.17665659485127E2 - 0.176573631536E2 0.17649053079066E2 0.17640729310171E2 0.17632391895463E2 0.17624040883389E2 - 0.17615676322301E2 0.17607298260451E2 0.17598906745992E2 0.17590501826983E2 0.17582083551382E2 - 0.17573651967052E2 0.17565207121757E2 0.17556749063164E2 0.17548277838845E2 0.17539793496274E2 - 0.17531296082828E2 0.17522785645788E2 0.1751426223234E2 0.17505725889573E2 0.17497176664478E2 - 0.17488614603955E2 0.17480039754803E2 0.17471452163731E2 0.17462851877349E2 0.17454238942172E2 - 0.17445613404623E2 0.17436975311027E2 0.17428324707617E2 0.17419661640528E2 0.17410986155806E2 - 0.17402298299398E2 0.1739359811716E2 0.17384885654853E2 0.17376160958145E2 0.17367424072609E2 - 0.17358675043727E2 0.17349913916885E2 0.1734114073738E2 0.17332355550413E2 0.17323558401092E2 - 0.17314749334436E2 0.17305928395367E2 0.17297095628718E2 0.1728825107923E2 0.1727939479155E2 - 
0.17270526810235E2 0.17261647179749E2 0.17252755944467E2 0.17243853148671E2 0.17234938836551E2 - 0.17226013052207E2 0.1721707583965E2 0.17208127242798E2 0.17199167305479E2 0.17190196071432E2 - 0.17181213584303E2 0.17172219887651E2 0.17163215024943E2 0.17154199039558E2 0.17145171974785E2 - 0.17136133873822E2 0.1712708477978E2 0.17118024735679E2 0.17108953784451E2 0.1709987196894E2 - 0.170907793319E2 0.17081675915997E2 0.17072561763808E2 0.17063436917824E2 0.17054301420445E2 - 0.17045155313986E2 0.17035998640671E2 0.1702683144264E2 0.17017653761943E2 0.17008465640544E2 - 0.16999267120318E2 0.16990058243056E2 0.1698083905046E2 0.16971609584146E2 0.16962369885643E2 - 0.16953119996394E2 0.16943859957757E2 0.16934589811001E2 0.16925309597311E2 0.16916019357787E2 - 0.16906719133441E2 0.16897408965202E2 0.16888088893911E2 0.16878758960327E2 0.1686941920512E2 - 0.16860069668879E2 0.16850710392105E2 0.16841341415217E2 0.16831962778548E2 0.16822574522347E2 - 0.16813176686779E2 0.16803769311924E2 0.1679435243778E2 0.16784926104259E2 0.16775490351192E2 - 0.16766045218324E2 0.16756590745317E2 0.16747126971753E2 0.16737653937126E2 0.16728171680852E2 - 0.1671868024226E2 0.16709179660599E2 0.16699669975034E2 0.1669015122465E2 0.16680623448448E2 - 0.16671086685347E2 0.16661540974184E2 0.16651986353715E2 0.16642422862613E2 0.16632850539473E2 - 0.16623269422804E2 0.16613679551037E2 0.16604080962521E2 0.16594473695524E2 0.16584857788232E2 - 0.16575233278754E2 0.16565600205115E2 0.1655595860526E2 0.16546308517056E2 0.16536649978287E2 - 0.16526983026661E2 0.16517307699801E2 0.16507624035255E2 0.16497932070489E2 0.1648823184289E2 - 0.16478523389767E2 0.16468806748347E2 0.16459081955781E2 0.16449349049141E2 0.16439608065417E2 - 0.16429859041525E2 0.16420102014299E2 0.16410337020496E2 0.16400564096796E2 0.16390783279799E2 - 0.16380994606028E2 0.16371198111929E2 0.16361393833868E2 0.16351581808137E2 0.16341762070947E2 - 0.16331934658435E2 0.16322099606659E2 0.163122569516E2 
0.16302406729163E2 0.16292548975177E2 - 0.16282683725392E2 0.16272811015484E2 0.16262930881051E2 0.16253043357617E2 0.16243148480627E2 - 0.16233246285452E2 0.16223336807388E2 0.16213420081654E2 0.16203496143394E2 0.16193565027675E2 - 0.16183626769493E2 0.16173681403763E2 0.16163728965331E2 0.16153769488964E2 0.16143803009355E2 - 0.16133829561125E2 0.16123849178817E2 0.16113861896902E2 0.16103867749776E2 0.16093866771761E2 - 0.16083858997106E2 0.16073844459984E2 0.16063823194497E2 0.1605379523467E2 0.16043760614459E2 - 0.16033719367744E2 0.16023671528331E2 0.16013617129956E2 0.16003556206278E2 0.15993488790888E2 - 0.15983414917301E2 0.15973334618959E2 0.15963247929235E2 0.15953154881426E2 0.1594305550876E2 - 0.15932949844391E2 0.15922837921401E2 0.15912719772802E2 0.15902595431533E2 0.1589246493046E2 - 0.15882328302381E2 0.15872185580021E2 0.15862036796034E2 0.15851881983001E2 0.15841721173437E2 - 0.1583155439978E2 0.15821381694403E2 0.15811203089606E2 0.15801018617616E2 0.15790828310595E2 - 0.15780632200632E2 0.15770430319745E2 0.15760222699883E2 0.15750009372927E2 0.15739790370686E2 - 0.157295657249E2 0.1571933546724E2 0.15709099629308E2 0.15698858242636E2 0.15688611338687E2 - 0.15678358948856E2 0.15668101104467E2 0.15657837836779E2 0.15647569176979E2 0.15637295156187E2 - 0.15627015805455E2 0.15616731155766E2 0.15606441238034E2 0.15596146083108E2 0.15585845721766E2 - 0.15575540184721E2 0.15565229502615E2 0.15554913706027E2 0.15544592825464E2 0.15534266891368E2 - 0.15523935934115E2 0.15513599984012E2 0.15503259071299E2 0.15492913226151E2 0.15482562478675E2 - 0.15472206858912E2 0.15461846396835E2 0.15451481122354E2 0.1544111106531E2 0.15430736255477E2 - 0.15420356722567E2 0.15409972496224E2 0.15399583606024E2 0.15389190081481E2 0.15378791952041E2 - 0.15368389247087E2 0.15357981995934E2 0.15347570227833E2 0.15337153971972E2 0.1532673325747E2 - 0.15316308113385E2 0.15305878568707E2 0.15295444652365E2 0.15285006393219E2 0.1527456382007E2 - 0.1526411696165E2 
0.1525366584663E2 0.15243210503615E2 0.15232750961146E2 0.15222287247704E2 - 0.152118193917E2 0.15201347421488E2 0.15190871365353E2 0.15180391251519E2 0.15169907108149E2 - 0.15159418963338E2 0.15148926845123E2 0.15138430781474E2 0.15127930800301E2 0.15117426929449E2 - 0.15106919196703E2 0.15096407629784E2 0.15085892256351E2 0.15075373104E2 0.15064850200266E2 - 0.15054323572621E2 0.15043793248477E2 0.15033259255181E2 0.15022721620021E2 0.15012180370223E2 - 0.15001635532951E2 0.14991087135306E2 0.14980535204332E2 0.14969979767008E2 0.14959420850253E2 - 0.14948858480926E2 0.14938292685824E2 0.14927723491685E2 0.14917150925184E2 0.14906575012937E2 - 0.148959957815E2 0.14885413257368E2 0.14874827466975E2 0.14864238436696E2 0.14853646192847E2 - 0.14843050761681E2 0.14832452169396E2 0.14821850442126E2 0.14811245605947E2 0.14800637686875E2 - 0.1479002671087E2 0.14779412703827E2 0.14768795691588E2 0.1475817569993E2 0.14747552754577E2 - 0.14736926881189E2 0.14726298105371E2 0.14715666452668E2 0.14705031948566E2 0.14694394618493E2 - 0.14683754487819E2 0.14673111581857E2 0.14662465925859E2 0.14651817545022E2 0.14641166464483E2 - 0.14630512709322E2 0.14619856304562E2 0.14609197275167E2 0.14598535646046E2 0.14587871442048E2 - 0.14577204687966E2 0.14566535408535E2 0.14555863628436E2 0.14545189372288E2 0.14534512664657E2 - 0.14523833530051E2 0.14513151992922E2 0.14502468077665E2 0.14491781808618E2 0.14481093210063E2 - 0.14470402306227E2 0.1445970912128E2 0.14449013679334E2 0.14438316004449E2 0.14427616120626E2 - 0.14416914051811E2 0.14406209821896E2 0.14395503454716E2 0.14384794974049E2 0.14374084403622E2 - 0.14363371767102E2 0.14352657088105E2 0.14341940390189E2 0.14331221696858E2 0.14320501031562E2 - 0.14309778417695E2 0.14299053878597E2 0.14288327437553E2 0.14277599117795E2 0.14266868942498E2 - 0.14256136934786E2 0.14245403117727E2 0.14234667514334E2 0.14223930147568E2 0.14213191040335E2 - 0.14202450215487E2 0.14191707695825E2 0.14180963504092E2 0.14170217662981E2 
0.1415947019513E2 - 0.14148721123125E2 0.14137970469497E2 0.14127218256725E2 0.14116464507236E2 0.14105709243403E2 - 0.14094952487545E2 0.1408419426193E2 0.14073434588773E2 0.14062673490236E2 0.1405191098843E2 - 0.14041147105412E2 0.14030381863188E2 0.1401961528371E2 0.1400884738888E2 0.13998078200548E2 - 0.1398730774051E2 0.13976536030512E2 0.13965763092249E2 0.13954988947362E2 0.13944213617443E2 - 0.13933437124031E2 0.13922659488615E2 0.13911880732631E2 0.13901100877465E2 0.13890319944452E2 - 0.13879537954876E2 0.13868754929971E2 0.13857970890918E2 0.13847185858849E2 0.13836399854846E2 - 0.13825612899939E2 0.13814825015109E2 0.13804036221285E2 0.13793246539347E2 0.13782455990125E2 - 0.13771664594399E2 0.13760872372898E2 0.13750079346303E2 0.13739285535244E2 0.137284909603E2 - 0.13717695642004E2 0.13706899600836E2 0.13696102857229E2 0.13685305431566E2 0.13674507344179E2 - 0.13663708615355E2 0.13652909265328E2 0.13642109314284E2 0.13631308782362E2 0.1362050768965E2 - 0.13609706056189E2 0.1359890390197E2 0.13588101246936E2 0.13577298110983E2 0.13566494513956E2 - 0.13555690475654E2 0.13544886015828E2 0.13534081154178E2 0.1352327591036E2 0.13512470303978E2 - 0.13501664354593E2 0.13490858081713E2 0.13480051504804E2 0.13469244643279E2 0.13458437516507E2 - 0.13447630143809E2 0.13436822544458E2 0.1342601473768E2 0.13415206742655E2 0.13404398578515E2 - 0.13393590264345E2 0.13382781819183E2 0.13371973262021E2 0.13361164611804E2 0.13350355887431E2 - 0.13339547107752E2 0.13328738291574E2 0.13317929457655E2 0.13307120624709E2 0.13296311811402E2 - 0.13285503036355E2 0.13274694318143E2 0.13263885675293E2 0.13253077126289E2 0.13242268689569E2 - 0.13231460383523E2 0.13220652226497E2 0.13209844236793E2 0.13199036432664E2 0.1318822883232E2 - 0.13177421453927E2 0.13166614315603E2 0.13155807435422E2 0.13145000831414E2 0.13134194521562E2 - 0.13123388523807E2 0.13112582856043E2 0.13101777536119E2 0.13090972581843E2 0.13080168010974E2 - 0.13069363841228E2 0.13058560090279E2 
0.13047756775754E2 0.13036953915237E2 0.13026151526267E2 - 0.1301534962634E2 0.13004548232908E2 0.12993747363379E2 0.12982947035117E2 0.12972147265442E2 - 0.1296134807163E2 0.12950549470917E2 0.1293975148049E2 0.12928954117496E2 0.1291815739904E2 - 0.12907361342179E2 0.12896565963933E2 0.12885771281273E2 0.12874977311131E2 0.12864184070395E2 - 0.1285339157591E2 0.12842599844478E2 0.12831808892859E2 0.1282101873777E2 0.12810229395886E2 - 0.12799440883839E2 0.12788653218218E2 0.12777866415571E2 0.12767080492404E2 0.1275629546518E2 - 0.1274551135032E2 0.12734728164203E2 0.12723945923167E2 0.12713164643508E2 0.12702384341479E2 - 0.12691605033293E2 0.12680826735121E2 0.12670049463091E2 0.12659273233291E2 0.12648498061768E2 - 0.12637723964527E2 0.12626950957532E2 0.12616179056707E2 0.12605408277932E2 0.12594638637049E2 - 0.12583870149857E2 0.12573102832117E2 0.12562336699547E2 0.12551571767824E2 0.12540808052585E2 - 0.12530045569429E2 0.1251928433391E2 0.12508524361545E2 0.1249776566781E2 0.1248700826814E2 - 0.12476252177931E2 0.12465497412538E2 0.12454743987276E2 0.12443991917421E2 0.12433241218209E2 - 0.12422491904835E2 0.12411743992455E2 0.12400997496187E2 0.12390252431108E2 0.12379508812254E2 - 0.12368766654624E2 0.12358025973178E2 0.12347286782835E2 0.12336549098474E2 0.12325812934939E2 - 0.12315078307031E2 0.12304345229513E2 0.1229361371711E2 0.12282883784509E2 0.12272155446355E2 - 0.12261428717258E2 0.12250703611788E2 0.12239980144474E2 0.12229258329811E2 0.12218538182253E2 - 0.12207819716216E2 0.12197102946078E2 0.12186387886179E2 0.12175674550821E2 0.12164962954266E2 - 0.12154253110742E2 0.12143545034435E2 0.12132838739497E2 0.12122134240039E2 0.12111431550136E2 - 0.12100730683825E2 0.12090031655106E2 0.12079334477941E2 0.12068639166255E2 0.12057945733935E2 - 0.12047254194831E2 0.12036564562757E2 0.12025876851489E2 0.12015191074765E2 0.12004507246287E2 - 0.11993825379721E2 0.11983145488695E2 0.119724675868E2 0.11961791687592E2 0.11951117804588E2 - 
0.1194044595127E2 0.11929776141084E2 0.11919108387438E2 0.11908442703704E2 0.1189777910322E2 - 0.11887117599284E2 0.11876458205162E2 0.1186580093408E2 0.1185514579923E2 0.11844492813769E2 - 0.11833841990816E2 0.11823193343456E2 0.11812546884737E2 0.11801902627672E2 0.11791260585238E2 - 0.11780620770377E2 0.11769983195995E2 0.11759347874964E2 0.11748714820118E2 0.11738084044258E2 - 0.11727455560149E2 0.11716829380521E2 0.1170620551807E2 0.11695583985454E2 0.11684964795299E2 - 0.11674347960196E2 0.11663733492699E2 0.11653121405329E2 0.11642511710573E2 0.1163190442088E2 - 0.1162129954867E2 0.11610697106323E2 0.11600097106187E2 0.11589499560578E2 0.11578904481773E2 - 0.11568311882017E2 0.11557721773523E2 0.11547134168467E2 0.11536549078992E2 0.11525966517207E2 - 0.11515386495187E2 0.11504809024973E2 0.11494234118573E2 0.11483661787961E2 0.11473092045076E2 - 0.11462524901827E2 0.11451960370085E2 0.11441398461692E2 0.11430839188452E2 0.11420282562139E2 - 0.11409728594493E2 0.11399177297221E2 0.11388628681996E2 0.11378082760458E2 0.11367539544216E2 - 0.11356999044843E2 0.11346461273882E2 0.11335926242841E2 0.11325393963197E2 0.11314864446392E2 - 0.11304337703838E2 0.11293813746913E2 0.11283292586964E2 0.11272774235302E2 0.1126225870321E2 - 0.11251746001936E2 0.11241236142697E2 0.11230729136676E2 0.11220224995027E2 0.11209723728869E2 - 0.1119922534929E2 0.11188729867346E2 0.11178237294063E2 0.11167747640431E2 0.11157260917413E2 - 0.11146777135937E2 0.111362963069E2 0.11125818441169E2 0.11115343549578E2 0.11104871642929E2 - 0.11094402731994E2 0.11083936827514E2 0.11073473940197E2 0.11063014080721E2 0.11052557259732E2 - 0.11042103487847E2 0.11031652775648E2 0.11021205133691E2 0.11010760572497E2 0.11000319102558E2 - 0.10989880734335E2 0.10979445478258E2 0.10969013344727E2 0.10958584344111E2 0.10948158486747E2 - 0.10937735782944E2 0.10927316242979E2 0.10916899877099E2 0.10906486695521E2 0.10896076708432E2 - 0.10885669925986E2 0.10875266358312E2 0.10864866015504E2 
0.10854468907628E2 0.10844075044721E2 - 0.10833684436789E2 0.10823297093807E2 0.10812913025723E2 0.10802532242452E2 0.10792154753882E2 - 0.10781780569871E2 0.10771409700246E2 0.10761042154805E2 0.10750677943317E2 0.10740317075523E2 - 0.1072995956113E2 0.10719605409822E2 0.10709254631248E2 0.10698907235032E2 0.10688563230768E2 - 0.10678222628018E2 0.10667885436318E2 0.10657551665175E2 0.10647221324067E2 0.10636894422441E2 - 0.10626570969717E2 0.10616250975287E2 0.10605934448513E2 0.10595621398729E2 0.1058531183524E2 - 0.10575005767322E2 0.10564703204225E2 0.10554404155168E2 0.10544108629342E2 0.10533816635911E2 - 0.1052352818401E2 0.10513243282746E2 0.10502961941198E2 0.10492684168416E2 0.10482409973424E2 - 0.10472139365215E2 0.10461872352758E2 0.1045160894499E2 0.10441349150824E2 0.10431092979142E2 - 0.10420840438802E2 0.1041059153863E2 0.10400346287428E2 0.10390104693968E2 0.10379866766997E2 - 0.10369632515233E2 0.10359401947367E2 0.10349175072062E2 0.10338951897954E2 0.10328732433654E2 - 0.10318516687743E2 0.10308304668775E2 0.1029809638528E2 0.10287891845757E2 0.10277691058682E2 - 0.102674940325E2 0.10257300775633E2 0.10247111296473E2 0.10236925603388E2 0.10226743704719E2 - 0.10216565608777E2 0.10206391323851E2 0.10196220858201E2 0.10186054220062E2 0.10175891417639E2 - 0.10165732459116E2 0.10155577352646E2 0.10145426106359E2 0.10135278728357E2 0.10125135226716E2 - 0.10114995609487E2 0.10104859884694E2 0.10094728060335E2 0.10084600144383E2 0.10074476144783E2 - 0.10064356069457E2 0.100542399263E2 0.10044127723179E2 0.1003401946794E2 0.10023915168398E2 - 0.10013814832347E2 0.10003718467554E2 0.99936260817583E1 0.99835376826768E1 0.99734532779995E1 - 0.99633728753914E1 0.99532964824923E1 0.99432241069165E1 0.99331557562533E1 0.99230914380668E1 - 0.99130311598962E1 0.99029749292554E1 0.98929227536335E1 0.98828746404945E1 0.98728305972778E1 - 0.98627906313976E1 0.98527547502435E1 0.98427229611804E1 0.98326952715486E1 0.98226716886634E1 - 0.98126522198158E1 
0.98026368722723E1 0.97926256532747E1 0.97826185700405E1 0.97726156297627E1 - 0.97626168396101E1 0.97526222067271E1 0.97426317382338E1 0.97326454412262E1 0.97226633227761E1 - 0.97126853899313E1 0.97027116497153E1 0.96927421091278E1 0.96827767751444E1 0.9672815654717E1 - 0.96628587547734E1 0.96529060822177E1 0.96429576439303E1 0.96330134467675E1 0.96230734975625E1 - 0.96131378031244E1 0.9603206370239E1 0.95932792056684E1 0.95833563161513E1 0.95734377084028E1 - 0.9563523389115E1 0.95536133649561E1 0.95437076425715E1 0.9533806228583E1 0.95239091295894E1 - 0.95140163521662E1 0.9504127902866E1 0.94942437882181E1 0.94843640147288E1 0.94744885888816E1 - 0.94646175171369E1 0.94547508059323E1 0.94448884616825E1 0.94350304907795E1 0.94251768995925E1 - 0.9415327694468E1 0.94054828817299E1 0.93956424676793E1 0.93858064585951E1 0.93759748607333E1 - 0.93661476803276E1 0.93563249235893E1 0.93465065967073E1 0.93366927058482E1 0.93268832571561E1 - 0.93170782567531E1 0.93072777107391E1 0.92974816251916E1 0.92876900061663E1 0.92779028596965E1 - 0.92681201917937E1 0.92583420084475E1 0.92485683156253E1 0.92387991192728E1 0.92290344253138E1 - 0.92192742396503E1 0.92095185681626E1 0.91997674167091E1 0.91900207911268E1 0.91802786972309E1 - 0.91705411408151E1 0.91608081276514E1 0.91510796634906E1 0.91413557540617E1 0.91316364050726E1 - 0.91219216222097E1 0.9112211411138E1 0.91025057775014E1 0.90928047269225E1 0.90831082650027E1 - 0.90734163973222E1 0.90637291294401E1 0.90540464668945E1 0.90443684152026E1 0.90346949798603E1 - 0.90250261663429E1 0.90153619801045E1 0.90057024265787E1 0.89960475111781E1 0.89863972392945E1 - 0.8976751616299E1 0.89671106475421E1 0.89574743383536E1 0.89478426940428E1 0.89382157198982E1 - 0.89285934211882E1 0.89189758031604E1 0.89093628710421E1 0.88997546300401E1 0.8890151085341E1 - 0.88805522421112E1 0.88709581054964E1 0.88613686806227E1 0.88517839725955E1 0.88422039865003E1 - 0.88326287274025E1 0.88230582003474E1 0.88134924103603E1 0.88039313624466E1 
0.87943750615917E1 - 0.87848235127612E1 0.87752767209006E1 0.87657346909359E1 0.87561974277732E1 0.87466649362988E1 - 0.87371372213796E1 0.87276142878625E1 0.8718096140575E1 0.8708582784325E1 0.86990742239008E1 - 0.86895704640715E1 0.86800715095863E1 0.86705773651754E1 0.86610880355495E1 0.86516035253998E1 - 0.86421238393986E1 0.86326489821987E1 0.86231789584336E1 0.86137137727179E1 0.86042534296469E1 - 0.85947979337969E1 0.85853472897252E1 0.85759015019699E1 0.85664605750503E1 0.85570245134668E1 - 0.85475933217008E1 0.8538167004215E1 0.8528745565453E1 0.851932900984E1 0.85099173417822E1 - 0.85005105656673E1 0.84911086858641E1 0.84817117067232E1 0.84723196325761E1 0.84629324677361E1 - 0.8453550216498E1 0.84441728831381E1 0.84348004719141E1 0.84254329870656E1 0.84160704328137E1 - 0.84067128133613E1 0.83973601328927E1 0.83880123955745E1 0.83786696055547E1 0.83693317669634E1 - 0.83599988839122E1 0.83506709604952E1 0.83413480007879E1 0.83320300088482E1 0.83227169887158E1 - 0.83134089444126E1 0.83041058799426E1 0.82948077992917E1 0.82855147064285E1 0.82762266053033E1 - 0.82669434998489E1 0.82576653939805E1 0.82483922915954E1 0.82391241965735E1 0.82298611127768E1 - 0.82206030440501E1 0.82113499942205E1 0.82021019670975E1 0.81928589664734E1 0.8183620996123E1 - 0.81743880598037E1 0.81651601612556E1 0.81559373042014E1 0.81467194923467E1 0.81375067293797E1 - 0.81282990189716E1 0.81190963647763E1 0.81098987704307E1 0.81007062395545E1 0.80915187757504E1 - 0.80823363826042E1 0.80731590636845E1 0.80639868225432E1 0.80548196627151E1 0.80456575877182E1 - 0.80365006010539E1 0.80273487062064E1 0.80182019066435E1 0.8009060205816E1 0.79999236071583E1 - 0.79907921140878E1 0.79816657300055E1 0.79725444582959E1 0.79634283023268E1 0.79543172654495E1 - 0.79452113509988E1 0.79361105622933E1 0.79270149026348E1 0.79179243753091E1 0.79088389835854E1 - 0.78997587307167E1 0.78906836199398E1 0.78816136544752E1 0.78725488375271E1 0.78634891722837E1 - 0.78544346619171E1 0.78453853095832E1 
0.78363411184217E1 0.78273020915566E1 0.78182682320956E1 - 0.78092395431307E1 0.78002160277379E1 0.77911976889771E1 0.77821845298927E1 0.7773176553513E1 - 0.77641737628506E1 0.77551761609024E1 0.77461837506495E1 0.77371965350574E1 0.77282145170759E1 - 0.77192376996391E1 0.77102660856656E1 0.77012996780586E1 0.76923384797055E1 0.76833824934783E1 - 0.76744317222337E1 0.76654861688128E1 0.76565458360413E1 0.76476107267299E1 0.76386808436735E1 - 0.76297561896519E1 0.76208367674298E1 0.76119225797566E1 0.76030136293662E1 0.75941099189779E1 - 0.75852114512955E1 0.75763182290077E1 0.75674302547884E1 0.75585475312962E1 0.75496700611749E1 - 0.75407978470532E1 0.7531930891545E1 0.75230691972492E1 0.751421276675E1 0.75053616026165E1 - 0.74965157074032E1 0.74876750836499E1 0.74788397338815E1 0.74700096606082E1 0.74611848663256E1 - 0.74523653535147E1 0.74435511246418E1 0.74347421821587E1 0.74259385285025E1 0.74171401660961E1 - 0.74083470973477E1 0.7399559324651E1 0.73907768503854E1 0.73819996769158E1 0.7373227806593E1 - 0.73644612417532E1 0.73556999847185E1 0.73469440377965E1 0.73381934032809E1 0.7329448083451E1 - 0.73207080805718E1 0.73119733968945E1 0.7303244034656E1 0.7294519996079E1 0.72858012833725E1 - 0.72770878987312E1 0.7268379844336E1 0.72596771223536E1 0.7250979734937E1 0.72422876842253E1 - 0.72336009723436E1 0.72249196014034E1 0.72162435735022E1 0.72075728907237E1 0.71989075551381E1 - 0.71902475688017E1 0.71815929337571E1 0.71729436520334E1 0.7164299725646E1 0.71556611565967E1 - 0.71470279468737E1 0.71384000984519E1 0.71297776132924E1 0.7121160493343E1 0.71125487405381E1 - 0.71039423567985E1 0.70953413440319E1 0.70867457041323E1 0.70781554389808E1 0.70695705504448E1 - 0.70609910403787E1 0.70524169106236E1 0.70438481630074E1 0.70352847993448E1 0.70267268214373E1 - 0.70181742310735E1 0.70096270300288E1 0.70010852200654E1 0.69925488029326E1 0.69840177803668E1 - 0.69754921540912E1 0.69669719258163E1 0.69584570972395E1 0.69499476700454E1 0.69414436459058E1 - 
0.69329450264796E1 0.69244518134129E1 0.69159640083391E1 0.69074816128787E1 0.68990046286397E1 - 0.68905330572173E1 0.68820669001941E1 0.68736061591399E1 0.68651508356122E1 0.68567009311557E1 - 0.68482564473026E1 0.68398173855727E1 0.68313837474731E1 0.68229555344988E1 0.68145327481319E1 - 0.68061153898426E1 0.67977034610882E1 0.67892969633142E1 0.67808958979533E1 0.67725002664262E1 - 0.67641100701412E1 0.67557253104946E1 0.67473459888701E1 0.67389721066396E1 0.67306036651626E1 - 0.67222406657866E1 0.67138831098469E1 0.67055309986669E1 0.66971843335578E1 0.66888431158188E1 - 0.66805073467372E1 0.66721770275882E1 0.66638521596353E1 0.66555327441299E1 0.66472187823115E1 - 0.66389102754079E1 0.66306072246349E1 0.66223096311967E1 0.66140174962855E1 0.6605730821082E1 - 0.65974496067549E1 0.65891738544615E1 0.65809035653472E1 0.65726387405459E1 0.65643793811799E1 - 0.65561254883597E1 0.65478770631846E1 0.6539634106742E1 0.65313966201081E1 0.65231646043473E1 - 0.65149380605129E1 0.65067169896465E1 0.64985013927785E1 0.64902912709276E1 0.64820866251016E1 - 0.64738874562967E1 0.64656937654977E1 0.64575055536784E1 0.64493228218012E1 0.64411455708173E1 - 0.64329738016668E1 0.64248075152784E1 0.64166467125699E1 0.64084913944478E1 0.64003415618014E1 - 0.63921972155256E1 0.63840583565079E1 0.63759249856281E1 0.63677971037069E1 0.63596747115598E1 - 0.63515578100169E1 0.63434463999102E1 0.63353404820538E1 0.63272400572469E1 0.63191451262773E1 - 0.63110556899247E1 0.63029717489637E1 0.6294893304173E1 0.62868203562735E1 0.62787529059738E1 - 0.62706909539729E1 0.6262634501005E1 0.6254583547778E1 0.62465380949864E1 0.62384981433124E1 - 0.6230463693426E1 0.62224347459956E1 0.62144113016853E1 0.6206393361135E1 0.61983809249449E1 - 0.61903739937005E1 0.61823725680241E1 0.61743766485244E1 0.61663862357956E1 0.61584013304126E1 - 0.61504219329404E1 0.61424480439399E1 0.61344796639643E1 0.61265167935722E1 0.61185594332562E1 - 0.61106075835061E1 0.61026612448188E1 0.60947204177125E1 
0.60867851026845E1 0.6078855300212E1 - 0.60709310107677E1 0.60630122348163E1 0.60550989727757E1 0.6047191225054E1 0.60392889920814E1 - 0.60313922742891E1 0.60235010720975E1 0.60156153858914E1 0.60077352160498E1 0.59998605629494E1 - 0.59919914269701E1 0.59841278084936E1 0.59762697078424E1 0.59684171253312E1 0.5960570061269E1 - 0.59527285159915E1 0.59448924898134E1 0.59370619830339E1 0.59292369959405E1 0.5921417528807E1 - 0.59136035819178E1 0.59057951555532E1 0.58979922499628E1 0.5890194865356E1 0.58824030019273E1 - 0.58746166599181E1 0.58668358395522E1 0.58590605410401E1 0.58512907645744E1 0.58435265103392E1 - 0.58357677785151E1 0.58280145692758E1 0.58202668827991E1 0.58125247191979E1 0.58047880785826E1 - 0.5797056961076E1 0.57893313668198E1 0.578161129594E1 0.57738967485328E1 0.57661877246882E1 - 0.57584842244884E1 0.57507862480144E1 0.57430937953468E1 0.57354068665283E1 0.57277254615854E1 - 0.57200495805291E1 0.5712379223413E1 0.57047143902643E1 0.56970550811006E1 0.56894012959299E1 - 0.56817530347711E1 0.56741102975783E1 0.56664730843E1 0.56588413948836E1 0.56512152293144E1 - 0.56435945875574E1 0.56359794695507E1 0.56283698752206E1 0.56207658044826E1 0.56131672572657E1 - 0.56055742334951E1 0.55979867330631E1 0.55904047558297E1 0.55828283016373E1 0.55752573703816E1 - 0.55676919619387E1 0.55601320761712E1 0.55525777129166E1 0.55450288720048E1 0.55374855532717E1 - 0.55299477565482E1 0.55224154816638E1 0.55148887283928E1 0.55073674965084E1 0.54998517857932E1 - 0.54923415960376E1 0.54848369270164E1 0.54773377784916E1 0.54698441502165E1 0.54623560419377E1 - 0.54548734533992E1 0.54473963843489E1 0.54399248344891E1 0.54324588035081E1 0.54249982910814E1 - 0.5417543296934E1 0.54100938207694E1 0.54026498622685E1 0.53952114210926E1 0.53877784968915E1 - 0.53803510893357E1 0.53729291980882E1 0.53655128227945E1 0.53581019630559E1 0.53506966184623E1 - 0.53432967886436E1 0.53359024732237E1 0.53285136718141E1 0.53211303840054E1 0.53137526093944E1 - 0.53063803475503E1 
0.52990135980208E1 0.52916523603371E1 0.52842966340718E1 0.52769464187781E1 - 0.52696017139977E1 0.52622625192547E1 0.52549288340643E1 0.52476006579481E1 0.52402779904235E1 - 0.52329608310047E1 0.52256491791514E1 0.52183430343195E1 0.52110423959854E1 0.52037472636346E1 - 0.51964576367416E1 0.51891735147533E1 0.51818948971143E1 0.5174621783263E1 0.51673541726343E1 - 0.51600920646649E1 0.51528354587545E1 0.51455843542928E1 0.51383387506613E1 0.51310986472732E1 - 0.51238640435229E1 0.51166349387913E1 0.51094113324487E1 0.51021932238602E1 0.50949806123833E1 - 0.50877734973716E1 0.50805718781666E1 0.50733757540908E1 0.5066185124457E1 0.50589999885909E1 - 0.50518203458103E1 0.50446461954241E1 0.50374775367294E1 0.50303143690133E1 0.50231566915661E1 - 0.50160045036756E1 0.50088578046373E1 0.50017165936846E1 0.49945808700554E1 0.49874506329902E1 - 0.49803258817438E1 0.49732066155502E1 0.49660928336476E1 0.4958984535271E1 0.49518817196497E1 - 0.49447843859628E1 0.49376925333879E1 0.49306061611207E1 0.49235252683599E1 0.49164498542939E1 - 0.49093799180928E1 0.49023154589213E1 0.4895256475941E1 0.48882029683132E1 0.48811549352027E1 - 0.48741123757315E1 0.48670752890157E1 0.48600436741674E1 0.48530175303177E1 0.48459968565801E1 - 0.48389816520642E1 0.48319719158742E1 0.48249676471078E1 0.48179688448527E1 0.48109755081966E1 - 0.48039876362115E1 0.47970052279484E1 0.47900282824492E1 0.47830567987752E1 0.47760907759776E1 - 0.47691302130988E1 0.4762175109177E1 0.47552254632424E1 0.47482812743203E1 0.47413425414312E1 - 0.47344092636006E1 0.47274814398048E1 0.472055906902E1 0.47136421502302E1 0.47067306824339E1 - 0.46998246646174E1 0.46929240957487E1 0.46860289747894E1 0.46791393006964E1 0.46722550724336E1 - 0.46653762889675E1 0.46585029492248E1 0.46516350521171E1 0.46447725965439E1 0.46379155814495E1 - 0.46310640057539E1 0.46242178683725E1 0.46173771682161E1 0.46105419042069E1 0.46037120752092E1 - 0.45968876800844E1 0.45900687176939E1 0.45832551869335E1 0.45764470866832E1 
0.45696444158017E1 - 0.45628471731377E1 0.45560553575313E1 0.45492689678419E1 0.45424880029257E1 0.45357124616116E1 - 0.45289423426986E1 0.45221776449731E1 0.45154183672699E1 0.45086645084097E1 0.45019160672025E1 - 0.44951730424378E1 0.44884354328999E1 0.44817032373799E1 0.4474976454667E1 0.44682550835518E1 - 0.44615391227748E1 0.44548285710776E1 0.44481234272125E1 0.44414236899414E1 0.44347293580142E1 - 0.44280404301698E1 0.44213569051404E1 0.44146787816546E1 0.44080060584454E1 0.44013387342514E1 - 0.43946768077674E1 0.43880202776768E1 0.43813691426541E1 0.43747234014164E1 0.43680830526602E1 - 0.43614480950727E1 0.43548185273328E1 0.43481943481175E1 0.43415755560921E1 0.43349621499199E1 - 0.43283541282571E1 0.43217514897458E1 0.43151542330203E1 0.43085623567243E1 0.43019758594959E1 - 0.42953947399637E1 0.42888189967625E1 0.42822486285284E1 0.42756836338704E1 0.42691240113751E1 - 0.42625697596168E1 0.42560208772138E1 0.42494773627678E1 0.42429392148721E1 0.42364064321056E1 - 0.42298790130414E1 0.42233569562587E1 0.4216840260335E1 0.42103289238484E1 0.42038229453252E1 - 0.41973223232908E1 0.41908270562913E1 0.41843371428847E1 0.41778525816215E1 0.41713733710243E1 - 0.41648995096132E1 0.41584309959083E1 0.41519678284404E1 0.41455100057445E1 0.41390575263087E1 - 0.41326103886128E1 0.41261685911319E1 0.41197321323767E1 0.41133010108408E1 0.41068752250075E1 - 0.41004547733522E1 0.40940396543423E1 0.40876298664549E1 0.4081225408165E1 0.40748262779331E1 - 0.40684324741875E1 0.40620439953489E1 0.40556608398714E1 0.40492830062035E1 0.40429104927854E1 - 0.40365432980447E1 0.40301814204069E1 0.40238248582933E1 0.40174736101204E1 0.40111276743113E1 - 0.40047870492504E1 0.39984517333218E1 0.39921217249142E1 0.3985797022433E1 0.39794776242706E1 - 0.39731635288124E1 0.39668547344422E1 0.39605512395415E1 0.3954253042459E1 0.39479601415394E1 - 0.39416725351447E1 0.39353902216421E1 0.39291131993962E1 0.39228414667402E1 0.39165750220071E1 - 0.39103138635321E1 0.39040579896605E1 
0.38978073987396E1 0.3891562089077E1 0.38853220589766E1 - 0.38790873067402E1 0.3872857830687E1 0.3866633629122E1 0.38604147003495E1 0.38542010426702E1 - 0.384799265438E1 0.384178953377E1 0.38355916791343E1 0.38293990887513E1 0.38232117608753E1 - 0.38170296937475E1 0.38108528856543E1 0.38046813348752E1 0.3798515039682E1 0.37923539983143E1 - 0.37861982090114E1 0.37800476700236E1 0.37739023796025E1 0.37677623360044E1 0.37616275374374E1 - 0.37554979821119E1 0.37493736682477E1 0.37432545940814E1 0.37371407578417E1 0.3731032157736E1 - 0.37249287919677E1 0.37188306587376E1 0.37127377562595E1 0.37066500827507E1 0.37005676363907E1 - 0.36944904153449E1 0.36884184177668E1 0.36823516418625E1 0.36762900858148E1 0.36702337478027E1 - 0.36641826260016E1 0.36581367185996E1 0.36520960237328E1 0.36460605395356E1 0.36400302641432E1 - 0.36340051957246E1 0.36279853324347E1 0.3621970672414E1 0.36159612137961E1 0.36099569547079E1 - 0.36039578932928E1 0.35979640276922E1 0.35919753560276E1 0.35859918763965E1 0.35800135868903E1 - 0.35740404856313E1 0.35680725707327E1 0.35621098403014E1 0.35561522924334E1 0.35501999252181E1 - 0.35442527367553E1 0.35383107251455E1 0.35323738884955E1 0.35264422248549E1 0.35205157322782E1 - 0.35145944088345E1 0.35086782526088E1 0.35027672616753E1 0.34968614340966E1 0.34909607679337E1 - 0.34850652612443E1 0.34791749120811E1 0.34732897185014E1 0.346740967854E1 0.34615347902237E1 - 0.34556650515697E1 0.34498004606347E1 0.34439410154633E1 0.34380867140843E1 0.34322375545109E1 - 0.34263935347494E1 0.34205546528311E1 0.34147209067839E1 0.34088922946257E1 0.34030688143371E1 - 0.33972504638936E1 0.33914372413063E1 0.3385629144588E1 0.33798261717452E1 0.33740283207607E1 - 0.33682355896262E1 0.33624479763197E1 0.33566654788072E1 0.33508880950437E1 0.33451158230172E1 - 0.33393486607045E1 0.33335866060768E1 0.33278296570968E1 0.33220778117266E1 0.33163310679233E1 - 0.33105894236417E1 0.33048528768379E1 0.32991214254419E1 0.32933950673805E1 0.32876738005939E1 - 
0.32819576230319E1 0.32762465326387E1 0.32705405273411E1 0.32648396050656E1 0.32591437637369E1 - 0.32534530012803E1 0.32477673156242E1 0.32420867046732E1 0.32364111663263E1 0.3230740698478E1 - 0.32250752990478E1 0.32194149659436E1 0.32137596970669E1 0.32081094903138E1 0.32024643435768E1 - 0.31968242547508E1 0.31911892217299E1 0.31855592424004E1 0.31799343146287E1 0.31743144362756E1 - 0.31686996052242E1 0.31630898193553E1 0.31574850765449E1 0.31518853746546E1 0.31462907115435E1 - 0.31407010850755E1 0.31351164931167E1 0.31295369335416E1 0.31239624041782E1 0.3118392902858E1 - 0.31128284274189E1 0.3107268975721E1 0.31017145456129E1 0.30961651349346E1 0.30906207415258E1 - 0.30850813632269E1 0.30795469978481E1 0.3074017643198E1 0.30684932971017E1 0.3062973957391E1 - 0.3057459621893E1 0.30519502884186E1 0.30464459547777E1 0.30409466187809E1 0.30354522782433E1 - 0.30299629309865E1 0.3024478574796E1 0.30189992074534E1 0.30135248267385E1 0.3008055430462E1 - 0.30025910164221E1 0.29971315824083E1 0.29916771262048E1 0.29862276455913E1 0.29807831383557E1 - 0.29753436022854E1 0.29699090351566E1 0.29644794347253E1 0.2959054798743E1 0.2953635124985E1 - 0.2948220411222E1 0.29428106552199E1 0.29374058547381E1 0.29320060075335E1 0.29266111113638E1 - 0.29212211639858E1 0.2915836163164E1 0.29104561066239E1 0.29050809920935E1 0.28997108173096E1 - 0.28943455800264E1 0.28889852779923E1 0.28836299089373E1 0.28782794705893E1 0.2872933960675E1 - 0.28675933769302E1 0.28622577170922E1 0.28569269788767E1 0.28516011599902E1 0.28462802581317E1 - 0.28409642710325E1 0.28356531964111E1 0.28303470319817E1 0.28250457754535E1 0.28197494245428E1 - 0.28144579769408E1 0.28091714303357E1 0.28038897824158E1 0.27986130308927E1 0.27933411734714E1 - 0.27880742078416E1 0.27828121316869E1 0.27775549426851E1 0.27723026385351E1 0.2767055216933E1 - 0.27618126755596E1 0.27565750120757E1 0.2751342224136E1 0.27461143094275E1 0.27408912656314E1 - 0.27356730904236E1 0.27304597814638E1 0.27252513364093E1 
0.27200477529286E1 0.27148490286924E1 - 0.27096551613757E1 0.27044661486136E1 0.2699281988046E1 0.26941026773213E1 0.26889282140992E1 - 0.26837585960325E1 0.26785938207669E1 0.26734338859466E1 0.26682787892138E1 0.2663128528214E1 - 0.26579831005973E1 0.265284250399E1 0.26477067360109E1 0.26425757942733E1 0.26374496764222E1 - 0.26323283800909E1 0.26272119029071E1 0.26221002424934E1 0.261699339647E1 0.26118913624606E1 - 0.26067941380892E1 0.26017017209752E1 0.25966141087162E1 0.25915312989056E1 0.25864532891595E1 - 0.25813800770957E1 0.25763116603274E1 0.25712480364569E1 0.25661892030915E1 0.2561135157829E1 - 0.25560858982577E1 0.25510414219598E1 0.25460017265386E1 0.25409668095913E1 0.25359366687113E1 - 0.25309113014845E1 0.25258907054938E1 0.25208748783291E1 0.25158638175812E1 0.2510857520843E1 - 0.25058559856764E1 0.25008592096461E1 0.24958671903262E1 0.24908799252987E1 0.24858974121409E1 - 0.24809196484215E1 0.24759466317069E1 0.24709783595641E1 0.24660148295673E1 0.24610560392961E1 - 0.24561019863006E1 0.24511526681262E1 0.24462080823151E1 0.24412682264382E1 0.24363330980563E1 - 0.24314026947236E1 0.24264770139891E1 0.24215560533968E1 0.24166398105046E1 0.24117282828698E1 - 0.2406821468042E1 0.24019193635468E1 0.23970219669093E1 0.23921292756745E1 0.23872412873862E1 - 0.23823579995839E1 0.23774794098042E1 0.23726055155817E1 0.23677363144511E1 0.23628718039464E1 - 0.23580119816105E1 0.23531568449519E1 0.23483063914818E1 0.23434606187167E1 0.23386195241922E1 - 0.23337831054338E1 0.23289513599652E1 0.23241242853103E1 0.23193018789953E1 0.23144841385182E1 - 0.23096710613795E1 0.23048626450906E1 0.23000588871683E1 0.22952597851251E1 0.22904653364677E1 - 0.22856755387012E1 0.2280890389331E1 0.22761098858658E1 0.22713340258184E1 0.22665628066796E1 - 0.22617962259381E1 0.22570342810815E1 0.22522769696137E1 0.22475242890301E1 0.22427762368258E1 - 0.22380328104957E1 0.2233294007533E1 0.2228559825428E1 0.2223830261674E1 0.22191053137575E1 - 0.22143849791503E1 
0.22096692553196E1 0.22049581397557E1 0.22002516299465E1 0.21955497233767E1 - 0.21908524175219E1 0.21861597098573E1 0.21814715978611E1 0.21767880790121E1 0.21721091507952E1 - 0.21674348106668E1 0.21627650560872E1 0.2158099884521E1 0.21534392934434E1 0.21487832803239E1 - 0.21441318426282E1 0.2139484977821E1 0.21348426833651E1 0.21302049567275E1 0.21255717953783E1 - 0.21209431967715E1 0.21163191583543E1 0.21116996775681E1 0.21070847518808E1 0.21024743787499E1 - 0.20978685556318E1 0.20932672799816E1 0.20886705492612E1 0.20840783609098E1 0.20794907123654E1 - 0.20749076010664E1 0.2070329024469E1 0.2065754980023E1 0.20611854651721E1 0.20566204773573E1 - 0.20520600140147E1 0.20475040725964E1 0.2042952650554E1 0.20384057453274E1 0.20338633543371E1 - 0.20293254750015E1 0.20247921047649E1 0.20202632410669E1 0.20157388813443E1 0.20112190230276E1 - 0.2006703663545E1 0.20021928003311E1 0.19976864308223E1 0.19931845524629E1 0.1988687162657E1 - 0.19841942588131E1 0.19797058383523E1 0.1975221898714E1 0.19707424373342E1 0.19662674516281E1 - 0.19617969390113E1 0.19573308969E1 0.19528693227234E1 0.19484122139134E1 0.1943959567879E1 - 0.1939511382023E1 0.1935067653744E1 0.19306283804695E1 0.19261935596164E1 0.19217631885995E1 - 0.19173372648307E1 0.1912915785721E1 0.19084987486833E1 0.19040861511322E1 0.18996779904797E1 - 0.18952742641178E1 0.18908749694359E1 0.18864801038448E1 0.1882089664758E1 0.1877703649585E1 - 0.18733220557297E1 0.18689448806023E1 0.18645721216016E1 0.18602037761138E1 0.18558398415191E1 - 0.1851480315229E1 0.18471251946492E1 0.18427744771816E1 0.18384281602168E1 0.18340862411451E1 - 0.18297487173649E1 0.18254155862761E1 0.18210868452816E1 0.18167624917584E1 0.18124425230869E1 - 0.18081269366559E1 0.18038157298626E1 0.1799508900101E1 0.17952064447577E1 0.17909083612194E1 - 0.17866146468727E1 0.17823252991083E1 0.17780403153207E1 0.17737596928871E1 0.17694834291808E1 - 0.1765211521573E1 0.17609439674573E1 0.17566807642202E1 0.17524219092439E1 0.17481673999068E1 - 
0.17439172335849E1 0.17396714076633E1 0.17354299195273E1 0.17311927665579E1 0.17269599461187E1 - 0.17227314555718E1 0.17185072922981E1 0.17142874536791E1 0.17100719370943E1 0.17058607399146E1 - 0.17016538595115E1 0.16974512932591E1 0.16932530385338E1 0.16890590927179E1 0.16848694531686E1 - 0.16806841172458E1 0.16765030823129E1 0.16723263457484E1 0.16681539049234E1 0.16639857572087E1 - 0.16598218999761E1 0.16556623306001E1 0.16515070464324E1 0.16473560448267E1 0.16432093231479E1 - 0.16390668787682E1 0.16349287090576E1 0.16307948113766E1 0.16266651830859E1 0.16225398215481E1 - 0.16184187241301E1 0.16143018882036E1 0.16101893111204E1 0.16060809902302E1 0.16019769228819E1 - 0.15978771064457E1 0.15937815382854E1 0.15896902157599E1 0.15856031362257E1 0.15815202970366E1 - 0.15774416955558E1 0.15733673291471E1 0.15692971951682E1 0.15652312909616E1 0.15611696138672E1 - 0.15571121612469E1 0.15530589304617E1 0.15490099188709E1 0.15449651238238E1 0.15409245426702E1 - 0.15368881727651E1 0.15328560114656E1 0.15288280561357E1 0.15248043041117E1 0.15207847527341E1 - 0.15167693993489E1 0.15127582413152E1 0.15087512759885E1 0.15047485007168E1 0.15007499128474E1 - 0.14967555097273E1 0.14927652887106E1 0.14887792471538E1 0.14847973824002E1 0.14808196917875E1 - 0.14768461726489E1 0.14728768223413E1 0.14689116382132E1 0.14649506176132E1 0.1460993757891E1 - 0.14570410564034E1 0.14530925104848E1 0.14491481174689E1 0.14452078746902E1 0.14412717795042E1 - 0.14373398292604E1 0.14334120213025E1 0.14294883529717E1 0.14255688216074E1 0.14216534245586E1 - 0.14177421591749E1 0.14138350227989E1 0.14099320127612E1 0.140603312639E1 0.14021383610339E1 - 0.13982477140395E1 0.13943611827514E1 0.13904787645081E1 0.13866004566479E1 0.13827262565138E1 - 0.13788561614504E1 0.13749901688081E1 0.1371128275912E1 0.13672704800908E1 0.13634167786806E1 - 0.1359567169029E1 0.13557216484801E1 0.13518802143712E1 0.13480428640396E1 0.13442095948229E1 - 0.13403804040641E1 0.13365552891098E1 0.13327342472922E1 
0.13289172759395E1 0.13251043723762E1 - 0.13212955339505E1 0.13174907580035E1 0.1313690041874E1 0.13098933828974E1 0.1306100778408E1 - 0.1302312225747E1 0.12985277222568E1 0.12947472652782E1 0.12909708521353E1 0.12871984801524E1 - 0.12834301466681E1 0.12796658490241E1 0.12759055845583E1 0.12721493506103E1 0.12683971445232E1 - 0.12646489636321E1 0.12609048052615E1 0.12571646667331E1 0.12534285453907E1 0.12496964385749E1 - 0.12459683436243E1 0.12422442578714E1 0.12385241786489E1 0.12348081032947E1 0.12310960291486E1 - 0.12273879535538E1 0.12236838738328E1 0.12199837873112E1 0.12162876913221E1 0.12125955832073E1 - 0.12089074603065E1 0.12052233199526E1 0.12015431594785E1 0.11978669762183E1 0.11941947675125E1 - 0.11905265307052E1 0.11868622631249E1 0.11832019620969E1 0.1179545624945E1 0.1175893249014E1 - 0.11722448316425E1 0.11686003701667E1 0.11649598619201E1 0.11613233042348E1 0.11576906944508E1 - 0.11540620299089E1 0.11504373079474E1 0.11468165258903E1 0.11431996810614E1 0.11395867707996E1 - 0.11359777924458E1 0.11323727433396E1 0.11287716208136E1 0.11251744222013E1 0.11215811448398E1 - 0.11179917860693E1 0.11144063432359E1 0.11108248136639E1 0.11072471946802E1 0.11036734836153E1 - 0.11001036778133E1 0.10965377746122E1 0.10929757713515E1 0.1089417665372E1 0.10858634540179E1 - 0.10823131346135E1 0.10787667044854E1 0.107522416097E1 0.10716855014106E1 0.10681507231485E1 - 0.10646198235197E1 0.10610927998607E1 0.10575696495094E1 0.1054050369808E1 0.10505349581033E1 - 0.1047023411726E1 0.10435157280045E1 0.10400119042671E1 0.10365119378627E1 0.10330158261349E1 - 0.10295235664234E1 0.1026035156066E1 0.10225505923997E1 0.1019069872768E1 0.10155929945154E1 - 0.10121199549832E1 0.10086507515022E1 0.10051853814027E1 0.10017238420292E1 0.9982661307263E0 - 0.99481224483789E0 0.99136218170387E0 0.98791593866443E0 0.98447351306299E0 0.98103490224493E0 - 0.97760010356233E0 0.97416911434473E0 0.97074193192532E0 0.96731855364238E0 0.96389897684715E0 - 0.96048319888773E0 
0.95707121710676E0 0.95366302884699E0 0.9502586314516E0 0.94685802226843E0 - 0.9434611986483E0 0.94006815793265E0 0.93667889745847E0 0.93329341455869E0 0.92991170658894E0 - 0.9265337708982E0 0.92315960483521E0 0.91978920574797E0 0.91642257099069E0 0.91305969790297E0 - 0.90970058382416E0 0.90634522609423E0 0.90299362206944E0 0.89964576910209E0 0.89630166454099E0 - 0.89296130573357E0 0.88962469002593E0 0.8862918147731E0 0.88296267733113E0 0.87963727505097E0 - 0.8763156052729E0 0.87299766533589E0 0.86968345259722E0 0.86637296441344E0 0.86306619814001E0 - 0.85976315112655E0 0.8564638207234E0 0.85316820428561E0 0.84987629917038E0 0.84658810274041E0 - 0.84330361233777E0 0.84002282530852E0 0.83674573900441E0 0.83347235078698E0 0.83020265801514E0 - 0.82693665804443E0 0.82367434823085E0 0.82041572593103E0 0.81716078850482E0 0.81390953331545E0 - 0.81066195771692E0 0.80741805906034E0 0.80417783469447E0 0.80094128198636E0 0.79770839829839E0 - 0.79447918099175E0 0.79125362742561E0 0.78803173495889E0 0.78481350095595E0 0.78159892278269E0 - 0.77838799780484E0 0.77518072337488E0 0.77197709684619E0 0.76877711558396E0 0.76558077695671E0 - 0.76238807833073E0 0.75919901707286E0 0.75601359055368E0 0.75283179613832E0 0.74965363118438E0 - 0.74647909304838E0 0.74330817910239E0 0.74014088671664E0 0.73697721326074E0 0.73381715610276E0 - 0.73066071261142E0 0.72750788015731E0 0.72435865611226E0 0.72121303785178E0 0.71807102273776E0 - 0.71493260813427E0 0.7117977914115E0 0.70866656994692E0 0.70553894111727E0 0.70241490229358E0 - 0.69929445084756E0 0.69617758415226E0 0.69306429958734E0 0.68995459453566E0 0.6868484663679E0 - 0.68374591245254E0 0.68064693015722E0 0.67755151686633E0 0.67445966995981E0 0.67137138681689E0 - 0.66828666481607E0 0.66520550133548E0 0.66212789375812E0 0.65905383946877E0 0.65598333585099E0 - 0.65291638027659E0 0.64985297011753E0 0.64679310275945E0 0.64373677559048E0 0.64068398599833E0 - 0.63763473136451E0 0.63458900907191E0 0.6315468165069E0 0.62850815105903E0 
0.62547301012311E0 - 0.62244139107683E0 0.6194132913003E0 0.6163887081768E0 0.61336763910161E0 0.6103500814657E0 - 0.60733603266117E0 0.60432549008161E0 0.60131845112417E0 0.59831491317056E0 0.59531487360517E0 - 0.59231832982E0 0.58932527921351E0 0.58633571918291E0 0.58334964712188E0 0.58036706042512E0 - 0.57738795648864E0 0.57441233271176E0 0.57144018649765E0 0.56847151523876E0 0.56550631632663E0 - 0.56254458715254E0 0.55958632512339E0 0.55663152764292E0 0.5536801921123E0 0.55073231593105E0 - 0.54778789649759E0 0.54484693121988E0 0.54190941750685E0 0.53897535276475E0 0.53604473438859E0 - 0.53311755977379E0 0.53019382632987E0 0.52727353146748E0 0.52435667259669E0 0.52144324712399E0 - 0.51853325245698E0 0.51562668600608E0 0.51272354518373E0 0.5098238274077E0 0.50692753008002E0 - 0.50403465060572E0 0.50114518639368E0 0.49825913486283E0 0.49537649342989E0 0.49249725950927E0 - 0.48962143051579E0 0.48674900386488E0 0.48387997697616E0 0.48101434727214E0 0.47815211216841E0 - 0.47529326907695E0 0.47243781540792E0 0.46958574858768E0 0.46673706603805E0 0.46389176518204E0 - 0.46104984344505E0 0.45821129825788E0 0.45537612703744E0 0.45254432720071E0 0.44971589616566E0 - 0.44689083136476E0 0.44406913022707E0 0.44125079018024E0 0.43843580865157E0 0.43562418306784E0 - 0.43281591086237E0 0.43001098947022E0 0.42720941632298E0 0.4244111888433E0 0.42161630445317E0 - 0.41882476059054E0 0.41603655469351E0 0.4132516841997E0 0.41047014654225E0 0.4076919391559E0 - 0.40491705947856E0 0.40214550495002E0 0.39937727301473E0 0.39661236110323E0 0.39385076664927E0 - 0.39109248709096E0 0.38833751987442E0 0.38558586244466E0 0.38283751224361E0 0.38009246671387E0 - 0.37735072329895E0 0.37461227944735E0 0.37187713261055E0 0.369145280232E0 0.36641671975259E0 - 0.36369144861136E0 0.36096946426458E0 0.35825076416467E0 0.35553534576345E0 0.3528232065111E0 - 0.35011434385827E0 0.3474087552603E0 0.34470643817415E0 0.3420073900574E0 0.33931160835754E0 - 0.33661909052336E0 0.3339298340129E0 
0.33124383628756E0 0.32856109480712E0 0.32588160703313E0 - 0.32320537043049E0 0.32053238246005E0 0.31786264057599E0 0.31519614223162E0 0.31253288489558E0 - 0.30987286603573E0 0.30721608311964E0 0.30456253361103E0 0.30191221497464E0 0.29926512467976E0 - 0.29662126019783E0 0.29398061900437E0 0.29134319856036E0 0.28870899633006E0 0.28607800978339E0 - 0.28345023639762E0 0.280825673649E0 0.27820431901089E0 0.27558616995773E0 0.27297122396513E0 - 0.2703594785124E0 0.26775093108203E0 0.26514557914893E0 0.2625434201867E0 0.25994445166824E0 - 0.2573486710805E0 0.25475607590743E0 0.25216666363257E0 0.24958043173887E0 0.24699737770963E0 - 0.24441749903243E0 0.24184079319652E0 0.23926725769104E0 0.23669688999694E0 0.23412968759614E0 - 0.23156564797996E0 0.22900476864205E0 0.22644704707596E0 0.22389248077321E0 0.22134106722659E0 - 0.21879280393087E0 0.21624768838288E0 0.21370571808376E0 0.21116689052381E0 0.20863120319511E0 - 0.20609865359228E0 0.20356923921983E0 0.20104295757942E0 0.19851980617389E0 0.19599978250762E0 - 0.19348288408859E0 0.19096910841255E0 0.18845845297782E0 0.18595091528925E0 0.18344649285786E0 - 0.18094518319426E0 0.17844698380558E0 0.17595189220029E0 0.17345990588841E0 0.17097102238382E0 - 0.16848523920381E0 0.16600255385714E0 0.163522963852E0 0.16104646669656E0 0.15857305991265E0 - 0.15610274101944E0 0.15363550753514E0 0.15117135697735E0 0.14871028686386E0 0.1462522947179E0 - 0.1437973780643E0 0.14134553442683E0 0.13889676132176E0 0.13645105626588E0 0.13400841678685E0 - 0.13156884041397E0 0.12913232467665E0 0.12669886710131E0 0.12426846521575E0 0.12184111655066E0 - 0.11941681863911E0 0.11699556901877E0 0.114577365215E0 0.11216220475583E0 0.10975008517261E0 - 0.1073410040057E0 0.10493495879418E0 0.10253194707503E0 0.10013196638589E0 0.97735014265197E-1 - 0.9534108825608E-1 0.9295018590413E-1 0.90562304749816E-1 0.88177442330717E-1 0.85795596183148E-1 - 0.83416763857824E-1 0.81040942902465E-1 0.786681308656E-1 0.7629832529692E-1 0.73931523750827E-1 - 
0.71567723772534E-1 0.69206922907447E-1 0.66849118701805E-1 0.64494308714245E-1 0.62142490501237E-1 - 0.59793661618037E-1 0.57447819619658E-1 0.55104962061322E-1 0.52765086503818E-1 0.50428190509455E-1 - 0.48094271638857E-1 0.45763327446578E-1 0.43435355487397E-1 0.4111035332727E-1 0.38788318532958E-1 - 0.36469248671398E-1 0.34153141307082E-1 0.31839994005777E-1 0.29529804336391E-1 0.27222569869861E-1 - 0.24918288181762E-1 0.2261695683551E-1 0.20318573397731E-1 0.18023135438988E-1 0.15730640537578E-1 - 0.13441086271038E-1 0.1115447021459E-1 0.88707899443598E-2 0.65900430375683E-2 0.43122270758305E-2 - 0.20373396434173E-2 -0.23462168099993E-3 -0.25036593205453E-2 -0.47697756996174E-2 -0.70329732283716E-2 --0.92932543196477E-2 -0.11550621386308E-1 -0.13805076842261E-1 -0.16056623100746E-1 -0.18305262570498E-1 --0.20550997658506E-1 -0.22793830770661E-1 -0.2503376432134E-1 -0.27270800723351E-1 -0.2950494238152E-1 --0.31736191697175E-1 -0.33964551072488E-1 -0.36190022908638E-1 -0.38412609603707E-1 -0.40632313558076E-1 --0.42849137176896E-1 -0.45063082865404E-1 -0.47274153017314E-1 -0.49482350026254E-1 -0.51687676285619E-1 --0.53890134190969E-1 -0.56089726136707E-1 -0.58286454513624E-1 -0.60480321710472E-1 -0.62671330112091E-1 --0.64859482114608E-1 -0.67044780111156E-1 -0.69227226490132E-1 -0.71406823633248E-1 -0.73583573922637E-1 --0.75757479742749E-1 -0.7792854347685E-1 -0.80096767506867E-1 -0.8226215421095E-1 -0.84424705964415E-1 --0.86584425148315E-1 -0.8874131414463E-1 -0.90895375335807E-1 -0.93046611091783E-1 -0.95195023784629E-1 --0.97340615786579E-1 -0.99483389470458E-1 -0.10162334720852E0 -0.10376049136832E0 -0.10589482431574E0 --0.10802634841625E0 -0.11015506604253E0 -0.11228097956597E0 -0.11440409134959E0 -0.11652440375369E0 --0.11864191913816E0 -0.1207566398655E0 -0.12286856829658E0 -0.12497770678989E0 -0.12708405770123E0 --0.12918762338253E0 -0.13128840619492E0 -0.13338640849775E0 -0.13548163264804E0 -0.13757408099382E0 --0.13966375588521E0 
-0.14175065967116E0 -0.14383479469914E0 -0.14591616331313E0 -0.14799476786652E0 --0.15007061071019E0 -0.15214369418977E0 -0.15421402064546E0 -0.15628159241742E0 -0.15834641184815E0 --0.16040848127876E0 -0.1624678030488E0 -0.16452437949417E0 -0.16657821294777E0 -0.16862930574875E0 --0.17067766023652E0 -0.17272327875027E0 -0.17476616361782E0 -0.17680631716874E0 -0.17884374173294E0 --0.18087843964059E0 -0.18291041322131E0 -0.1849396647999E0 -0.18696619669953E0 -0.18899001124365E0 --0.19101111076165E0 -0.19302949758206E0 -0.19504517402445E0 -0.19705814240648E0 -0.1990684050453E0 --0.20107596426042E0 -0.20308082236978E0 -0.20508298168865E0 -0.20708244452992E0 -0.20907921320252E0 --0.21107329002468E0 -0.21306467731232E0 -0.21505337737847E0 -0.21703939252828E0 -0.21902272506753E0 --0.22100337730359E0 -0.22298135154304E0 -0.22495665009158E0 -0.22692927525017E0 -0.2288992293176E0 --0.23086651459639E0 -0.23283113339135E0 -0.23479308800794E0 -0.23675238073946E0 -0.23870901388103E0 - -0.240662989727E0 -0.24261431057106E0 -0.24456297870301E0 -0.24650899641825E0 -0.24845236601178E0 --0.25039308977772E0 -0.25233117000081E0 -0.25426660896698E0 -0.25619940896241E0 -0.25812957227304E0 --0.26005710118424E0 -0.26198199797695E0 -0.2639042649305E0 -0.2658239043249E0 -0.26774091844463E0 --0.2696553095735E0 -0.27156707998657E0 -0.27347623195765E0 -0.27538276775998E0 -0.27728668966878E0 --0.27918799995793E0 -0.28108670089813E0 -0.28298279475778E0 -0.28487628380121E0 -0.28676717030262E0 --0.2886554565333E0 -0.29054114476113E0 -0.29242423724707E0 -0.29430473625246E0 -0.29618264404011E0 --0.29805796287185E0 -0.29993069500829E0 -0.30180084270615E0 -0.30366840821981E0 -0.30553339380737E0 --0.30739580172812E0 -0.30925563424171E0 -0.31111289359731E0 -0.31296758204577E0 -0.31481970183709E0 --0.31666925522075E0 -0.31851624444515E0 -0.32036067175661E0 -0.3222025393997E0 -0.32404184961751E0 --0.32587860465832E0 -0.32771280676909E0 -0.32954445819063E0 -0.33137356116057E0 -0.3332001179166E0 
--0.3350241306959E0 -0.33684560173288E0 -0.33866453326294E0 -0.34048092752455E0 -0.34229478675593E0 --0.34410611318643E0 -0.34591490904473E0 -0.34772117655895E0 -0.34952491795879E0 -0.35132613547272E0 --0.35312483132584E0 -0.35492100774122E0 -0.35671466693844E0 -0.35850581114559E0 -0.36029444258791E0 --0.36208056348697E0 -0.36386417605887E0 -0.36564528251971E0 -0.36742388508665E0 -0.36919998597564E0 --0.37097358740133E0 -0.37274469157493E0 -0.3745133007051E0 -0.37627941700458E0 -0.37804304268665E0 --0.37980417996474E0 -0.38156283104195E0 -0.38331899812279E0 -0.3850726834114E0 -0.38682388911195E0 --0.38857261742762E0 -0.39031887055825E0 -0.39206265070197E0 -0.39380396005599E0 -0.39554280082266E0 --0.39727917520313E0 -0.39901308539158E0 -0.40074453357954E0 -0.40247352195765E0 -0.40420005271907E0 --0.40592412805525E0 -0.4076457501552E0 -0.40936492120512E0 -0.41108164338791E0 -0.41279591889354E0 --0.41450774991035E0 -0.4162171386246E0 -0.41792408721485E0 -0.41962859786102E0 -0.42133067274176E0 --0.42303031403427E0 -0.42472752391234E0 -0.42642230455739E0 -0.42811465814829E0 -0.42980458685967E0 --0.43149209286131E0 -0.43317717832303E0 -0.43485984541519E0 -0.43654009630678E0 -0.43821793316541E0 --0.43989335815651E0 -0.4415663734427E0 -0.44323698119036E0 -0.44490518356577E0 -0.44657098273503E0 --0.44823438085459E0 -0.44989538008195E0 -0.45155398257471E0 -0.45321019049058E0 -0.4548640059865E0 --0.45651543121504E0 -0.45816446832708E0 -0.45981111947332E0 -0.46145538680945E0 -0.46309727249028E0 --0.46473677866241E0 -0.46637390747024E0 -0.46800866105737E0 -0.46964104157015E0 -0.47127105115293E0 --0.47289869194773E0 -0.47452396609431E0 -0.47614687572931E0 -0.4777674229954E0 -0.47938561003328E0 --0.48100143898134E0 -0.48261491197193E0 -0.48422603113786E0 -0.48583479861149E0 -0.48744121652408E0 --0.4890452870057E0 -0.49064701218447E0 -0.49224639418612E0 -0.49384343513809E0 -0.49543813716889E0 --0.49703050240698E0 -0.49862053297252E0 -0.5002082309866E0 -0.50179359856898E0 
-0.50337663783688E0 --0.5049573509037E0 -0.50653573988982E0 -0.50811180691539E0 -0.50968555409981E0 -0.51125698355135E0 --0.51282609737984E0 -0.51439289769549E0 -0.51595738660845E0 -0.51751956622761E0 -0.51907943865889E0 --0.52063700600669E0 -0.52219227037515E0 -0.52374523387048E0 -0.52529589859773E0 -0.52684426665702E0 --0.52839034014719E0 -0.52993412116638E0 -0.5314756118116E0 -0.53301481417895E0 -0.53455173036259E0 --0.53608636245505E0 -0.53761871254524E0 -0.53914878272879E0 -0.54067657509914E0 -0.5422020917467E0 --0.54372533475549E0 -0.54524630620941E0 -0.5467650081941E0 -0.54828144279418E0 -0.54979561209304E0 --0.55130751816991E0 -0.55281716310196E0 -0.55432454896934E0 -0.55582967785325E0 -0.55733255183527E0 --0.55883317298666E0 -0.56033154337998E0 -0.56182766508704E0 -0.56332154017943E0 -0.56481317072694E0 --0.56630255879852E0 -0.56778970646152E0 -0.56927461578176E0 -0.57075728882722E0 -0.57223772766484E0 --0.57371593435716E0 -0.575191910964E0 -0.57666565954508E0 -0.57813718215848E0 -0.5796064808599E0 --0.58107355770571E0 -0.58253841475529E0 -0.58400105406752E0 -0.585461477693E0 -0.58691968768146E0 --0.5883756860819E0 -0.58982947494454E0 -0.59128105631773E0 -0.59273043224799E0 -0.59417760478037E0 --0.59562257595735E0 -0.59706534782488E0 -0.59850592242703E0 -0.59994430180528E0 -0.60138048799715E0 --0.60281448303944E0 -0.60424628896975E0 -0.60567590782464E0 -0.60710334163943E0 -0.60852859244462E0 --0.60995166226831E0 -0.61137255314323E0 -0.61279126710308E0 -0.61420780618213E0 -0.61562217240216E0 --0.61703436778663E0 -0.61844439435913E0 -0.61985225414437E0 -0.62125794916535E0 -0.62266148144212E0 --0.62406285299324E0 -0.62546206583621E0 -0.62685912199071E0 -0.62825402347504E0 -0.62964677230358E0 --0.63103737048863E0 -0.63242582004182E0 -0.63381212297407E0 -0.63519628129521E0 -0.63657829701333E0 --0.63795817213441E0 -0.63933590866123E0 -0.64071150860204E0 -0.6420849739637E0 -0.64345630675123E0 - -0.644825508962E0 -0.64619258259406E0 -0.64755752964561E0 
-0.64892035211384E0 -0.65028105199332E0 --0.6516396312815E0 -0.65299609197418E0 -0.6543504360642E0 -0.65570266554086E0 -0.65705278239256E0 --0.65840078860875E0 -0.6597466861776E0 -0.66109047708577E0 -0.66243216331632E0 -0.6637717468503E0 --0.66510922967164E0 -0.66644461376403E0 -0.66777790111022E0 -0.66910909368698E0 -0.67043819347168E0 --0.67176520244009E0 -0.67309012256635E0 -0.67441295582337E0 -0.6757337041834E0 -0.67705236961679E0 --0.67836895409292E0 -0.6796834595835E0 -0.68099588805915E0 -0.68230624148538E0 -0.68361452182577E0 --0.6849207310431E0 -0.68622487110034E0 -0.6875269439591E0 -0.68882695157904E0 -0.69012489591776E0 --0.69142077892987E0 -0.69271460257505E0 -0.69400636881125E0 -0.69529607959426E0 -0.6965837368739E0 --0.69786934259999E0 -0.69915289872279E0 -0.70043440719155E0 -0.70171386995437E0 -0.70299128895648E0 --0.70426666614122E0 -0.70554000345313E0 -0.70681130283773E0 -0.7080805662406E0 -0.7093477955989E0 --0.71061299285018E0 -0.71187615993109E0 -0.7131372987775E0 -0.7143964113219E0 -0.71565349950056E0 --0.71690856524929E0 -0.71816161050318E0 -0.71941263718907E0 -0.72066164723456E0 -0.72190864256695E0 --0.7231536251131E0 -0.72439659679853E0 -0.72563755954655E0 -0.72687651527892E0 -0.72811346591677E0 --0.72934841338266E0 -0.73058135959803E0 -0.73181230647995E0 -0.73304125594413E0 -0.73426820990546E0 --0.7354931702778E0 -0.73671613897369E0 -0.73793711790446E0 -0.73915610898026E0 -0.74037311410898E0 --0.74158813520022E0 -0.74280117416196E0 -0.7440122329004E0 -0.74522131331861E0 -0.7464284173191E0 --0.74763354680348E0 -0.74883670367224E0 -0.75003788982482E0 -0.75123710715784E0 -0.75243435756591E0 --0.75362964294526E0 -0.75482296519269E0 -0.75601432620493E0 -0.75720372787078E0 -0.75839117207935E0 --0.75957666071913E0 -0.76076019567912E0 -0.761941778847E0 -0.76312141210748E0 -0.76429909734382E0 --0.76547483643798E0 -0.76664863127337E0 -0.76782048373154E0 -0.76899039569206E0 -0.77015836903284E0 --0.77132440563192E0 -0.77248850736284E0 
-0.77365067609791E0 -0.77481091370962E0 -0.77596922207284E0 --0.7771256030613E0 -0.77828005854306E0 -0.77943259038508E0 -0.7805832004535E0 -0.78173189061428E0 --0.78287866273231E0 -0.78402351867005E0 -0.78516646028821E0 -0.78630748944421E0 -0.78744660800152E0 --0.78858381782095E0 -0.78971912076068E0 -0.79085251867455E0 -0.79198401341652E0 -0.79311360683937E0 --0.79424130079464E0 -0.79536709713257E0 -0.79649099770253E0 -0.79761300435141E0 -0.79873311892739E0 --0.7998513432785E0 -0.80096767925289E0 -0.80208212869088E0 -0.8031946934333E0 -0.80430537532042E0 --0.80541417619263E0 -0.80652109788919E0 -0.80762614224618E0 -0.80872931109825E0 -0.80983060627881E0 --0.81093002962386E0 -0.81202758296779E0 -0.81312326814133E0 -0.81421708697313E0 -0.81530904129084E0 --0.81639913292229E0 -0.8174873636941E0 -0.81857373543088E0 -0.8196582499547E0 -0.82074090908521E0 --0.82182171464585E0 -0.82290066845861E0 -0.82397777234387E0 -0.82505302811731E0 -0.82612643759506E0 --0.82719800259118E0 -0.82826772491815E0 -0.82933560638564E0 -0.83040164880838E0 -0.83146585399892E0 --0.83252822376662E0 -0.83358875991667E0 -0.83464746425331E0 -0.83570433858273E0 -0.83675938470978E0 --0.83781260443776E0 -0.83886399956625E0 -0.83991357189334E0 -0.84096132321885E0 -0.84200725534222E0 --0.84305137006192E0 -0.84409366917163E0 -0.84513415446513E0 -0.84617282773475E0 -0.84720969077129E0 --0.84824474536374E0 -0.84927799330197E0 -0.85030943637431E0 -0.8513390763677E0 -0.85236691506765E0 --0.85339295425894E0 -0.85441719572411E0 -0.85543964124423E0 -0.85646029259912E0 -0.85747915156846E0 --0.85849621993086E0 -0.85951149946275E0 -0.86052499193823E0 -0.86153669912879E0 -0.8625466228103E0 --0.8635547647571E0 -0.86456112674153E0 -0.86556571053019E0 -0.86656851788926E0 -0.86756955058617E0 --0.8685688103877E0 -0.86956629905972E0 -0.87056201836326E0 -0.87155597005826E0 -0.87254815590559E0 --0.87353857766703E0 -0.87452723710354E0 -0.87551413597054E0 -0.87649927602325E0 -0.8774826590158E0 --0.87846428670089E0 
-0.87944416082886E0 -0.88042228315187E0 -0.88139865542136E0 -0.88237327938761E0 --0.88334615679671E0 -0.88431728939477E0 -0.88528667892629E0 -0.88625432713427E0 -0.88722023576058E0 --0.88818440654605E0 -0.88914684122985E0 -0.89010754155029E0 -0.89106650924647E0 -0.89202374605621E0 --0.8929792537142E0 -0.89393303395374E0 -0.8948850885072E0 -0.89583541910595E0 -0.89678402748042E0 --0.89773091535909E0 -0.89867608446861E0 -0.89961953653286E0 -0.90056127328012E0 -0.90150129643687E0 --0.90243960772733E0 -0.90337620887068E0 -0.9043111015859E0 -0.90524428759226E0 -0.90617576860795E0 --0.90710554634978E0 -0.90803362253286E0 -0.90895999887066E0 -0.9098846770767E0 -0.91080765886421E0 --0.9117289459461E0 -0.91264854002989E0 -0.9135664428227E0 -0.91448265603096E0 -0.91539718136133E0 --0.91631002051937E0 -0.91722117520758E0 -0.91813064712688E0 -0.91903843797671E0 -0.91994454945935E0 --0.92084898327542E0 -0.9217517411222E0 -0.9226528246945E0 -0.92355223568697E0 -0.92444997579225E0 --0.92534604670127E0 -0.92624045010471E0 -0.92713318769499E0 -0.92802426116343E0 -0.92891367219656E0 --0.92980142247966E0 -0.93068751369708E0 -0.93157194753343E0 -0.93245472567211E0 -0.93333584979415E0 --0.93421532157874E0 -0.93509314270267E0 -0.93596931484637E0 -0.93684383968809E0 -0.93771671890414E0 --0.93858795416778E0 -0.93945754715193E0 -0.94032549952813E0 -0.94119181296666E0 -0.94205648913659E0 --0.94291952970618E0 -0.94378093634195E0 -0.9446407107101E0 -0.9454988544761E0 -0.94635536930463E0 --0.94721025685701E0 -0.94806351879402E0 -0.94891515677531E0 -0.94976517245934E0 -0.95061356750357E0 --0.9514603435637E0 -0.95230550229405E0 -0.95314904534763E0 -0.95399097437856E0 -0.95483129103947E0 --0.95566999698052E0 -0.95650709385017E0 -0.95734258329591E0 -0.95817646696448E0 -0.95900874650161E0 --0.95983942355135E0 -0.96066849975561E0 -0.96149597675434E0 -0.96232185618961E0 -0.96314613970226E0 --0.96396882893173E0 -0.96478992551362E0 -0.96560943108318E0 -0.96642734727481E0 -0.96724367572181E0 
--0.96805841805558E0 -0.9688715759084E0 -0.9696831509113E0 -0.97049314469312E0 -0.97130155887973E0 --0.97210839509574E0 -0.97291365496707E0 -0.97371734011838E0 -0.97451945217293E0 -0.97531999275095E0 --0.97611896347119E0 -0.97691636595357E0 -0.97771220181757E0 -0.97850647268225E0 -0.97929918016075E0 --0.98009032586599E0 -0.98087991141077E0 -0.98166793840819E0 -0.98245440847086E0 -0.98323932320629E0 --0.98402268422074E0 -0.98480449311969E0 -0.98558475151235E0 -0.98636346100613E0 -0.98714062320418E0 --0.98791623970777E0 -0.98869031211748E0 -0.98946284203328E0 -0.99023383105372E0 -0.99100328077624E0 --0.99177119279711E0 -0.99253756871095E0 -0.99330241011249E0 -0.99406571859508E0 -0.99482749575083E0 --0.99558774317034E0 -0.99634646244355E0 -0.99710365515826E0 -0.99785932290094E0 -0.99861346725712E0 --0.99936608981091E0 -0.10001171921445E1 -0.10008667758405E1 -0.1001614842482E1 -0.10023613936517E1 --0.10031064309262E1 -0.10038499558817E1 -0.10045919700937E1 -0.10053324751373E1 -0.10060714725848E1 --0.10068089640104E1 -0.1007544950988E1 -0.1008279435091E1 -0.10090124178854E1 -0.10097439009375E1 --0.10104738858139E1 -0.10112023740817E1 -0.1011929367307E1 -0.10126548670518E1 -0.10133788748772E1 --0.10141013923432E1 -0.10148224210115E1 -0.10155419624417E1 -0.10162600181913E1 -0.10169765898161E1 --0.10176916788712E1 -0.10184052869108E1 -0.10191174154887E1 -0.1019828066156E1 -0.10205372404618E1 --0.10212449399531E1 -0.10219511661801E1 -0.10226559206912E1 -0.10233592050329E1 -0.10240610207494E1 --0.10247613693849E1 -0.10254602524807E1 -0.10261576715769E1 -0.10268536282121E1 -0.10275481239263E1 --0.10282411602571E1 -0.10289327387415E1 -0.10296228609154E1 -0.10303115283147E1 -0.10309987424708E1 --0.10316845049146E1 -0.10323688171759E1 -0.10330516807853E1 -0.1033733097272E1 -0.10344130681623E1 --0.10350915949809E1 -0.1035768679251E1 -0.10364443224988E1 -0.10371185262486E1 -0.10377912920219E1 --0.10384626213374E1 -0.10391325157129E1 -0.10398009766669E1 -0.10404680057166E1 
-0.10411336043776E1 --0.1041797774164E1 -0.10424605165889E1 -0.10431218331645E1 -0.10437817254018E1 -0.10444401948109E1 --0.10450972428998E1 -0.10457528711757E1 -0.10464070811448E1 -0.10470598743127E1 -0.10477112521838E1 --0.10483612162604E1 -0.10490097680436E1 -0.10496569090338E1 -0.10503026407312E1 -0.10509469646347E1 --0.10515898822414E1 -0.10522313950471E1 -0.10528715045472E1 -0.10535102122335E1 -0.10541475195961E1 --0.1054783428127E1 -0.10554179393186E1 -0.10560510546631E1 -0.10566827756455E1 -0.10573131037509E1 --0.1057942040464E1 -0.1058569587271E1 -0.10591957456567E1 -0.10598205171024E1 -0.10604439030881E1 --0.10610659050929E1 -0.10616865245966E1 -0.10623057630773E1 -0.10629236220115E1 -0.1063540102874E1 --0.10641552071393E1 -0.10647689362797E1 -0.10653812917661E1 -0.10659922750689E1 -0.10666018876584E1 --0.10672101310027E1 -0.10678170065699E1 -0.10684225158269E1 -0.10690266602396E1 -0.10696294412705E1 --0.10702308603811E1 -0.10708309190331E1 -0.10714296186867E1 -0.10720269608005E1 -0.10726229468342E1 --0.10732175782459E1 -0.10738108564921E1 -0.10744027830262E1 -0.10749933593011E1 -0.10755825867693E1 --0.1076170466882E1 -0.10767570010892E1 -0.107734219084E1 -0.10779260375821E1 -0.10785085427627E1 --0.1079089707828E1 -0.10796695342231E1 -0.10802480233911E1 -0.10808251767744E1 -0.10814009958141E1 - -0.108197548195E1 -0.10825486366211E1 -0.10831204612646E1 -0.10836909573164E1 -0.10842601262115E1 --0.10848279693854E1 -0.10853944882717E1 -0.10859596843031E1 -0.10865235589106E1 -0.10870861135253E1 --0.10876473495745E1 -0.10882072684849E1 -0.10887658716828E1 -0.10893231605938E1 -0.10898791366417E1 --0.10904338012502E1 -0.10909871558419E1 -0.10915392018381E1 -0.10920899406585E1 -0.10926393737223E1 --0.10931875024465E1 -0.10937343282466E1 -0.10942798525365E1 -0.10948240767319E1 -0.10953670022466E1 --0.10959086304928E1 -0.10964489628806E1 -0.10969880008194E1 -0.10975257457181E1 -0.10980621989843E1 --0.10985973620246E1 -0.10991312362436E1 -0.10996638230447E1 
-0.11001951238313E1 -0.11007251400063E1 --0.11012538729713E1 -0.11017813241251E1 -0.11023074948661E1 -0.11028323865915E1 -0.11033560006979E1 --0.11038783385814E1 -0.1104399401635E1 -0.11049191912507E1 -0.11054377088197E1 -0.11059549557343E1 --0.11064709333858E1 -0.11069856431621E1 -0.11074990864498E1 -0.11080112646344E1 -0.11085221791023E1 --0.11090318312385E1 -0.11095402224259E1 -0.11100473540454E1 -0.11105532274754E1 -0.11110578440979E1 --0.11115612052934E1 -0.11120633124406E1 -0.11125641669137E1 -0.11130637700868E1 -0.11135621233342E1 --0.11140592280299E1 -0.11145550855464E1 -0.11150496972539E1 -0.11155430645215E1 -0.11160351887179E1 --0.11165260712114E1 -0.11170157133689E1 -0.11175041165562E1 -0.1117991282138E1 -0.1118477211478E1 --0.11189619059375E1 -0.11194453668772E1 -0.11199275956567E1 -0.11204085936343E1 -0.11208883621665E1 --0.11213669026119E1 -0.11218442163272E1 -0.11223203046672E1 -0.11227951689844E1 -0.11232688106304E1 --0.11237412309568E1 -0.11242124313137E1 -0.11246824130502E1 -0.11251511775143E1 -0.11256187260535E1 --0.11260850600131E1 -0.1126550180737E1 -0.11270140895681E1 -0.11274767878502E1 -0.11279382769263E1 --0.11283985581366E1 -0.11288576328186E1 -0.11293155023084E1 -0.11297721679449E1 -0.11302276310654E1 --0.11306818930055E1 -0.11311349550977E1 -0.11315868186736E1 -0.11320374850654E1 -0.11324869556046E1 --0.11329352316215E1 -0.11333823144436E1 -0.1133828205398E1 -0.11342729058109E1 -0.11347164170074E1 --0.1135158740311E1 -0.11355998770458E1 -0.11360398285347E1 -0.11364785960991E1 -0.11369161810574E1 --0.11373525847274E1 -0.1137787808427E1 -0.11382218534725E1 -0.11386547211792E1 -0.11390864128627E1 --0.11395169298374E1 -0.11399462734156E1 -0.11403744449076E1 -0.11408014456223E1 -0.11412272768698E1 --0.11416519399585E1 -0.11420754361954E1 -0.11424977668858E1 -0.11429189333335E1 -0.11433389368431E1 --0.11437577787182E1 -0.11441754602611E1 -0.11445919827715E1 -0.11450073475485E1 -0.11454215558904E1 --0.11458346090943E1 -0.11462465084561E1 
-0.11466572552715E1 -0.11470668508354E1 -0.1147475296441E1 --0.11478825933795E1 -0.11482887429412E1 -0.11486937464168E1 -0.11490976050961E1 -0.11495003202678E1 --0.11499018932169E1 -0.11503023252272E1 -0.11507016175838E1 -0.11510997715717E1 -0.1151496788475E1 --0.1151892669574E1 -0.11522874161491E1 -0.11526810294792E1 -0.11530735108422E1 -0.11534648615137E1 --0.11538550827712E1 -0.11542441758908E1 -0.11546321421477E1 -0.11550189828134E1 -0.1155404699159E1 --0.11557892924556E1 -0.11561727639739E1 -0.11565551149841E1 -0.11569363467523E1 -0.11573164605442E1 --0.11576954576251E1 -0.11580733392608E1 -0.11584501067156E1 -0.11588257612518E1 -0.11592003041303E1 --0.11595737366115E1 -0.11599460599548E1 -0.11603172754185E1 -0.116068738426E1 -0.11610563877358E1 --0.11614242871016E1 -0.11617910836109E1 -0.11621567785167E1 -0.11625213730708E1 -0.11628848685246E1 --0.11632472661281E1 -0.11636085671302E1 -0.11639687727784E1 -0.11643278843193E1 -0.11646859029992E1 --0.1165042830063E1 -0.11653986667546E1 -0.11657534143164E1 -0.11661070739899E1 -0.11664596470163E1 --0.1166811134636E1 -0.11671615380876E1 -0.11675108586074E1 -0.11678590974299E1 -0.11682062557923E1 --0.11685523349302E1 -0.11688973360781E1 -0.11692412604668E1 -0.11695841093268E1 -0.11699258838884E1 --0.11702665853814E1 -0.11706062150341E1 -0.11709447740736E1 -0.1171282263726E1 -0.11716186852165E1 --0.11719540397692E1 -0.11722883286071E1 -0.11726215529526E1 -0.1172953714027E1 -0.11732848130507E1 --0.11736148512426E1 -0.11739438298216E1 -0.11742717500038E1 -0.11745986130039E1 -0.11749244200349E1 --0.1175249172313E1 -0.11755728710525E1 -0.11758955174654E1 -0.11762171127615E1 -0.11765376581499E1 --0.11768571548398E1 -0.11771756040392E1 -0.1177493006955E1 -0.11778093647924E1 -0.11781246787555E1 --0.11784389500483E1 -0.1178752179874E1 -0.11790643694345E1 -0.11793755199306E1 -0.11796856325628E1 --0.11799947085294E1 -0.11803027490271E1 -0.11806097552506E1 -0.11809157283969E1 -0.11812206696613E1 --0.1181524580238E1 
-0.11818274613191E1 -0.11821293140969E1 -0.11824301397611E1 -0.11827299395004E1 --0.1183028714502E1 -0.11833264659553E1 -0.11836231950488E1 -0.11839189029678E1 -0.11842135908947E1 --0.11845072600101E1 -0.11847999114992E1 -0.11850915465451E1 -0.11853821663294E1 -0.11856717720296E1 --0.11859603648229E1 -0.11862479458872E1 -0.11865345163998E1 -0.11868200775365E1 -0.11871046304715E1 --0.11873881763785E1 -0.11876707164297E1 -0.11879522517959E1 -0.11882327836467E1 -0.11885123131525E1 --0.11887908414827E1 -0.11890683698052E1 -0.11893448992858E1 -0.11896204310895E1 -0.11898949663809E1 --0.11901685063235E1 -0.11904410520796E1 -0.11907126048111E1 -0.11909831656788E1 -0.11912527358422E1 --0.11915213164598E1 -0.11917889086893E1 -0.11920555136868E1 -0.11923211326076E1 -0.11925857666061E1 --0.11928494168364E1 -0.11931120844518E1 -0.11933737706035E1 -0.11936344764414E1 -0.11938942031146E1 --0.11941529517728E1 -0.11944107235645E1 -0.11946675196363E1 -0.11949233411328E1 -0.11951781891973E1 --0.1195432064975E1 -0.11956849696094E1 -0.11959369042426E1 -0.11961878700149E1 -0.11964378680658E1 --0.11966868995342E1 -0.11969349655575E1 -0.11971820672724E1 -0.11974282058151E1 -0.11976733823207E1 --0.11979175979231E1 -0.11981608537554E1 -0.11984031509497E1 -0.11986444906368E1 -0.11988848739468E1 --0.11991243020087E1 -0.11993627759504E1 -0.11996002968988E1 -0.119983686598E1 -0.12000724843195E1 --0.12003071530417E1 -0.12005408732693E1 -0.12007736461244E1 -0.12010054727284E1 -0.12012363542023E1 --0.12014662916668E1 -0.12016952862388E1 -0.12019233390352E1 -0.12021504511721E1 -0.12023766237652E1 --0.12026018579292E1 -0.12028261547775E1 -0.12030495154228E1 -0.12032719409766E1 -0.12034934325501E1 --0.12037139912533E1 -0.12039336181948E1 -0.12041523144815E1 -0.12043700812191E1 -0.12045869195145E1 --0.12048028304728E1 -0.12050178151983E1 -0.12052318747939E1 -0.12054450103625E1 -0.12056572230044E1 --0.1205868513819E1 -0.12060788839051E1 -0.12062883343609E1 -0.12064968662833E1 -0.12067044807688E1 
--0.12069111789136E1 -0.1207116961813E1 -0.12073218305591E1 -0.12075257862432E1 -0.12077288299566E1 --0.12079309627913E1 -0.12081321858383E1 -0.12083325001853E1 -0.12085319069193E1 -0.12087304071265E1 --0.12089280018929E1 -0.12091246923034E1 -0.12093204794418E1 -0.1209515364391E1 -0.12097093482328E1 --0.12099024320483E1 -0.12100946169178E1 -0.12102859039204E1 -0.12104762941347E1 -0.12106657886387E1 --0.12108543885079E1 -0.12110420948169E1 -0.12112289086394E1 -0.12114148310501E1 -0.12115998631223E1 --0.12117840059275E1 -0.12119672605358E1 -0.12121496280165E1 -0.12123311094388E1 -0.12125117058714E1 --0.1212691418381E1 -0.12128702480324E1 -0.12130481958893E1 -0.1213225263017E1 -0.12134014504797E1 --0.12135767593404E1 -0.12137511906591E1 -0.12139247454949E1 -0.1214097424908E1 -0.12142692299579E1 --0.12144401617034E1 -0.12146102211994E1 -0.12147794095005E1 -0.12149477276611E1 -0.12151151767361E1 --0.12152817577794E1 -0.12154474718424E1 -0.12156123199763E1 -0.12157763032312E1 -0.12159394226562E1 --0.12161016793001E1 -0.12162630742097E1 -0.12164236084304E1 -0.12165832830064E1 -0.12167420989838E1 --0.12169000574069E1 -0.12170571593189E1 -0.12172134057614E1 -0.12173687977757E1 -0.12175233364011E1 --0.12176770226763E1 -0.1217829857639E1 -0.12179818423266E1 -0.12181329777758E1 -0.12182832650215E1 --0.12184327050976E1 -0.12185812990369E1 -0.12187290478733E1 -0.12188759526395E1 -0.12190220143662E1 --0.12191672340826E1 -0.12193116128172E1 -0.12194551515981E1 -0.12195978514523E1 -0.1219739713406E1 --0.1219880738485E1 -0.12200209277146E1 -0.12201602821173E1 -0.12202988027146E1 -0.12204364905267E1 --0.12205733465763E1 -0.12207093718849E1 -0.1220844567471E1 -0.12209789343514E1 -0.12211124735416E1 --0.12212451860586E1 -0.12213770729179E1 -0.12215081351339E1 -0.12216383737209E1 -0.12217677896927E1 --0.12218963840603E1 -0.12220241578334E1 -0.12221511120209E1 -0.12222772476329E1 -0.12224025656782E1 --0.12225270671636E1 -0.12226507530946E1 -0.1222773624475E1 -0.12228956823107E1 
-0.12230169276059E1 --0.12231373613637E1 -0.12232569845847E1 -0.12233757982685E1 -0.12234938034159E1 -0.1223611001027E1 --0.12237273921009E1 -0.12238429776341E1 -0.12239577586228E1 -0.12240717360622E1 -0.12241849109465E1 --0.12242972842682E1 -0.12244088570212E1 -0.1224519630198E1 -0.12246296047897E1 -0.12247387817864E1 --0.12248471621773E1 -0.12249547469506E1 -0.12250615370934E1 -0.12251675335922E1 -0.12252727374325E1 --0.12253771495986E1 -0.12254807710749E1 -0.12255836028451E1 -0.1225685645893E1 -0.12257869011982E1 --0.12258873697401E1 -0.12259870524979E1 -0.12260859504509E1 -0.12261840645768E1 -0.12262813958528E1 --0.12263779452551E1 -0.12264737137592E1 -0.12265687023382E1 -0.12266629119637E1 -0.1226756343609E1 --0.12268489982472E1 -0.12269408768513E1 -0.12270319803889E1 -0.12271223098278E1 -0.12272118661354E1 --0.12273006502797E1 -0.12273886632273E1 -0.12274759059435E1 -0.12275623793929E1 -0.12276480845391E1 --0.12277330223452E1 -0.12278171937741E1 -0.12279005997862E1 -0.12279832413407E1 -0.12280651193953E1 --0.12281462349101E1 -0.12282265888435E1 -0.12283061821526E1 -0.12283850157928E1 -0.1228463090719E1 --0.12285404078851E1 -0.12286169682441E1 -0.12286927727481E1 -0.12287678223485E1 -0.12288421179957E1 --0.12289156606397E1 -0.12289884512299E1 -0.12290604907151E1 -0.1229131780042E1 -0.12292023201569E1 --0.12292721120052E1 -0.12293411565312E1 -0.12294094546781E1 -0.12294770073891E1 -0.12295438156064E1 --0.12296098802715E1 -0.12296752023242E1 -0.12297397827036E1 -0.12298036223483E1 -0.12298667221962E1 --0.12299290831839E1 -0.12299907062482E1 -0.12300515923254E1 -0.12301117423498E1 -0.12301711572537E1 --0.12302298379684E1 -0.12302877854261E1 -0.12303450005578E1 -0.12304014842937E1 -0.12304572375633E1 --0.12305122612949E1 -0.12305665564165E1 -0.12306201238549E1 -0.12306729645369E1 -0.12307250793859E1 --0.1230776469325E1 -0.12308271352773E1 -0.12308770781661E1 -0.12309262989137E1 -0.12309747984403E1 --0.12310225776657E1 -0.12310696375088E1 -0.12311159788877E1 
-0.12311616027197E1 -0.12312065099215E1 --0.12312507014091E1 -0.1231294178098E1 -0.12313369409012E1 -0.12313789907309E1 -0.12314203284993E1 --0.12314609551189E1 -0.12315008715019E1 -0.12315400785567E1 -0.12315785771915E1 -0.12316163683137E1 --0.12316534528318E1 -0.12316898316531E1 -0.12317255056826E1 -0.12317604758246E1 -0.12317947429822E1 --0.12318283080592E1 -0.12318611719584E1 -0.1231893335581E1 -0.12319247998273E1 -0.12319555655969E1 --0.12319856337886E1 -0.12320150053003E1 -0.12320436810292E1 -0.12320716618726E1 -0.12320989487274E1 --0.12321255424878E1 -0.1232151444047E1 -0.12321766542974E1 -0.12322011741321E1 -0.1232225004443E1 --0.12322481461206E1 -0.1232270600054E1 -0.12322923671313E1 -0.12323134482412E1 -0.12323338442709E1 --0.12323535561068E1 -0.12323725846362E1 -0.12323909307463E1 -0.12324085953195E1 -0.12324255792374E1 --0.12324418833804E1 -0.12324575086318E1 -0.12324724558724E1 -0.12324867259827E1 -0.12325003198426E1 --0.12325132383322E1 -0.12325254823271E1 -0.12325370527028E1 -0.12325479503344E1 -0.12325581760986E1 --0.12325677308714E1 -0.12325766155251E1 -0.12325848309308E1 -0.1232592377959E1 -0.12325992574829E1 --0.12326054703749E1 -0.12326110175041E1 -0.12326158997374E1 -0.12326201179415E1 -0.12326236729835E1 --0.12326265657293E1 -0.12326287970441E1 -0.1232630367794E1 -0.1232631278845E1 -0.12326315310587E1 --0.12326311252953E1 -0.12326300624145E1 -0.12326283432789E1 -0.12326259687499E1 -0.12326229396861E1 --0.1232619256944E1 -0.12326149213786E1 -0.12326099338488E1 -0.12326042952118E1 -0.12325980063231E1 --0.12325910680351E1 -0.12325834811994E1 -0.12325752466696E1 -0.12325663652984E1 -0.12325568379379E1 --0.12325466654365E1 -0.12325358486421E1 -0.12325243884036E1 -0.12325122855696E1 -0.12324995409884E1 --0.12324861555054E1 -0.12324721299663E1 -0.12324574652155E1 -0.1232442162096E1 -0.12324262214494E1 --0.12324096441185E1 -0.12323924309452E1 -0.12323745827703E1 -0.12323561004338E1 -0.12323369847754E1 --0.12323172366329E1 -0.12322968568431E1 
-0.12322758462415E1 -0.12322542056651E1 -0.12322319359497E1 --0.12322090379296E1 -0.12321855124378E1 -0.12321613603064E1 -0.12321365823675E1 -0.12321111794528E1 --0.12320851523926E1 -0.12320585020163E1 -0.12320312291518E1 -0.12320033346276E1 -0.12319748192719E1 --0.12319456839127E1 -0.12319159293737E1 -0.1231885556478E1 -0.12318545660494E1 -0.1231822958913E1 --0.12317907358937E1 -0.12317578978118E1 -0.12317244454872E1 -0.12316903797395E1 -0.12316557013887E1 --0.12316204112536E1 -0.12315845101521E1 -0.12315479989014E1 -0.12315108783176E1 -0.12314731492168E1 --0.12314348124145E1 -0.12313958687249E1 -0.12313563189601E1 -0.12313161639313E1 -0.12312754044512E1 --0.12312340413316E1 -0.12311920753834E1 -0.12311495074149E1 -0.12311063382338E1 -0.12310625686486E1 --0.12310181994672E1 -0.12309732314977E1 -0.12309276655437E1 -0.12308815024089E1 -0.1230834742897E1 --0.12307873878122E1 -0.12307394379574E1 -0.12306908941342E1 -0.12306417571433E1 -0.12305920277849E1 --0.12305417068585E1 -0.12304907951627E1 -0.12304392934956E1 -0.12303872026547E1 -0.12303345234368E1 --0.12302812566374E1 -0.12302274030512E1 -0.12301729634724E1 -0.12301179386947E1 -0.12300623295112E1 --0.1230006136713E1 -0.122994936109E1 -0.12298920034319E1 -0.12298340645299E1 -0.12297755451743E1 --0.12297164461526E1 -0.12296567682514E1 -0.12295965122568E1 -0.1229535678955E1 -0.12294742691312E1 --0.12294122835698E1 -0.12293497230545E1 -0.12292865883682E1 -0.12292228802929E1 -0.12291585996093E1 --0.1229093747098E1 -0.12290283235392E1 -0.12289623297117E1 -0.12288957663946E1 -0.12288286343663E1 --0.12287609344048E1 -0.12286926672847E1 -0.12286238337805E1 -0.12285544346663E1 -0.12284844707161E1 --0.12284139427018E1 -0.12283428513974E1 -0.12282711975759E1 -0.12281989820088E1 -0.12281262054654E1 --0.12280528687149E1 -0.12279789725254E1 -0.12279045176642E1 -0.12278295048975E1 -0.12277539349933E1 --0.1227677808718E1 -0.12276011268371E1 -0.12275238901151E1 -0.12274460993174E1 -0.12273677552041E1 --0.1227288858535E1 
-0.12272094100702E1 -0.12271294105731E1 -0.12270488608057E1 -0.12269677615269E1 --0.12268861134946E1 -0.12268039174667E1 -0.12267211741992E1 -0.12266378844473E1 -0.12265540489665E1 --0.12264696685121E1 -0.12263847438384E1 -0.12262992756989E1 -0.12262132648466E1 -0.12261267120334E1 --0.12260396180096E1 -0.12259519835242E1 -0.12258638093281E1 -0.12257750961717E1 -0.12256858448051E1 --0.12255960559732E1 -0.12255057304212E1 -0.12254148688951E1 -0.12253234721407E1 -0.12252315409017E1 --0.12251390759237E1 -0.12250460779512E1 -0.12249525477272E1 -0.12248584859922E1 -0.12247638934867E1 --0.12246687709505E1 -0.12245731191224E1 -0.12244769387402E1 -0.12243802305437E1 -0.12242829952718E1 --0.12241852336612E1 -0.12240869464471E1 -0.12239881343639E1 -0.1223888798147E1 -0.12237889385303E1 --0.12236885562472E1 -0.12235876520294E1 -0.12234862266078E1 -0.12233842807139E1 -0.12232818150783E1 --0.12231788304308E1 -0.12230753275002E1 -0.12229713070144E1 -0.12228667697012E1 -0.12227617162875E1 --0.12226561475006E1 -0.12225500640641E1 -0.12224434667012E1 -0.12223363561355E1 -0.12222287330916E1 --0.1222120598293E1 -0.12220119524615E1 -0.12219027963188E1 -0.12217931305859E1 -0.12216829559804E1 --0.12215722732191E1 -0.12214610830208E1 -0.12213493861048E1 -0.12212371831899E1 -0.12211244749906E1 --0.12210112622219E1 -0.12208975455984E1 -0.12207833258347E1 -0.12206686036451E1 -0.12205533797417E1 --0.12204376548359E1 -0.12203214296385E1 -0.12202047048606E1 -0.12200874812121E1 -0.12199697594021E1 --0.12198515401391E1 -0.121973282413E1 -0.12196136120831E1 -0.1219493904706E1 -0.12193737027045E1 --0.12192530067818E1 -0.12191318176399E1 -0.12190101359842E1 -0.12188879625192E1 -0.12187652979485E1 --0.12186421429721E1 -0.12185184982901E1 -0.12183943646027E1 -0.121826974261E1 -0.12181446330112E1 --0.12180190365038E1 -0.12178929537848E1 -0.12177663855509E1 -0.12176393324984E1 -0.12175117953225E1 --0.12173837747184E1 -0.12172552713809E1 -0.12171262860037E1 -0.12169968192785E1 -0.12168668718967E1 
--0.12167364445494E1 -0.12166055379271E1 -0.1216474152719E1 -0.12163422896156E1 -0.12162099493067E1 --0.12160771324802E1 -0.12159438398226E1 -0.12158100720198E1 -0.12156758297577E1 -0.12155411137213E1 --0.1215405924595E1 -0.12152702630628E1 -0.12151341298081E1 -0.12149975255135E1 -0.12148604508608E1 --0.12147229065316E1 -0.12145848932057E1 -0.12144464115628E1 -0.12143074622818E1 -0.12141680460413E1 --0.12140281635188E1 -0.12138878153918E1 -0.12137470023371E1 -0.12136057250307E1 -0.1213463984147E1 --0.12133217803601E1 -0.12131791143437E1 -0.12130359867714E1 -0.12128923983161E1 -0.12127483496487E1 --0.12126038414392E1 -0.12124588743582E1 -0.12123134490761E1 -0.12121675662631E1 -0.12120212265872E1 --0.1211874430716E1 -0.12117271793165E1 -0.12115794730551E1 -0.12114313125978E1 -0.12112826986094E1 --0.12111336317535E1 -0.12109841126933E1 -0.1210834142093E1 -0.1210683720616E1 -0.12105328489242E1 --0.12103815276783E1 -0.12102297575389E1 -0.1210077539165E1 -0.12099248732152E1 -0.12097717603473E1 --0.12096182012201E1 -0.12094641964913E1 -0.12093097468176E1 -0.12091548528548E1 -0.12089995152581E1 --0.12088437346828E1 -0.12086875117841E1 -0.12085308472146E1 -0.12083737416253E1 -0.12082161956656E1 --0.12080582099884E1 -0.1207899785245E1 -0.12077409220859E1 -0.12075816211605E1 -0.12074218831176E1 --0.12072617086054E1 -0.12071010982714E1 -0.12069400527623E1 -0.12067785727242E1 -0.12066166588028E1 --0.12064543116425E1 -0.12062915318871E1 -0.12061283201793E1 -0.12059646771623E1 -0.12058006034781E1 --0.12056360997684E1 -0.12054711666758E1 -0.12053058048433E1 -0.12051400149086E1 -0.12049737975084E1 --0.12048071532786E1 -0.12046400828595E1 -0.12044725868899E1 -0.12043046660061E1 -0.12041363208432E1 --0.12039675520353E1 -0.12037983602183E1 -0.12036287460275E1 -0.12034587100964E1 -0.12032882530557E1 --0.12031173755351E1 -0.12029460781668E1 -0.12027743615822E1 -0.12026022264117E1 -0.12024296732854E1 --0.12022567028338E1 -0.12020833156844E1 -0.12019095124635E1 -0.12017352937964E1 
-0.12015606603106E1 --0.12013856126319E1 -0.12012101513858E1 -0.12010342771968E1 -0.1200857990689E1 -0.12006812924854E1 --0.12005041832083E1 -0.12003266634796E1 -0.12001487339204E1 -0.11999703951519E1 -0.11997916477935E1 --0.1199612492464E1 -0.11994329297806E1 -0.11992529603629E1 -0.1199072584829E1 -0.1198891803796E1 --0.1198710617881E1 -0.11985290277014E1 -0.11983470338703E1 -0.11981646369999E1 -0.1197981837702E1 --0.11977986365921E1 -0.11976150342841E1 -0.119743103139E1 -0.11972466285205E1 -0.11970618262857E1 --0.11968766252963E1 -0.11966910261623E1 -0.11965050294925E1 -0.11963186358942E1 -0.11961318459736E1 --0.11959446603386E1 -0.11957570795961E1 -0.11955691043526E1 -0.11953807352123E1 -0.11951919727793E1 --0.11950028176577E1 -0.11948132704517E1 -0.11946233317658E1 -0.11944330021996E1 -0.11942422823527E1 --0.11940511728253E1 -0.11938596742193E1 -0.11936677871351E1 -0.11934755121718E1 -0.11932828499282E1 --0.11930898010023E1 -0.11928963659902E1 -0.11927025454873E1 -0.11925083400896E1 -0.11923137503932E1 --0.11921187769931E1 -0.11919234204832E1 -0.11917276814567E1 -0.11915315605067E1 -0.11913350582263E1 --0.11911381752094E1 -0.11909409120449E1 -0.11907432693218E1 -0.11905452476284E1 -0.11903468475558E1 --0.11901480696935E1 -0.11899489146296E1 -0.11897493829518E1 -0.11895494752471E1 -0.11893491921019E1 --0.11891485341021E1 -0.11889475018326E1 -0.11887460958765E1 -0.1188544316815E1 -0.11883421652336E1 --0.11881396417166E1 -0.11879367468475E1 -0.11877334812061E1 -0.11875298453721E1 -0.11873258399262E1 --0.1187121465449E1 -0.11869167225208E1 -0.11867116117187E1 -0.11865061336195E1 -0.11863002888006E1 --0.11860940778391E1 -0.11858875013115E1 -0.11856805597934E1 -0.118547325386E1 -0.11852655840858E1 --0.11850575510444E1 -0.1184849155309E1 -0.11846403974518E1 -0.11844312780445E1 -0.11842217976574E1 --0.11840119568625E1 -0.11838017562302E1 -0.11835911963307E1 -0.11833802777335E1 -0.11831690010092E1 --0.11829573667236E1 -0.11827453754428E1 -0.11825330277327E1 
-0.11823203241596E1 -0.11821072652879E1 --0.11818938516839E1 -0.11816800839138E1 -0.11814659625442E1 -0.11812514881353E1 -0.11810366612478E1 --0.11808214824437E1 -0.11806059522858E1 -0.11803900713367E1 -0.1180173840155E1 -0.11799572592991E1 --0.11797403293274E1 -0.11795230508008E1 -0.11793054242792E1 -0.11790874503196E1 -0.11788691294777E1 --0.11786504623095E1 -0.11784314493702E1 -0.11782120912136E1 -0.11779923883947E1 -0.1177772341469E1 --0.11775519509925E1 -0.11773312175157E1 -0.11771101415894E1 -0.11768887237645E1 -0.11766669645939E1 --0.11764448646298E1 -0.11762224244212E1 -0.11759996445168E1 -0.11757765254645E1 -0.11755530678132E1 --0.11753292721107E1 -0.11751051389041E1 -0.11748806687397E1 -0.11746558621623E1 -0.11744307197191E1 --0.11742052419568E1 -0.11739794294204E1 -0.11737532826513E1 -0.11735268021902E1 -0.11732999885808E1 --0.11730728423664E1 -0.11728453640894E1 -0.11726175542898E1 -0.11723894135083E1 -0.11721609422841E1 --0.11719321411556E1 -0.11717030106592E1 -0.11714735513355E1 -0.11712437637237E1 -0.11710136483615E1 --0.1170783205784E1 -0.11705524365261E1 -0.11703213411235E1 -0.11700899201114E1 -0.11698581740248E1 --0.11696261033957E1 -0.11693937087558E1 -0.11691609906377E1 -0.11689279495745E1 -0.11686945860984E1 --0.11684609007401E1 -0.11682268940302E1 -0.11679925664985E1 -0.11677579186742E1 -0.11675229510863E1 --0.11672876642622E1 -0.11670520587285E1 -0.11668161350114E1 -0.11665798936381E1 -0.1166343335135E1 --0.11661064600275E1 -0.11658692688399E1 -0.11656317620954E1 -0.11653939403196E1 -0.11651558040373E1 --0.11649173537714E1 -0.11646785900414E1 -0.1164439513366E1 -0.11642001242671E1 -0.11639604232656E1 --0.11637204108811E1 -0.11634800876346E1 -0.11632394540469E1 -0.11629985106356E1 -0.1162757257917E1 --0.11625156964065E1 -0.11622738266216E1 -0.11620316490786E1 -0.1161789164293E1 -0.11615463727791E1 --0.11613032750499E1 -0.11610598716209E1 -0.11608161630074E1 -0.1160572149724E1 -0.11603278322798E1 --0.11600832111844E1 -0.11598382869492E1 
-0.11595930600857E1 -0.11593475311047E1 -0.11591017005153E1 --0.11588555688257E1 -0.11586091365446E1 -0.11583624041812E1 -0.11581153722449E1 -0.1157868041241E1 --0.11576204116743E1 -0.11573724840497E1 -0.11571242588736E1 -0.11568757366513E1 -0.11566269178868E1 --0.11563778030833E1 -0.1156128392743E1 -0.11558786873699E1 -0.11556286874674E1 -0.1155378393537E1 --0.11551278060781E1 -0.11548769255884E1 -0.11546257525699E1 -0.11543742875236E1 -0.11541225309497E1 --0.11538704833444E1 -0.11536181452036E1 -0.11533655170253E1 -0.11531125993077E1 -0.11528593925487E1 --0.11526058972425E1 -0.11523521138838E1 -0.11520980429667E1 -0.11518436849854E1 -0.11515890404329E1 --0.11513341098029E1 -0.11510788935886E1 -0.11508233922823E1 -0.11505676063754E1 -0.11503115363588E1 --0.1150055182723E1 -0.11497985459578E1 -0.11495416265525E1 -0.11492844249958E1 -0.11490269417752E1 --0.11487691773789E1 -0.11485111322951E1 -0.11482528070121E1 -0.11479942020153E1 -0.11477353177901E1 --0.11474761548214E1 -0.11472167135935E1 -0.11469569945891E1 -0.11466969982928E1 -0.11464367251891E1 --0.11461761757621E1 -0.11459153504932E1 -0.11456542498642E1 -0.11453928743559E1 -0.11451312244482E1 --0.11448693006196E1 -0.11446071033509E1 -0.11443446331217E1 -0.11440818904109E1 -0.11438188756965E1 --0.1143555589456E1 -0.11432920321666E1 -0.11430282043046E1 -0.11427641063462E1 -0.11424997387665E1 --0.11422351020401E1 -0.11419701966414E1 -0.11417050230451E1 -0.11414395817253E1 -0.11411738731546E1 --0.11409078978053E1 -0.11406416561494E1 -0.11403751486587E1 -0.11401083758052E1 -0.11398413380583E1 --0.11395740358868E1 -0.11393064697586E1 -0.11390386401441E1 -0.11387705475126E1 -0.11385021923323E1 --0.11382335750705E1 -0.11379646961945E1 -0.11376955561704E1 -0.11374261554643E1 -0.11371564945416E1 --0.11368865738666E1 -0.11366163939033E1 -0.11363459551158E1 -0.11360752579679E1 -0.11358043029228E1 --0.11355330904428E1 -0.11352616209901E1 -0.11349898950262E1 -0.11347179130115E1 -0.11344456754059E1 - -0.113417318267E1 
-0.11339004352639E1 -0.11336274336466E1 -0.11333541782755E1 -0.11330806696069E1 --0.11328069080994E1 -0.11325328942113E1 -0.11322586284005E1 -0.11319841111216E1 -0.11317093428299E1 --0.11314343239799E1 -0.11311590550261E1 -0.11308835364214E1 -0.11306077686203E1 -0.11303317520766E1 --0.11300554872433E1 -0.11297789745716E1 -0.11295022145126E1 -0.11292252075177E1 -0.11289479540379E1 --0.11286704545238E1 -0.11283927094244E1 -0.11281147191887E1 -0.11278364842651E1 -0.11275580051014E1 --0.11272792821438E1 -0.11270003158416E1 -0.11267211066434E1 -0.11264416549961E1 -0.11261619613432E1 --0.1125882026128E1 -0.1125601849796E1 -0.11253214327919E1 -0.11250407755598E1 -0.11247598785429E1 --0.11244787421838E1 -0.11241973669252E1 -0.11239157532099E1 -0.11236339014808E1 -0.11233518121771E1 --0.11230694857382E1 -0.11227869226035E1 -0.11225041232131E1 -0.11222210880056E1 -0.11219378174202E1 --0.11216543118961E1 -0.11213705718721E1 -0.11210865977829E1 -0.11208023900625E1 -0.11205179491481E1 --0.11202332754776E1 -0.11199483694897E1 -0.11196632316153E1 -0.11193778622861E1 -0.11190922619354E1 --0.11188064309995E1 -0.11185203699149E1 -0.11182340791117E1 -0.11179475590202E1 -0.11176608100704E1 --0.11173738326943E1 -0.11170866273225E1 -0.11167991943849E1 -0.11165115343108E1 -0.11162236475284E1 --0.11159355344678E1 -0.11156471955583E1 -0.11153586312279E1 -0.11150698419022E1 -0.11147808280069E1 --0.11144915899685E1 -0.1114202128213E1 -0.11139124431656E1 -0.11136225352513E1 -0.11133324048946E1 --0.11130420525198E1 -0.11127514785511E1 -0.11124606834131E1 -0.11121696675263E1 -0.11118784313114E1 --0.11115869751897E1 -0.11112952995831E1 -0.11110034049126E1 -0.11107112915983E1 -0.11104189600596E1 --0.11101264107155E1 -0.11098336439861E1 -0.11095406602913E1 -0.11092474600485E1 -0.11089540436737E1 --0.11086604115828E1 -0.11083665641928E1 -0.11080725019192E1 -0.1107778225178E1 -0.11074837343853E1 --0.11071890299578E1 -0.11068941123082E1 -0.1106598981849E1 -0.11063036389929E1 -0.11060080841535E1 
--0.1105712317744E1 -0.11054163401761E1 -0.11051201518611E1 -0.11048237532096E1 -0.11045271446332E1 --0.11042303265439E1 -0.1103933299351E1 -0.11036360634616E1 -0.11033386192818E1 -0.11030409672216E1 --0.11027431076898E1 -0.11024450410944E1 -0.11021467678423E1 -0.110184828834E1 -0.11015496029941E1 --0.11012507122109E1 -0.1100951616397E1 -0.11006523159549E1 -0.11003528112872E1 -0.11000531027973E1 --0.10997531908898E1 -0.10994530759681E1 -0.10991527584345E1 -0.10988522386906E1 -0.1098551517138E1 --0.10982505941788E1 -0.10979494702151E1 -0.10976481456465E1 -0.10973466208718E1 -0.10970448962898E1 --0.10967429722996E1 -0.10964408492996E1 -0.10961385276879E1 -0.10958360078628E1 -0.10955332902219E1 --0.10952303751619E1 -0.10949272630796E1 -0.10946239543709E1 -0.10943204494302E1 -0.10940167486515E1 --0.10937128524294E1 -0.10934087611579E1 -0.10931044752298E1 -0.10927999950404E1 -0.10924953209844E1 --0.10921904534541E1 -0.10918853928393E1 -0.10915801395298E1 -0.10912746939172E1 -0.10909690563917E1 --0.10906632273435E1 -0.10903572071633E1 -0.10900509962413E1 -0.10897445949665E1 -0.1089438003728E1 --0.1089131222915E1 -0.10888242529125E1 -0.10885170941052E1 -0.10882097468801E1 -0.1087902211625E1 --0.1087594488728E1 -0.10872865785729E1 -0.10869784815436E1 -0.10866701980243E1 -0.10863617284003E1 --0.10860530730566E1 -0.10857442323753E1 -0.1085435206738E1 -0.10851259965255E1 -0.10848166021221E1 --0.10845070239109E1 -0.10841972622729E1 -0.10838873175878E1 -0.10835771902348E1 -0.10832668805943E1 --0.10829563890458E1 -0.10826457159684E1 -0.10823348617399E1 -0.10820238267376E1 -0.10817126113397E1 --0.10814012159242E1 -0.10810896408687E1 -0.1080777886549E1 -0.10804659533402E1 -0.10801538416186E1 --0.10798415517604E1 -0.10795290841434E1 -0.10792164391379E1 -0.10789036171148E1 -0.10785906184462E1 --0.10782774435088E1 -0.10779640926782E1 -0.10776505663255E1 -0.10773368648214E1 -0.10770229885368E1 --0.10767089378429E1 -0.10763947131101E1 -0.10760803147084E1 -0.10757657430074E1 
-0.10754509983759E1 --0.1075136081183E1 -0.10748209917971E1 -0.10745057305862E1 -0.10741902979189E1 -0.10738746941637E1 --0.10735589196862E1 -0.10732429748513E1 -0.10729268600239E1 -0.10726105755706E1 -0.10722941218568E1 --0.10719774992476E1 -0.10716607081078E1 -0.10713437488027E1 -0.10710266216944E1 -0.1070709327146E1 --0.10703918655195E1 -0.10700742371769E1 -0.10697564424789E1 -0.10694384817875E1 -0.10691203554642E1 --0.10688020638699E1 -0.10684836073645E1 -0.10681649863074E1 -0.10678462010587E1 -0.10675272519784E1 --0.10672081394282E1 -0.1066888863762E1 -0.10665694253342E1 -0.10662498245014E1 -0.10659300616243E1 --0.1065610137064E1 -0.10652900511735E1 -0.10649698043057E1 -0.1064649396814E1 -0.10643288290567E1 --0.10640081013915E1 -0.10636872141717E1 -0.10633661677484E1 -0.10630449624727E1 -0.10627235986991E1 --0.10624020767801E1 -0.10620803970682E1 -0.1061758559916E1 -0.10614365656771E1 -0.10611144147003E1 --0.10607921073339E1 -0.10604696439267E1 -0.10601470248305E1 -0.10598242503961E1 -0.10595013209718E1 --0.10591782369054E1 -0.10588549985435E1 -0.10585316062356E1 -0.10582080603308E1 -0.10578843611758E1 --0.1057560509115E1 -0.10572365044918E1 -0.10569123476537E1 -0.10565880389474E1 -0.10562635787187E1 --0.10559389673108E1 -0.10556142050665E1 -0.10552892923303E1 -0.10549642294467E1 -0.10546390167603E1 --0.10543136546117E1 -0.10539881433415E1 -0.10536624832912E1 -0.10533366748037E1 -0.10530107182213E1 --0.10526846138837E1 -0.1052358362131E1 -0.10520319633028E1 -0.10517054177382E1 -0.10513787257753E1 --0.10510518877532E1 -0.10507249040107E1 -0.10503977748859E1 -0.10500705007162E1 -0.10497430818389E1 --0.10494155185903E1 -0.10490878113062E1 -0.10487599603212E1 -0.10484319659721E1 -0.10481038285953E1 --0.10477755485263E1 -0.10474471260982E1 -0.10471185616446E1 -0.10467898554987E1 -0.10464610079933E1 --0.10461320194603E1 -0.1045802890234E1 -0.10454736206482E1 -0.10451442110345E1 -0.10448146617221E1 --0.10444849730387E1 -0.10441551453176E1 -0.1043825178891E1 
-0.10434950740897E1 -0.10431648312405E1 --0.10428344506697E1 -0.10425039327076E1 -0.10421732776849E1 -0.10418424859321E1 -0.10415115577729E1 --0.10411804935311E1 -0.10408492935337E1 -0.104051795811E1 -0.10401864875897E1 -0.10398548822953E1 --0.10395231425499E1 -0.10391912686774E1 -0.10388592610042E1 -0.10385271198561E1 -0.10381948455561E1 --0.10378624384267E1 -0.10375298987901E1 -0.10371972269695E1 -0.1036864423287E1 -0.10365314880645E1 --0.10361984216235E1 -0.10358652242843E1 -0.10355318963687E1 -0.10351984381982E1 -0.10348648500932E1 --0.10345311323717E1 -0.10341972853522E1 -0.10338633093533E1 -0.10335292046933E1 -0.10331949716898E1 --0.10328606106616E1 -0.10325261219272E1 -0.10321915058034E1 -0.10318567626066E1 -0.10315218926531E1 --0.10311868962581E1 -0.10308517737359E1 -0.10305165254015E1 -0.10301811515711E1 -0.10298456525607E1 --0.10295100286837E1 -0.10291742802533E1 -0.10288384075824E1 -0.10285024109837E1 -0.10281662907696E1 --0.10278300472519E1 -0.10274936807423E1 -0.10271571915509E1 -0.10268205799905E1 -0.10264838463732E1 - -0.102614699101E1 -0.10258100142106E1 -0.10254729162854E1 -0.10251356975426E1 -0.10247983582902E1 --0.10244608988356E1 -0.10241233194878E1 -0.10237856205548E1 -0.1023447802344E1 -0.10231098651624E1 --0.10227718093158E1 -0.10224336351122E1 -0.10220953428595E1 -0.10217569328636E1 -0.1021418405428E1 --0.10210797608562E1 -0.10207409994531E1 -0.10204021215228E1 -0.10200631273689E1 -0.10197240172961E1 --0.10193847916086E1 -0.10190454506091E1 -0.10187059945997E1 -0.10183664238822E1 -0.10180267387582E1 --0.10176869395286E1 -0.10173470264945E1 -0.10170069999567E1 -0.1016666860216E1 -0.1016326607572E1 --0.10159862423242E1 -0.10156457647716E1 -0.10153051752138E1 -0.10149644739505E1 -0.10146236612793E1 --0.10142827374972E1 -0.10139417029011E1 -0.1013600557788E1 -0.10132593024539E1 -0.10129179371956E1 --0.10125764623111E1 -0.10122348780992E1 -0.10118931848532E1 -0.10115513828664E1 -0.10112094724325E1 --0.1010867453846E1 -0.10105253274E1 
-0.10101830933888E1 -0.10098407521069E1 -0.10094983038482E1 --0.10091557489053E1 -0.10088130875715E1 -0.10084703201386E1 -0.10081274468962E1 -0.10077844681334E1 --0.10074413841422E1 -0.10070981952144E1 -0.10067549016408E1 -0.10064115037098E1 -0.1006068001709E1 --0.10057243959288E1 -0.10053806866609E1 -0.10050368741976E1 -0.1004692958822E1 -0.10043489408181E1 --0.10040048204729E1 -0.1003660598077E1 -0.10033162739215E1 -0.10029718482898E1 -0.10026273214658E1 --0.10022826937338E1 -0.10019379653815E1 -0.10015931366963E1 -0.1001248207962E1 -0.10009031794611E1 --0.10005580514762E1 -0.1000212824292E1 -0.99986749819199E0 -0.99952207345943E0 -0.99917655037736E0 --0.99883092922845E0 -0.99848521029492E0 -0.99813939385889E0 -0.99779348020198E0 -0.99744746960444E0 --0.99710136234679E0 -0.99675515870876E0 -0.9964088589696E0 -0.99606246340756E0 -0.99571597230386E0 --0.99536938593901E0 -0.99502270459167E0 -0.99467592853869E0 -0.99432905805646E0 -0.99398209342371E0 --0.99363503491834E0 -0.9932878828177E0 -0.99294063739823E0 -0.99259329893601E0 -0.99224586770744E0 --0.99189834398885E0 -0.99155072805682E0 -0.99120302018399E0 -0.99085522064288E0 -0.99050732970782E0 --0.99015934765453E0 -0.98981127475896E0 -0.98946311129238E0 -0.9891148575259E0 -0.98876651373139E0 --0.9884180801845E0 -0.98806955716159E0 -0.98772094493164E0 -0.98737224376244E0 -0.98702345392101E0 --0.98667457568269E0 -0.98632560932111E0 -0.98597655510608E0 -0.9856274133045E0 -0.98527818418179E0 --0.98492886801083E0 -0.98457946506311E0 -0.98422997560841E0 -0.98388039991168E0 -0.98353073823834E0 --0.98318099085633E0 -0.98283115803323E0 -0.98248124003575E0 -0.98213123713135E0 -0.98178114958739E0 --0.98143097766963E0 -0.98108072164272E0 -0.98073038177089E0 -0.98037995831977E0 -0.98002945155462E0 --0.97967886173979E0 -0.97932818913746E0 -0.97897743400834E0 -0.97862659661796E0 -0.97827567723201E0 --0.97792467611599E0 -0.97757359352861E0 -0.97722242972976E0 -0.97687118498085E0 -0.97651985954399E0 --0.97616845367994E0 
-0.97581696765034E0 -0.9754654017164E0 -0.97511375613867E0 -0.97476203117646E0 --0.97441022708975E0 -0.97405834413667E0 -0.97370638257486E0 -0.97335434266149E0 -0.97300222465649E0 --0.97265002881916E0 -0.97229775540668E0 -0.97194540467502E0 -0.97159297687857E0 -0.97124047227705E0 --0.97088789112909E0 -0.97053523369148E0 -0.97018250021719E0 -0.96982969095933E0 -0.96947680617368E0 --0.96912384611545E0 -0.96877081103928E0 -0.96841770119941E0 -0.96806451684965E0 -0.96771125824383E0 --0.9673579256356E0 -0.9670045192793E0 -0.96665103942559E0 -0.96629748632535E0 -0.96594386023005E0 --0.96559016139271E0 -0.96523639006593E0 -0.96488254649998E0 -0.96452863094473E0 -0.96417464364987E0 --0.96382058486704E0 -0.96346645484804E0 -0.96311225384129E0 -0.96275798209369E0 -0.96240363985049E0 --0.96204922736392E0 -0.96169474488418E0 -0.96134019266004E0 -0.96098557093809E0 -0.9606308799661E0 --0.9602761199898E0 -0.95992129125437E0 -0.9595663940048E0 -0.95921142848934E0 -0.95885639495521E0 --0.95850129364785E0 -0.9581461248119E0 -0.9577908886916E0 -0.95743558553255E0 -0.95708021558007E0 --0.9567247790782E0 -0.95636927626916E0 -0.95601370739486E0 -0.95565807269892E0 -0.95530237242427E0 --0.95494660681341E0 -0.9545907761092E0 -0.95423488055443E0 -0.95387892039045E0 -0.95352289585793E0 --0.95316680719719E0 -0.95281065464959E0 -0.95245443845597E0 -0.95209815885648E0 -0.95174181609038E0 --0.95138541039665E0 -0.95102894201475E0 -0.95067241118365E0 -0.95031581814201E0 -0.94995916312843E0 --0.94960244638141E0 -0.94924566813848E0 -0.94888882863677E0 -0.94853192811328E0 -0.94817496680456E0 --0.94781794494656E0 -0.94746086277561E0 -0.94710372052858E0 -0.9467465184423E0 -0.94638925675143E0 --0.94603193569067E0 -0.94567455549458E0 -0.94531711639746E0 -0.94495961863307E0 -0.94460206243548E0 --0.94424444803858E0 -0.94388677567571E0 -0.94352904558043E0 -0.94317125798651E0 -0.94281341312616E0 --0.94245551122992E0 -0.9420975525273E0 -0.9417395372517E0 -0.94138146563569E0 -0.9410233379111E0 
--0.9406651543076E0 -0.94030691505462E0 -0.93994862038307E0 -0.93959027052397E0 -0.93923186570868E0 --0.9388734061639E0 -0.9385148921169E0 -0.93815632379596E0 -0.93779770143026E0 -0.93743902524785E0 --0.93708029547714E0 -0.93672151234609E0 -0.93636267608236E0 -0.93600378691347E0 -0.93564484506755E0 --0.93528585076998E0 -0.93492680424556E0 -0.93456770571885E0 -0.93420855541597E0 -0.93384935356175E0 --0.93349010038147E0 -0.93313079610081E0 -0.93277144094551E0 -0.9324120351389E0 -0.93205257890454E0 --0.93169307246581E0 -0.93133351604576E0 -0.93097390986696E0 -0.93061425415228E0 -0.93025454912427E0 --0.92989479500506E0 -0.92953499201702E0 -0.9291751403823E0 -0.92881524032227E0 -0.92845529205768E0 --0.92809529580963E0 -0.92773525179769E0 -0.92737516024103E0 -0.92701502135909E0 -0.9266548353729E0 --0.92629460250301E0 -0.92593432296816E0 -0.92557399698682E0 -0.92521362477738E0 -0.92485320655789E0 --0.92449274254572E0 -0.92413223295887E0 -0.92377167801562E0 -0.92341107793466E0 -0.92305043293134E0 --0.92268974322126E0 -0.92232900902024E0 -0.92196823054523E0 -0.92160740801231E0 -0.92124654163723E0 --0.92088563163557E0 -0.92052467822284E0 -0.92016368161279E0 -0.91980264201934E0 -0.91944155965637E0 --0.91908043473765E0 -0.91871926747533E0 -0.91835805808503E0 -0.91799680678205E0 -0.91763551377989E0 --0.91727417928772E0 -0.91691280351392E0 -0.91655138667271E0 -0.91618992897817E0 -0.91582843064387E0 --0.91546689187856E0 -0.91510531289158E0 -0.91474369389381E0 -0.91438203509672E0 -0.91402033671186E0 --0.91365859894774E0 -0.91329682201322E0 -0.91293500611732E0 -0.9125731514697E0 -0.91221125827994E0 --0.91184932675577E0 -0.91148735710448E0 -0.91112534953306E0 -0.91076330425104E0 -0.91040122146782E0 --0.91003910138985E0 -0.90967694422216E0 -0.90931475016852E0 -0.90895251943806E0 -0.90859025223835E0 --0.90822794877583E0 -0.9078656092549E0 -0.90750323388058E0 -0.90714082285742E0 -0.90677837638945E0 --0.90641589468038E0 -0.90605337793591E0 -0.90569082636112E0 -0.90532824015968E0 
-0.90496561953462E0 --0.90460296468906E0 -0.90424027582567E0 -0.90387755314676E0 -0.90351479685463E0 -0.90315200715178E0 --0.90278918424046E0 -0.90242632832203E0 -0.90206343959762E0 -0.90170051826817E0 -0.90133756453459E0 --0.90097457859752E0 -0.9006115606572E0 -0.90024851091353E0 -0.89988542956597E0 -0.89952231681459E0 --0.89915917285906E0 -0.89879599789853E0 -0.89843279213155E0 -0.89806955575645E0 -0.89770628897162E0 --0.89734299197514E0 -0.89697966496483E0 -0.89661630813805E0 -0.89625292169207E0 -0.8958895058237E0 --0.8955260607294E0 -0.89516258660539E0 -0.89479908364791E0 -0.89443555205285E0 -0.89407199201587E0 --0.89370840373254E0 -0.8933447873978E0 -0.89298114320718E0 -0.89261747135607E0 -0.8922537720395E0 --0.89189004545021E0 -0.89152629178077E0 -0.89116251122523E0 -0.89079870397772E0 -0.89043487023178E0 --0.89007101018062E0 -0.8897071240178E0 -0.88934321193559E0 -0.88897927412488E0 -0.88861531077601E0 --0.88825132208154E0 -0.88788730823341E0 -0.88752326942312E0 -0.88715920584123E0 -0.88679511767776E0 --0.88643100512384E0 -0.88606686837076E0 -0.88570270761045E0 -0.88533852302957E0 -0.88497431481526E0 --0.88461008315634E0 -0.88424582824332E0 -0.8838815502658E0 -0.88351724941215E0 -0.88315292587078E0 --0.88278857982984E0 -0.88242421147696E0 -0.88205982100007E0 -0.88169540858574E0 -0.88133097442004E0 --0.88096651868849E0 -0.88060204157877E0 -0.88023754327795E0 -0.87987302397196E0 -0.87950848384562E0 --0.87914392308291E0 -0.87877934187064E0 -0.87841474039514E0 -0.87805011884197E0 -0.87768547739374E0 --0.87732081623289E0 -0.87695613554423E0 -0.87659143551275E0 -0.87622671632335E0 -0.87586197815749E0 --0.8754972211966E0 -0.87513244562372E0 -0.87476765162311E0 -0.87440283937979E0 -0.87403800907326E0 --0.87367316088344E0 -0.87330829499087E0 -0.8729434115789E0 -0.8725785108298E0 -0.87221359292455E0 --0.87184865804401E0 -0.87148370636941E0 -0.87111873807907E0 -0.87075375335123E0 -0.87038875236557E0 --0.87002373530267E0 -0.86965870234303E0 -0.86929365366451E0 
-0.86892858944495E0 -0.86856350986251E0 --0.86819841509695E0 -0.86783330532823E0 -0.86746818073292E0 -0.86710304148709E0 -0.86673788776653E0 - -0.86637271975E0 -0.86600753761539E0 -0.86564234153923E0 -0.86527713169712E0 -0.86491190826419E0 --0.86454667141724E0 -0.86418142133272E0 -0.86381615818626E0 -0.86345088215151E0 -0.86308559340121E0 --0.86272029211162E0 -0.86235497845903E0 -0.86198965261951E0 -0.86162431476477E0 -0.86125896506712E0 --0.8608936037001E0 -0.86052823083785E0 -0.86016284665397E0 -0.85979745132119E0 -0.85943204501221E0 --0.85906662789949E0 -0.85870120015523E0 -0.85833576195213E0 -0.85797031346072E0 -0.85760485485085E0 --0.85723938629187E0 -0.85687390795837E0 -0.85650842002457E0 -0.85614292266031E0 -0.85577741603324E0 --0.85541190031042E0 -0.85504637566377E0 -0.8546808422634E0 -0.85431530027908E0 -0.85394974988016E0 --0.85358419123677E0 -0.85321862451629E0 -0.85285304988566E0 -0.8524874675116E0 -0.85212187756443E0 --0.85175628021343E0 -0.85139067562626E0 -0.8510250639698E0 -0.85065944541136E0 -0.85029382011724E0 --0.84992818825347E0 -0.8495625499863E0 -0.84919690548266E0 -0.84883125490966E0 -0.84846559843196E0 --0.84809993621393E0 -0.84773426841978E0 -0.8473685952163E0 -0.84700291676919E0 -0.84663723324349E0 --0.84627154480389E0 -0.84590585161639E0 -0.84554015384217E0 -0.84517445164268E0 -0.84480874518084E0 --0.84444303462219E0 -0.84407732013153E0 -0.84371160187121E0 -0.84334588000359E0 -0.84298015469101E0 --0.84261442609588E0 -0.84224869438076E0 -0.84188295970685E0 -0.84151722223472E0 -0.8411514821244E0 --0.8407857395379E0 -0.84041999463609E0 -0.84005424757994E0 -0.83968849853068E0 -0.83932274764974E0 --0.83895699509652E0 -0.83859124103052E0 -0.83822548561118E0 -0.83785972899713E0 -0.83749397134615E0 --0.83712821281786E0 -0.83676245357229E0 -0.8363966937699E0 -0.83603093356672E0 -0.83566517311919E0 --0.83529941258495E0 -0.83493365212318E0 -0.83456789189328E0 -0.83420213205079E0 -0.8338363727513E0 --0.83347061415055E0 -0.83310485640652E0 
-0.83273909967629E0 -0.83237334411609E0 -0.83200758988194E0 --0.83164183713021E0 -0.8312760860151E0 -0.83091033669062E0 -0.83054458931174E0 -0.83017884403461E0 --0.82981310101575E0 -0.82944736040778E0 -0.82908162236326E0 -0.82871588703507E0 -0.82835015457945E0 --0.82798442515227E0 -0.82761869890614E0 -0.827252975993E0 -0.82688725656497E0 -0.82652154077472E0 --0.82615582877428E0 -0.8257901207158E0 -0.8254244167516E0 -0.8250587170331E0 -0.82469302171294E0 --0.82432733094362E0 -0.82396164487708E0 -0.82359596366283E0 -0.82323028745084E0 -0.82286461639141E0 --0.82249895063453E0 -0.82213329032923E0 -0.8217676356275E0 -0.82140198668048E0 -0.82103634363818E0 --0.82067070664961E0 -0.82030507586508E0 -0.81993945143148E0 -0.81957383349568E0 -0.81920822220497E0 --0.81884261770932E0 -0.8184770201575E0 -0.81811142969761E0 -0.81774584647752E0 -0.81738027064515E0 --0.81701470234738E0 -0.81664914173103E0 -0.81628358894302E0 -0.8159180441303E0 -0.81555250744046E0 --0.81518697901805E0 -0.81482145900726E0 -0.81445594755307E0 -0.8140904448042E0 -0.81372495090955E0 --0.81335946601263E0 -0.81299399025627E0 -0.81262852378319E0 -0.81226306673988E0 -0.81189761927183E0 --0.81153218152301E0 -0.8111667536363E0 -0.81080133575317E0 -0.81043592802E0 -0.81007053058262E0 --0.80970514358533E0 -0.80933976716797E0 -0.80897440147035E0 -0.80860904663631E0 -0.8082437028097E0 --0.80787837013396E0 -0.8075130487498E0 -0.80714773879779E0 -0.80678244042013E0 -0.80641715375991E0 --0.80605187896185E0 -0.80568661616306E0 -0.80532136550158E0 -0.80495612711667E0 -0.80459090115131E0 --0.80422568774729E0 -0.80386048704439E0 -0.8034952991823E0 -0.80313012430075E0 -0.80276496253884E0 --0.80239981403555E0 -0.80203467892982E0 -0.80166955736043E0 -0.80130444946619E0 -0.80093935538499E0 --0.80057427525418E0 -0.80020920921167E0 -0.79984415739715E0 -0.79947911995101E0 -0.7991140970087E0 --0.79874908870537E0 -0.79838409517619E0 -0.79801911655969E0 -0.79765415299284E0 -0.79728920461251E0 --0.7969242715554E0 
-0.79655935395822E0 -0.7961944519568E0 -0.79582956568759E0 -0.795464695286E0 --0.79509984088538E0 -0.79473500261807E0 -0.79437018062071E0 -0.79400537502963E0 -0.79364058598069E0 --0.79327581360714E0 -0.7929110580426E0 -0.79254631942121E0 -0.7921815978772E0 -0.79181689354537E0 --0.79145220655725E0 -0.79108753704449E0 -0.79072288513966E0 -0.79035825097726E0 -0.78999363469138E0 --0.78962903641389E0 -0.78926445627662E0 -0.78889989441138E0 -0.78853535095026E0 -0.78817082602521E0 --0.78780631976742E0 -0.78744183230765E0 -0.78707736377612E0 -0.78671291430459E0 -0.78634848402428E0 --0.78598407306587E0 -0.78561968155927E0 -0.78525530963411E0 -0.78489095742083E0 -0.78452662504974E0 --0.78416231265087E0 -0.78379802035278E0 -0.78343374828391E0 -0.7830694965735E0 -0.7827052653509E0 --0.78234105474511E0 -0.78197686488461E0 -0.78161269589812E0 -0.78124854791364E0 -0.7808844210582E0 --0.78052031545829E0 -0.78015623124224E0 -0.77979216853798E0 -0.77942812747312E0 -0.77906410817416E0 --0.77870011076729E0 -0.77833613537965E0 -0.77797218213851E0 -0.77760825117167E0 -0.77724434260316E0 --0.77688045655736E0 -0.77651659315966E0 -0.77615275253677E0 -0.77578893481489E0 -0.77542514011888E0 --0.7750613685735E0 -0.77469762030346E0 -0.77433389543405E0 -0.77397019409105E0 -0.7736065163977E0 --0.7732428624765E0 -0.77287923244925E0 -0.77251562644156E0 -0.77215204457775E0 -0.7717884869815E0 --0.77142495377577E0 -0.77106144508346E0 -0.77069796102771E0 -0.77033450173174E0 -0.76997106731838E0 --0.76960765790816E0 -0.76924427362119E0 -0.76888091458008E0 -0.76851758090775E0 -0.76815427272685E0 --0.76779099015798E0 -0.76742773332187E0 -0.76706450233974E0 -0.76670129733325E0 -0.76633811842497E0 --0.76597496573321E0 -0.76561183937632E0 -0.76524873947317E0 -0.76488566614598E0 -0.7645226195162E0 --0.76415959970302E0 -0.76379660682525E0 -0.76343364100185E0 -0.76307070235169E0 -0.76270779099326E0 --0.76234490704523E0 -0.76198205062638E0 -0.76161922185519E0 -0.76125642084988E0 -0.76089364772838E0 
--0.76053090260856E0 -0.76016818560877E0 -0.75980549684795E0 -0.75944283644223E0 -0.75908020450715E0 --0.75871760115785E0 -0.7583550265129E0 -0.75799248068973E0 -0.757629963805E0 -0.75726747597471E0 --0.75690501731484E0 -0.75654258794156E0 -0.756180187971E0 -0.75581781751884E0 -0.75545547669949E0 --0.75509316562685E0 -0.75473088441663E0 -0.75436863318454E0 -0.75400641204601E0 -0.75364422111506E0 --0.75328206050578E0 -0.75291993033251E0 -0.75255783070973E0 -0.75219576175242E0 -0.75183372357285E0 --0.75147171628355E0 -0.75110973999726E0 -0.75074779482776E0 -0.75038588088782E0 -0.7500239982909E0 --0.74966214715039E0 -0.74930032757959E0 -0.74893853969022E0 -0.74857678359465E0 -0.74821505940443E0 --0.74785336723062E0 -0.74749170718363E0 -0.74713007937577E0 -0.74676848391836E0 -0.74640692092278E0 - -0.746045390501E0 -0.7456838927658E0 -0.74532242782634E0 -0.7449609957914E0 -0.74459959676953E0 --0.74423823087297E0 -0.74387689821315E0 -0.74351559889967E0 -0.74315433304105E0 -0.742793100745E0 --0.7424319021227E0 -0.74207073728463E0 -0.74170960634027E0 -0.74134850939681E0 -0.74098744656145E0 --0.74062641794325E0 -0.74026542365102E0 -0.73990446379321E0 -0.73954353847792E0 -0.73918264781324E0 --0.73882179190689E0 -0.73846097086631E0 -0.7381001847993E0 -0.73773943381212E0 -0.73737871801102E0 --0.73701803750252E0 -0.73665739239399E0 -0.73629678279226E0 -0.73593620880383E0 -0.73557567053499E0 --0.73521516809192E0 -0.73485470158077E0 -0.73449427110809E0 -0.73413387677886E0 -0.73377351869731E0 --0.73341319696686E0 -0.73305291169413E0 -0.7326926629847E0 -0.73233245094362E0 -0.73197227567498E0 --0.73161213728264E0 -0.73125203587148E0 -0.73089197154637E0 -0.73053194441195E0 -0.73017195457069E0 --0.72981200212519E0 -0.72945208717897E0 -0.72909220983571E0 -0.72873237019828E0 -0.72837256837068E0 --0.72801280445696E0 -0.72765307855995E0 -0.72729339078042E0 -0.72693374121869E0 -0.72657412997816E0 --0.72621455716185E0 -0.72585502287238E0 -0.72549552721055E0 -0.72513607027709E0 
-0.72477665217383E0 --0.72441727300282E0 -0.72405793286639E0 -0.72369863186409E0 -0.72333937009573E0 -0.72298014766174E0 --0.72262096466351E0 -0.72226182120244E0 -0.72190271737779E0 -0.72154365328872E0 -0.7211846290344E0 --0.72082564471582E0 -0.72046670043374E0 -0.72010779628713E0 -0.71974893237437E0 -0.71939010879359E0 --0.71903132564454E0 -0.71867258302636E0 -0.71831388103775E0 -0.71795521977683E0 -0.71759659934151E0 --0.71723801983024E0 -0.71687948134125E0 -0.71652098397256E0 -0.7161625278215E0 -0.71580411298536E0 --0.71544573956155E0 -0.71508740764739E0 -0.71472911734008E0 -0.71437086873631E0 -0.71401266193239E0 --0.71365449702515E0 -0.7132963741121E0 -0.71293829329168E0 -0.71258025465791E0 -0.71222225830491E0 --0.71186430432724E0 -0.71150639282272E0 -0.71114852388804E0 -0.71079069761879E0 -0.71043291411039E0 - -0.710075173459E0 -0.70971747575746E0 -0.70935982109861E0 -0.70900220957696E0 -0.70864464128882E0 --0.70828711633011E0 -0.7079296347944E0 -0.70757219677562E0 -0.7072148023676E0 -0.70685745166408E0 --0.7065001447588E0 -0.70614288174487E0 -0.70578566271511E0 -0.70542848776196E0 -0.70507135697927E0 --0.7047142704604E0 -0.70435722829816E0 -0.70400023058481E0 -0.70364327741226E0 -0.7032863688735E0 --0.70292950506135E0 -0.7025726860682E0 -0.70221591198495E0 -0.70185918290264E0 -0.70150249891285E0 --0.70114586010705E0 -0.70078926657641E0 -0.70043271841218E0 -0.70007621570512E0 -0.69971975854638E0 --0.6993633470274E0 -0.69900698124059E0 -0.69865066127421E0 -0.69829438721686E0 -0.69793815915768E0 --0.69758197718829E0 -0.69722584139939E0 -0.69686975188075E0 -0.69651370872204E0 -0.69615771201311E0 --0.69580176184244E0 -0.69544585829853E0 -0.69509000147031E0 -0.69473419144689E0 -0.69437842831739E0 --0.69402271216976E0 -0.69366704309175E0 -0.69331142117136E0 -0.69295584649788E0 -0.69260031916081E0 --0.69224483924698E0 -0.6918894068429E0 -0.69153402203501E0 -0.69117868491155E0 -0.69082339555986E0 --0.69046815406724E0 -0.69011296052091E0 -0.68975781500795E0 
-0.68940271761518E0 -0.68904766842965E0 --0.68869266753783E0 -0.68833771502487E0 -0.68798281097547E0 -0.68762795547637E0 -0.68727314861425E0 --0.68691839047557E0 -0.68656368114512E0 -0.6862090207077E0 -0.68585440924887E0 -0.68549984685456E0 --0.68514533361166E0 -0.68479086960261E0 -0.68443645491022E0 -0.68408208961835E0 -0.68372777381372E0 --0.68337350758272E0 -0.68301929100858E0 -0.68266512417439E0 -0.68231100716326E0 -0.68195694006016E0 --0.68160292295015E0 -0.68124895591602E0 -0.6808950390394E0 -0.68054117240056E0 -0.68018735608546E0 --0.6798335901786E0 -0.67947987476345E0 -0.67912620992138E0 -0.67877259573372E0 -0.67841903228372E0 --0.67806551965458E0 -0.67771205792937E0 -0.67735864718823E0 -0.6770052875114E0 -0.67665197898082E0 --0.67629872167907E0 -0.67594551568818E0 -0.67559236108964E0 -0.67523925796544E0 -0.67488620639666E0 --0.67453320646285E0 -0.67418025824324E0 -0.67382736181921E0 -0.67347451727181E0 -0.67312172468177E0 --0.67276898412928E0 -0.67241629569443E0 -0.67206365945743E0 -0.67171107549848E0 -0.67135854389823E0 --0.67100606473499E0 -0.67065363808728E0 -0.67030126403411E0 -0.66994894265534E0 -0.66959667403039E0 --0.66924445823811E0 -0.66889229535721E0 -0.66854018546632E0 -0.66818812864424E0 -0.66783612497009E0 --0.66748417452155E0 -0.66713227737572E0 -0.66678043360894E0 -0.66642864330084E0 -0.6660769065302E0 --0.66572522337492E0 -0.66537359391157E0 -0.66502201821641E0 -0.66467049636768E0 -0.66431902844352E0 --0.66396761452172E0 -0.66361625467735E0 -0.66326494898551E0 -0.66291369752322E0 -0.66256250036792E0 --0.66221135759712E0 -0.66186026928568E0 -0.66150923550849E0 -0.66115825634155E0 -0.66080733186232E0 --0.66045646214912E0 -0.66010564727515E0 -0.65975488731378E0 -0.65940418233882E0 -0.6590535324277E0 --0.65870293765707E0 -0.65835239810146E0 -0.65800191383488E0 -0.65765148493133E0 -0.65730111146579E0 --0.65695079351285E0 -0.65660053114675E0 -0.6562503244413E0 -0.65590017347011E0 -0.65555007830733E0 --0.6552000390268E0 -0.65485005570223E0 
-0.65450012840745E0 -0.65415025721645E0 -0.65380044220211E0 --0.65345068343696E0 -0.65310098099331E0 -0.65275133494459E0 -0.6524017453638E0 -0.65205221232356E0 --0.65170273589616E0 -0.65135331615332E0 -0.65100395316821E0 -0.65065464701389E0 -0.65030539776294E0 --0.64995620548593E0 -0.64960707025352E0 -0.64925799213757E0 -0.64890897121E0 -0.64856000754247E0 --0.64821110120598E0 -0.64786225227122E0 -0.64751346080945E0 -0.64716472689241E0 -0.64681605059233E0 --0.64646743197853E0 -0.64611887112063E0 -0.64577036808846E0 -0.64542192295294E0 -0.64507353578455E0 --0.64472520665318E0 -0.64437693562851E0 -0.64402872277987E0 -0.64368056817795E0 -0.64333247189343E0 --0.64298443399558E0 -0.64263645455261E0 -0.64228853363249E0 -0.64194067130494E0 -0.64159286763889E0 --0.64124512270326E0 -0.64089743656782E0 -0.64054980930319E0 -0.64020224097628E0 -0.63985473165345E0 --0.63950728140073E0 -0.63915989028861E0 -0.63881255838642E0 -0.63846528576214E0 -0.63811807248283E0 --0.6377709186151E0 -0.63742382422776E0 -0.63707678938946E0 -0.63672981416798E0 -0.63638289862842E0 --0.63603604283558E0 -0.63568924685748E0 -0.63534251076226E0 -0.63499583461779E0 -0.63464921848948E0 --0.63430266244315E0 -0.63395616654521E0 -0.63360973086243E0 -0.63326335546173E0 -0.63291704040827E0 --0.6325707857674E0 -0.6322245916046E0 -0.63187845798589E0 -0.63153238497688E0 -0.63118637264306E0 --0.63084042104974E0 -0.63049453026202E0 -0.63014870034566E0 -0.62980293136681E0 -0.62945722338973E0 --0.62911157647759E0 -0.62876599069251E0 -0.62842046610117E0 -0.62807500276924E0 -0.62772960076147E0 --0.62738426014066E0 -0.62703898096904E0 -0.6266937633121E0 -0.62634860723554E0 -0.62600351280499E0 --0.62565848008081E0 -0.62531350912396E0 -0.62496859999794E0 -0.62462375276741E0 -0.62427896749601E0 --0.62393424424738E0 -0.6235895830857E0 -0.62324498407401E0 -0.62290044727297E0 -0.6225559727432E0 --0.62221156054776E0 -0.62186721074925E0 -0.62152292340985E0 -0.62117869859265E0 -0.62083453636088E0 --0.62049043677625E0 
-0.6201463998998E0 -0.61980242579273E0 -0.61945851451656E0 -0.61911466613247E0 --0.61877088070165E0 -0.61842715828536E0 -0.61808349894446E0 -0.61773990274022E0 -0.61739636973369E0 --0.6170528999857E0 -0.61670949355763E0 -0.61636615051159E0 -0.61602287090706E0 -0.61567965480254E0 --0.6153365022555E0 -0.61499341332857E0 -0.61465038808296E0 -0.614307426579E0 -0.61396452887558E0 --0.61362169503141E0 -0.61327892510699E0 -0.61293621916277E0 -0.61259357725897E0 -0.61225099945295E0 --0.61190848580223E0 -0.61156603636602E0 -0.61122365120396E0 -0.61088133037514E0 -0.61053907393847E0 --0.61019688195276E0 -0.60985475447672E0 -0.60951269156904E0 -0.6091706932892E0 -0.60882875969392E0 --0.60848689083992E0 -0.60814508678407E0 -0.60780334758533E0 -0.60746167330152E0 -0.60712006399079E0 --0.60677851971134E0 -0.60643704052196E0 -0.60609562647836E0 -0.60575427763683E0 -0.60541299405412E0 --0.60507177578758E0 -0.6047306228939E0 -0.60438953543021E0 -0.60404851345349E0 -0.60370755702055E0 --0.60336666618769E0 -0.60302584101133E0 -0.6026850815476E0 -0.60234438785249E0 -0.60200375998204E0 --0.60166319799177E0 -0.60132270193685E0 -0.60098227187306E0 -0.60064190785684E0 -0.60030160994433E0 --0.59996137819099E0 -0.59962121265267E0 -0.5992811133849E0 -0.59894108044104E0 -0.59860111387403E0 --0.59826121373955E0 -0.59792138009368E0 -0.59758161299234E0 -0.59724191248925E0 -0.59690227863863E0 --0.59656271149485E0 -0.59622321111246E0 -0.59588377754609E0 -0.59554441084933E0 -0.59520511107584E0 --0.59486587827925E0 -0.59452671251323E0 -0.59418761383112E0 -0.59384858228655E0 -0.59350961793298E0 --0.59317072082332E0 -0.59283189101229E0 -0.59249312855503E0 -0.59215443350421E0 -0.59181580591036E0 --0.59147724582289E0 -0.59113875329726E0 -0.59080032838778E0 -0.59046197114803E0 -0.59012368162928E0 --0.58978545988367E0 -0.58944730596302E0 -0.58910921991893E0 -0.58877120180256E0 -0.58843325166698E0 --0.58809536956476E0 -0.58775755554773E0 -0.5874198096671E0 -0.58708213197381E0 -0.58674452252005E0 
--0.58640698135787E0 -0.58606950853887E0 -0.58573210411306E0 -0.58539476813054E0 -0.58505750064242E0 --0.58472030169972E0 -0.58438317135312E0 -0.58404610965365E0 -0.58370911665211E0 -0.58337219239916E0 --0.58303533694538E0 -0.5826985503418E0 -0.58236183263769E0 -0.58202518388243E0 -0.58168860412563E0 --0.58135209341768E0 -0.58101565180852E0 -0.58067927934788E0 -0.58034297608542E0 -0.58000674207066E0 --0.57967057735306E0 -0.57933448198218E0 -0.57899845600699E0 -0.57866249947611E0 -0.57832661243746E0 --0.57799079494113E0 -0.57765504703684E0 -0.57731936877367E0 -0.5769837601989E0 -0.57664822135925E0 --0.57631275230451E0 -0.5759773530846E0 -0.57564202374936E0 -0.57530676434484E0 -0.57497157491768E0 --0.57463645551583E0 -0.57430140618792E0 -0.57396642698179E0 -0.57363151794579E0 -0.57329667912866E0 --0.57296191057809E0 -0.57262721233926E0 -0.57229258445663E0 -0.57195802697904E0 -0.57162353995514E0 --0.57128912343315E0 -0.57095477745936E0 -0.57062050208081E0 -0.57028629734406E0 -0.56995216329538E0 --0.56961809998095E0 -0.56928410744723E0 -0.56895018574036E0 -0.56861633490654E0 -0.56828255499228E0 --0.56794884604386E0 -0.56761520810733E0 -0.56728164122857E0 -0.56694814545339E0 -0.56661472082797E0 --0.56628136739894E0 -0.56594808521117E0 -0.56561487430874E0 -0.56528173473473E0 -0.56494866653654E0 --0.56461566976026E0 -0.56428274445147E0 -0.56394989065489E0 -0.5636171084155E0 -0.56328439777813E0 --0.56295175878768E0 -0.56261919148898E0 -0.56228669592548E0 -0.56195427214034E0 -0.56162192017819E0 --0.56128964008415E0 -0.56095743190306E0 -0.56062529567878E0 -0.56029323145525E0 -0.5599612392765E0 --0.55962931918675E0 -0.55929747123051E0 -0.55896569545074E0 -0.55863399189043E0 -0.55830236059262E0 --0.55797080160072E0 -0.55763931495742E0 -0.55730790070643E0 -0.55697655889175E0 -0.55664528955789E0 --0.55631409274613E0 -0.55598296849836E0 -0.55565191685701E0 -0.55532093786535E0 -0.55499003156595E0 --0.55465919800172E0 -0.5543284372155E0 -0.55399774924998E0 -0.55366713414716E0 
-0.55333659194943E0 --0.55300612269847E0 -0.55267572643568E0 -0.55234540320197E0 -0.55201515304018E0 -0.55168497599265E0 --0.55135487210113E0 -0.55102484140655E0 -0.55069488394947E0 -0.55036499977215E0 -0.55003518891665E0 --0.54970545142478E0 -0.54937578733651E0 -0.54904619669207E0 -0.54871667953244E0 -0.54838723589861E0 --0.54805786583107E0 -0.5477285693713E0 -0.54739934656049E0 -0.54707019743944E0 -0.5467411220485E0 --0.54641212042888E0 -0.54608319261949E0 -0.54575433865917E0 -0.54542555858708E0 -0.54509685244456E0 --0.54476822027204E0 -0.54443966210969E0 -0.54411117799761E0 -0.54378276797622E0 -0.54345443208431E0 --0.54312617036106E0 -0.54279798284569E0 -0.54246986957747E0 -0.54214183059535E0 -0.54181386593869E0 --0.54148597564655E0 -0.54115815975794E0 -0.54083041831259E0 -0.54050275135068E0 -0.54017515891013E0 --0.53984764102846E0 -0.53952019774298E0 -0.53919282909334E0 -0.53886553511846E0 -0.53853831585678E0 --0.5382111713463E0 -0.53788410162452E0 -0.53755710673065E0 -0.53723018670398E0 -0.53690334158311E0 --0.53657657140363E0 -0.53624987620097E0 -0.5359232560138E0 -0.53559671088101E0 -0.53527024084116E0 --0.53494384593122E0 -0.53461752618865E0 -0.53429128165072E0 -0.53396511235455E0 -0.53363901833734E0 --0.53331299963584E0 -0.53298705628678E0 -0.53266118832686E0 -0.53233539579301E0 -0.53200967872185E0 --0.53168403715021E0 -0.53135847111476E0 -0.53103298065192E0 -0.530707565799E0 -0.53038222659373E0 - -0.530056963072E0 -0.52973177526844E0 -0.52940666321737E0 -0.52908162695571E0 -0.52875666651935E0 --0.52843178194427E0 -0.528106973267E0 -0.527782240524E0 -0.52745758375092E0 -0.52713300298344E0 --0.52680849825747E0 -0.52648406960734E0 -0.52615971706739E0 -0.52583544067282E0 -0.52551124045927E0 --0.52518711646157E0 -0.52486306871565E0 -0.52453909725747E0 -0.52421520212228E0 -0.52389138334348E0 --0.52356764095461E0 -0.52324397499057E0 -0.52292038548611E0 -0.52259687247568E0 -0.52227343599399E0 --0.52195007607546E0 -0.52162679275463E0 -0.52130358606607E0 
-0.52098045604532E0 -0.52065740272448E0 --0.52033442613591E0 -0.52001152631294E0 -0.51968870329133E0 -0.51936595710681E0 -0.51904328779195E0 --0.51872069537923E0 -0.51839817990122E0 -0.51807574139246E0 -0.51775337988707E0 -0.51743109541815E0 --0.51710888801832E0 -0.51678675772004E0 -0.51646470455684E0 -0.51614272856187E0 -0.51582082976812E0 --0.51549900820831E0 -0.51517726391484E0 -0.51485559692095E0 -0.51453400725996E0 -0.51421249496521E0 --0.51389106006741E0 -0.51356970259723E0 -0.51324842258737E0 -0.51292722007136E0 -0.5126060950828E0 --0.51228504765267E0 -0.51196407781241E0 -0.51164318559387E0 -0.5113223710297E0 -0.51100163415283E0 --0.51068097499405E0 -0.51036039358412E0 -0.51003988995389E0 -0.50971946413551E0 -0.50939911616026E0 --0.50907884605997E0 -0.50875865386663E0 -0.50843853961274E0 -0.5081185033282E0 -0.5077985450436E0 --0.50747866478958E0 -0.50715886259687E0 -0.50683913849526E0 -0.50651949251664E0 -0.50619992469275E0 --0.50588043505508E0 -0.50556102363322E0 -0.5052416904571E0 -0.50492243555721E0 -0.50460325896417E0 --0.50428416070848E0 -0.50396514082017E0 -0.5036461993294E0 -0.50332733626615E0 -0.50300855166017E0 --0.50268984554042E0 -0.50237121793798E0 -0.50205266888392E0 -0.50173419840897E0 -0.50141580654099E0 --0.50109749330819E0 -0.50077925874027E0 -0.50046110286715E0 -0.50014302571828E0 -0.49982502732338E0 --0.49950710771204E0 -0.49918926691367E0 -0.49887150495742E0 -0.49855382187302E0 -0.49823621768854E0 --0.49791869243207E0 -0.49760124613181E0 -0.49728387881711E0 -0.49696659051648E0 -0.49664938125906E0 --0.49633225107409E0 -0.49601519999102E0 -0.49569822803748E0 -0.49538133524157E0 -0.49506452163143E0 --0.49474778723531E0 -0.49443113208141E0 -0.49411455619751E0 -0.49379805961098E0 -0.49348164234941E0 --0.49316530444247E0 -0.49284904592031E0 -0.49253286680948E0 -0.49221676713582E0 -0.49190074692493E0 - -0.491584806206E0 -0.49126894500692E0 -0.49095316335541E0 -0.49063746127895E0 -0.49032183880518E0 --0.49000629596126E0 -0.48969083277442E0 
-0.4893754492718E0 -0.48906014548025E0 -0.48874492142658E0 --0.48842977713768E0 -0.48811471264044E0 -0.4877997279617E0 -0.48748482312788E0 -0.48716999816497E0 --0.48685525309986E0 -0.48654058796025E0 -0.48622600277461E0 -0.48591149756753E0 -0.48559707236417E0 --0.48528272719004E0 -0.48496846207207E0 -0.48465427703644E0 -0.48434017210948E0 -0.4840261473175E0 --0.48371220268677E0 -0.48339833824309E0 -0.48308455401257E0 -0.48277085002078E0 -0.48245722629289E0 --0.48214368285387E0 -0.48183021972957E0 -0.48151683694526E0 -0.48120353452638E0 -0.4808903124994E0 --0.48057717089118E0 -0.48026410972612E0 -0.4799511290283E0 -0.47963822882173E0 -0.4793254091326E0 --0.47901266998665E0 -0.47870001140868E0 -0.47838743342279E0 -0.47807493605231E0 -0.47776251932371E0 --0.47745018326304E0 -0.47713792789555E0 -0.47682575324414E0 -0.4765136593317E0 -0.47620164618342E0 --0.47588971382448E0 -0.47557786227993E0 -0.4752660915732E0 -0.47495440172765E0 -0.47464279276775E0 --0.47433126471857E0 -0.47401981760581E0 -0.4737084514518E0 -0.47339716627944E0 -0.47308596211205E0 --0.47277483897422E0 -0.47246379689E0 -0.47215283588309E0 -0.4718419559771E0 -0.47153115719558E0 --0.47122043956225E0 -0.47090980310105E0 -0.47059924783501E0 -0.47028877378661E0 -0.46997838097751E0 --0.4696680694325E0 -0.46935783917559E0 -0.46904769023031E0 -0.46873762261911E0 -0.46842763636475E0 --0.46811773149012E0 -0.46780790801804E0 -0.46749816597128E0 -0.46718850537229E0 -0.46687892624316E0 --0.46656942860679E0 -0.46626001248633E0 -0.46595067790531E0 -0.4656414248852E0 -0.46533225344777E0 --0.46502316361515E0 -0.46471415541027E0 -0.4644052288562E0 -0.46409638397431E0 -0.46378762078579E0 --0.46347893931184E0 -0.46317033957563E0 -0.46286182159969E0 -0.46255338540608E0 -0.46224503101664E0 --0.46193675845383E0 -0.46162856773821E0 -0.46132045889052E0 -0.46101243193203E0 -0.46070448688506E0 --0.46039662377161E0 -0.46008884261297E0 -0.45978114343044E0 -0.4594735262453E0 -0.45916599107902E0 --0.45885853795324E0 
-0.45855116688877E0 -0.45824387790609E0 -0.45793667102531E0 -0.45762954626822E0 --0.45732250365606E0 -0.45701554320982E0 -0.4567086649501E0 -0.4564018688973E0 -0.45609515507256E0 --0.45578852349703E0 -0.45548197419175E0 -0.45517550717617E0 -0.45486912246967E0 -0.45456282009296E0 --0.45425660006702E0 -0.45395046241277E0 -0.45364440715004E0 -0.45333843429874E0 -0.45303254387906E0 --0.45272673591171E0 -0.45242101041782E0 -0.4521153674163E0 -0.45180980692606E0 -0.45150432896615E0 --0.45119893355753E0 -0.45089362072067E0 -0.45058839047517E0 -0.45028324284035E0 -0.44997817783532E0 --0.44967319548043E0 -0.44936829579596E0 -0.44906347880134E0 -0.44875874451497E0 -0.44845409295493E0 --0.44814952414128E0 -0.44784503809366E0 -0.44754063483151E0 -0.44723631437418E0 -0.44693207674142E0 --0.44662792195181E0 -0.4463238500236E0 -0.44601986097476E0 -0.44571595482506E0 -0.44541213159368E0 --0.44510839129953E0 -0.44480473396114E0 -0.4445011595968E0 -0.44419766822568E0 -0.44389425986701E0 --0.4435909345397E0 -0.44328769226103E0 -0.44298453304818E0 -0.44268145691995E0 -0.44237846389532E0 --0.44207555399308E0 -0.44177272723118E0 -0.44146998362759E0 -0.44116732320056E0 -0.44086474596853E0 --0.44056225195031E0 -0.44025984116299E0 -0.43995751362377E0 -0.43965526935003E0 -0.43935310836024E0 --0.43905103067247E0 -0.43874903630443E0 -0.43844712527369E0 -0.43814529759768E0 -0.43784355329453E0 --0.43754189238253E0 -0.43724031487898E0 -0.43693882080036E0 -0.43663741016274E0 -0.43633608298439E0 --0.43603483928309E0 -0.43573367907635E0 -0.43543260238108E0 -0.43513160921414E0 -0.43483069959292E0 --0.43452987353489E0 -0.43422913105766E0 -0.4339284721772E0 -0.43362789690961E0 -0.43332740527171E0 --0.43302699728079E0 -0.43272667295372E0 -0.43242643230752E0 -0.43212627535931E0 -0.43182620212593E0 --0.43152621262307E0 -0.43122630686634E0 -0.43092648487253E0 -0.43062674665844E0 -0.43032709224063E0 --0.4300275216355E0 -0.42972803485935E0 -0.42942863192858E0 -0.42912931285957E0 -0.42883007766924E0 
--0.42853092637266E0 -0.42823185898498E0 -0.42793287552179E0 -0.427633976E0 -0.42733516043628E0 --0.42703642884624E0 -0.4267377812453E0 -0.42643921764885E0 -0.42614073807341E0 -0.42584234253553E0 --0.4255440310506E0 -0.42524580363327E0 -0.42494766029777E0 -0.42464960106065E0 -0.42435162593776E0 --0.42405373494476E0 -0.42375592809683E0 -0.42345820540901E0 -0.42316056689696E0 -0.42286301257639E0 --0.42256554246304E0 -0.42226815657104E0 -0.4219708549146E0 -0.42167363750882E0 -0.4213765043692E0 --0.42107945551093E0 -0.420782490949E0 -0.42048561069833E0 -0.42018881477386E0 -0.41989210319061E0 --0.41959547596386E0 -0.41929893310779E0 -0.41900247463651E0 -0.41870610056408E0 -0.41840981090561E0 --0.41811360567563E0 -0.4178174848889E0 -0.41752144856032E0 -0.41722549670531E0 -0.41692962933707E0 --0.41663384646922E0 -0.4163381481157E0 -0.41604253429127E0 -0.41574700501021E0 -0.41545156028684E0 --0.41515620013541E0 -0.41486092457011E0 -0.41456573360521E0 -0.41427062725536E0 -0.4139756055341E0 --0.4136806684545E0 -0.4133858160291E0 -0.41309104827288E0 -0.41279636520007E0 -0.41250176682463E0 --0.4122072531601E0 -0.41191282422E0 -0.41161848001819E0 -0.4113242205686E0 -0.41103004588508E0 --0.41073595598032E0 -0.41044195086695E0 -0.41014803055856E0 -0.40985419506902E0 -0.4095604444121E0 --0.40926677860083E0 -0.4089731976483E0 -0.40867970156778E0 -0.40838629037294E0 -0.40809296407788E0 --0.40779972269476E0 -0.4075065662357E0 -0.40721349471293E0 -0.40692050814048E0 -0.40662760653192E0 --0.40633478990009E0 -0.40604205825761E0 -0.40574941161694E0 -0.40545684999142E0 -0.40516437339435E0 --0.40487198183841E0 -0.40457967533548E0 -0.40428745389717E0 -0.40399531753667E0 -0.40370326626689E0 --0.40341130010054E0 -0.4031194190504E0 -0.40282762312969E0 -0.4025359123503E0 -0.40224428672374E0 --0.40195274626116E0 -0.40166129097594E0 -0.4013699208808E0 -0.40107863598811E0 -0.40078743630978E0 --0.40049632185759E0 -0.4002052926441E0 -0.39991434868186E0 -0.39962348998326E0 -0.39933271655928E0 
--0.39904202842079E0 -0.3987514255801E0 -0.39846090804976E0 -0.39817047584224E0 -0.39788012896889E0 --0.39758986744123E0 -0.39729969127104E0 -0.39700960047042E0 -0.39671959505175E0 -0.39642967502589E0 --0.39613984040376E0 -0.39585009119645E0 -0.39556042741612E0 -0.39527084907458E0 -0.39498135618327E0 --0.3946919487535E0 -0.39440262679643E0 -0.39411339032389E0 -0.39382423934775E0 -0.39353517387919E0 --0.39324619392867E0 -0.39295729950635E0 -0.39266849062409E0 -0.39237976729337E0 -0.39209112952551E0 --0.39180257733135E0 -0.39151411072154E0 -0.39122572970748E0 -0.39093743430074E0 -0.39064922451312E0 --0.39036110035423E0 -0.39007306183393E0 -0.38978510896296E0 -0.38949724175281E0 -0.38920946021461E0 --0.38892176435911E0 -0.38863415419724E0 -0.38834662973975E0 -0.38805919099675E0 -0.3877718379783E0 --0.38748457069504E0 -0.38719738915762E0 -0.38691029337646E0 -0.38662328336229E0 -0.38633635912565E0 --0.38604952067708E0 -0.38576276802713E0 -0.38547610118681E0 -0.38518952016551E0 -0.38490302497271E0 --0.38461661561821E0 -0.38433029211302E0 -0.3840440544679E0 -0.38375790269276E0 -0.38347183679734E0 --0.38318585679127E0 -0.38289996268541E0 -0.38261415449062E0 -0.38232843221666E0 -0.38204279587254E0 --0.38175724546687E0 -0.38147178101048E0 -0.38118640251365E0 -0.38090110998641E0 -0.38061590343831E0 --0.38033078287868E0 -0.38004574831771E0 -0.3797607997657E0 -0.37947593723309E0 -0.37919116072813E0 --0.37890647025918E0 -0.37862186583593E0 -0.3783373474688E0 -0.37805291516816E0 -0.3777685689429E0 --0.37748430880207E0 -0.37720013475499E0 -0.37691604681175E0 -0.37663204498273E0 -0.37634812927646E0 --0.3760642997013E0 -0.37578055626561E0 -0.37549689897963E0 -0.37521332785295E0 -0.37492984289494E0 --0.37464644411482E0 -0.37436313152228E0 -0.37407990512562E0 -0.37379676493329E0 -0.37351371095404E0 --0.37323074319745E0 -0.37294786167283E0 -0.37266506638904E0 -0.37238235735486E0 -0.37209973457897E0 --0.37181719807084E0 -0.37153474784008E0 -0.37125238389517E0 -0.3709701062441E0 
-0.37068791489442E0 --0.37040580985582E0 -0.37012379113734E0 -0.36984185874781E0 -0.36956001269569E0 -0.3692782529893E0 --0.36899657963757E0 -0.36871499264952E0 -0.36843349203408E0 -0.36815207779894E0 -0.3678707499518E0 --0.36758950850123E0 -0.36730835345604E0 -0.36702728482493E0 -0.36674630261613E0 -0.36646540683787E0 --0.36618459749852E0 -0.36590387460674E0 -0.36562323817141E0 -0.36534268820021E0 -0.36506222470079E0 --0.36478184768084E0 -0.36450155714898E0 -0.36422135311336E0 -0.36394123558223E0 -0.36366120456385E0 --0.36338126006638E0 -0.36310140209801E0 -0.36282163066709E0 -0.36254194578155E0 -0.36226234744871E0 --0.36198283567563E0 -0.36170341047069E0 -0.361424071842E0 -0.36114481979751E0 -0.36086565434544E0 --0.36058657549426E0 -0.36030758325142E0 -0.36002867762399E0 -0.35974985861879E0 -0.35947112624429E0 --0.35919248050842E0 -0.35891392141897E0 -0.3586354489835E0 -0.35835706320941E0 -0.35807876410466E0 --0.35780055167725E0 -0.35752242593503E0 -0.35724438688462E0 -0.35696643453251E0 -0.35668856888647E0 --0.35641078995458E0 -0.3561330977448E0 -0.35585549226414E0 -0.35557797351962E0 -0.35530054151868E0 --0.35502319626924E0 -0.35474593777961E0 -0.3544687660561E0 -0.35419168110509E0 -0.3539146829332E0 --0.35363777154854E0 -0.35336094695883E0 -0.35308420917119E0 -0.35280755819258E0 -0.35253099402987E0 --0.35225451669063E0 -0.35197812618239E0 -0.35170182251219E0 -0.35142560568652E0 -0.35114947571177E0 --0.35087343259526E0 -0.35059747634403E0 -0.35032160696507E0 -0.35004582446549E0 -0.34977012885216E0 --0.34949452013225E0 -0.349218998313E0 -0.3489435634019E0 -0.34866821540481E0 -0.3483929543278E0 --0.34811778017751E0 -0.34784269296119E0 -0.34756769268555E0 -0.34729277935774E0 -0.347017952985E0 --0.34674321357431E0 -0.34646856113154E0 -0.34619399566254E0 -0.34591951717417E0 -0.34564512567334E0 --0.34537082116677E0 -0.34509660366109E0 -0.34482247316276E0 -0.34454842967849E0 -0.34427447321516E0 --0.34400060378017E0 -0.34372682137893E0 -0.34345312601698E0 
-0.34317951770019E0 -0.34290599643602E0 --0.34263256223153E0 -0.34235921509295E0 -0.34208595502637E0 -0.34181278203779E0 -0.34153969613415E0 --0.34126669732235E0 -0.34099378560855E0 -0.34072096099831E0 -0.34044822349687E0 -0.34017557311121E0 --0.33990300984784E0 -0.33963053371317E0 -0.33935814471319E0 -0.33908584285373E0 -0.33881362814136E0 --0.33854150058276E0 -0.33826946018473E0 -0.33799750695229E0 -0.33772564089062E0 -0.33745386200579E0 --0.33718217030447E0 -0.33691056579322E0 -0.33663904847772E0 -0.33636761836363E0 -0.33609627545687E0 --0.33582501976411E0 -0.33555385129229E0 -0.33528277004661E0 -0.33501177603207E0 -0.33474086925362E0 --0.33447004971816E0 -0.33419931743189E0 -0.33392867240089E0 -0.33365811463119E0 -0.33338764412939E0 --0.33311726090029E0 -0.33284696494892E0 -0.33257675628067E0 -0.33230663490206E0 -0.33203660081922E0 --0.33176665403784E0 -0.33149679456356E0 -0.33122702240193E0 -0.33095733755911E0 -0.33068774004143E0 --0.33041822985425E0 -0.33014880700242E0 -0.32987947149028E0 -0.32961022332458E0 -0.32934106251142E0 --0.32907198905662E0 -0.32880300296546E0 -0.32853410424313E0 -0.32826529289552E0 -0.32799656892854E0 --0.32772793234807E0 -0.32745938315896E0 -0.32719092136616E0 -0.32692254697515E0 -0.32665425999165E0 --0.32638606042108E0 -0.32611794826908E0 -0.32584992354111E0 -0.3255819862427E0 -0.32531413637956E0 --0.32504637395779E0 -0.32477869898208E0 -0.32451111145703E0 -0.32424361138729E0 -0.32397619877904E0 --0.32370887363797E0 -0.32344163596944E0 -0.32317448577866E0 -0.32290742307068E0 -0.32264044785125E0 --0.3223735601261E0 -0.32210675990054E0 -0.3218400471791E0 -0.32157342196608E0 -0.3213068842672E0 --0.32104043408796E0 -0.3207740714337E0 -0.32050779630994E0 -0.32024160872256E0 -0.31997550867637E0 --0.31970949617575E0 -0.3194435712247E0 -0.3191777338294E0 -0.31891198399536E0 -0.31864632172787E0 --0.31838074703185E0 -0.31811525991215E0 -0.31784986037417E0 -0.31758454842336E0 -0.31731932406504E0 --0.31705418730364E0 -0.31678913814359E0 
-0.31652417659003E0 -0.31625930264823E0 -0.31599451632322E0 --0.31572981762023E0 -0.31546520654434E0 -0.31520068310067E0 -0.31493624729433E0 -0.31467189913065E0 --0.31440763861417E0 -0.31414346574942E0 -0.31387938054097E0 -0.31361538299413E0 -0.31335147311378E0 --0.31308765090502E0 -0.31282391637297E0 -0.31256026952263E0 -0.31229671035908E0 -0.31203323888759E0 --0.31176985511294E0 -0.31150655903931E0 -0.31124335067052E0 -0.31098023001215E0 -0.31071719706945E0 --0.31045425184753E0 -0.3101913943511E0 -0.30992862458479E0 -0.30966594255366E0 -0.30940334826287E0 --0.30914084171776E0 -0.30887842292237E0 -0.3086160918809E0 -0.308353848598E0 -0.30809169307882E0 --0.30782962532801E0 -0.3075676453508E0 -0.3073057531524E0 -0.30704394873788E0 -0.30678223211127E0 --0.30652060327662E0 -0.30625906223884E0 -0.30599760900292E0 -0.30573624357361E0 -0.30547496595584E0 --0.30521377615445E0 -0.30495267417425E0 -0.30469166002007E0 -0.30443073369703E0 -0.30416989520925E0 --0.30390914456083E0 -0.30364848175606E0 -0.30338790680037E0 -0.30312741969892E0 -0.30286702045627E0 --0.30260670907682E0 -0.30234648556485E0 -0.30208634992565E0 -0.30182630216448E0 -0.30156634228589E0 --0.30130647029381E0 -0.30104668619185E0 -0.30078698998545E0 -0.30052738167962E0 -0.30026786127923E0 --0.30000842878877E0 -0.2997490842127E0 -0.29948982755585E0 -0.29923065882305E0 -0.29897157801923E0 --0.29871258514849E0 -0.29845368021499E0 -0.29819486322329E0 -0.29793613417824E0 -0.29767749308437E0 --0.29741893994654E0 -0.29716047476949E0 -0.29690209755794E0 -0.29664380831661E0 -0.29638560705046E0 --0.29612749376369E0 -0.29586946846037E0 -0.29561153114444E0 -0.29535368182118E0 -0.2950959204953E0 --0.29483824717165E0 -0.29458066185508E0 -0.29432316455104E0 -0.29406575526309E0 -0.29380843399503E0 --0.29355120075099E0 -0.29329405553631E0 -0.29303699835583E0 -0.29278002921422E0 -0.29252314811608E0 --0.29226635506602E0 -0.29200965006877E0 -0.29175303312915E0 -0.29149650425159E0 -0.29124006344035E0 --0.29098371069943E0 
-0.29072744603379E0 -0.29047126944806E0 -0.29021518094685E0 -0.28995918053488E0 --0.28970326821671E0 -0.28944744399713E0 -0.28919170788099E0 -0.28893605987317E0 -0.2886804999776E0 --0.28842502819816E0 -0.28816964453949E0 -0.28791434900663E0 -0.28765914160441E0 -0.28740402233738E0 --0.28714899121005E0 -0.28689404822705E0 -0.28663919339333E0 -0.28638442671412E0 -0.28612974819344E0 --0.28587515783522E0 -0.28562065564338E0 -0.28536624162332E0 -0.28511191578E0 -0.28485767811805E0 --0.28460352864195E0 -0.28434946735594E0 -0.28409549426522E0 -0.2838416093749E0 -0.28358781268969E0 --0.28333410421356E0 -0.28308048395035E0 -0.28282695190502E0 -0.28257350808235E0 -0.28232015248695E0 - -0.282066885124E0 -0.28181370599888E0 -0.28156061511594E0 -0.28130761247914E0 -0.28105469809212E0 --0.2808018719604E0 -0.28054913408892E0 -0.28029648448248E0 -0.28004392314566E0 -0.27979145008288E0 --0.27953906529921E0 -0.27928676879975E0 -0.27903456058953E0 -0.27878244067249E0 -0.27853040905247E0 --0.27827846573439E0 -0.27802661072348E0 -0.27777484402484E0 -0.27752316564298E0 -0.27727157558241E0 --0.27702007384792E0 -0.2767686604447E0 -0.27651733537816E0 -0.27626609865247E0 -0.27601495027176E0 --0.2757638902403E0 -0.27551291856342E0 -0.275262035246E0 -0.27501124029296E0 -0.27476053370916E0 --0.27450991549934E0 -0.27425938566867E0 -0.27400894422239E0 -0.27375859116529E0 -0.2735083265016E0 --0.2732581502353E0 -0.27300806237173E0 -0.27275806291595E0 -0.27250815187293E0 -0.27225832924758E0 --0.27200859504457E0 -0.27175894926907E0 -0.27150939192638E0 -0.27125992302207E0 -0.27101054256012E0 --0.27076125054469E0 -0.27051204698056E0 -0.27026293187322E0 -0.27001390522766E0 -0.26976496704921E0 --0.26951611734335E0 -0.26926735611539E0 -0.26901868336931E0 -0.26877009910894E0 -0.26852160333969E0 --0.26827319606721E0 -0.26802487729702E0 -0.26777664703391E0 -0.26752850528277E0 -0.2672804520487E0 --0.26703248733704E0 -0.26678461115328E0 -0.266536823502E0 -0.26628912438782E0 -0.26604151381545E0 
--0.26579399179051E0 -0.26554655831825E0 -0.26529921340386E0 -0.26505195705245E0 -0.26480478926898E0 --0.26455771005912E0 -0.26431071942854E0 -0.26406381738235E0 -0.26381700392512E0 -0.26357027906113E0 --0.2633236427962E0 -0.26307709513579E0 -0.26283063608527E0 -0.26258426564994E0 -0.26233798383492E0 --0.26209179064576E0 -0.26184568608809E0 -0.2615996701677E0 -0.26135374288908E0 -0.26110790425671E0 --0.26086215427594E0 -0.26061649295281E0 -0.26037092029326E0 -0.26012543630234E0 -0.25988004098515E0 --0.25963473434694E0 -0.25938951639374E0 -0.25914438713163E0 -0.25889934656565E0 -0.25865439470073E0 --0.2584095315417E0 -0.25816475709455E0 -0.25792007136467E0 -0.25767547435778E0 -0.25743096607993E0 --0.25718654653746E0 -0.25694221573519E0 -0.25669797367811E0 -0.25645382037148E0 -0.25620975582128E0 --0.25596578003303E0 -0.25572189301261E0 -0.25547809476591E0 -0.25523438529875E0 -0.2549907646169E0 --0.25474723272633E0 -0.25450378963257E0 -0.25426043534084E0 -0.25401716985588E0 -0.25377399318434E0 --0.25353090533242E0 -0.25328790630613E0 -0.25304499611104E0 -0.25280217475263E0 -0.25255944223707E0 --0.25231679857056E0 -0.25207424375931E0 -0.25183177780862E0 -0.25158940072377E0 -0.25134711251073E0 --0.25110491317584E0 -0.25086280272515E0 -0.25062078116479E0 -0.25037884850079E0 -0.25013700473924E0 --0.24989524988636E0 -0.24965358394854E0 -0.24941200693145E0 -0.24917051884067E0 -0.24892911968166E0 --0.24868780946133E0 -0.24844658818619E0 -0.24820545586242E0 -0.24796441249606E0 -0.24772345809286E0 --0.24748259265967E0 -0.24724181620322E0 -0.24700112872986E0 -0.24676053024513E0 -0.24652002075427E0 --0.24627960026418E0 -0.24603926878162E0 -0.24579902631315E0 -0.2455588728654E0 -0.24531880844526E0 --0.24507883305891E0 -0.24483894671216E0 -0.24459914941041E0 -0.24435944116101E0 -0.24411982197075E0 --0.24388029184626E0 -0.24364085079397E0 -0.24340149882009E0 -0.24316223593166E0 -0.24292306213577E0 --0.24268397743946E0 -0.24244498184858E0 -0.24220607536893E0 -0.24196725800737E0 
-0.24172852977111E0 --0.24148989066714E0 -0.24125134070221E0 -0.24101287988303E0 -0.24077450821645E0 -0.24053622570947E0 --0.2402980323692E0 -0.24005992820222E0 -0.23982191321508E0 -0.23958398741432E0 -0.23934615080731E0 --0.23910840340104E0 -0.23887074520257E0 -0.23863317621894E0 -0.23839569645689E0 -0.23815830592406E0 --0.23792100462806E0 -0.23768379257601E0 -0.23744666977431E0 -0.23720963622908E0 -0.23697269194815E0 --0.23673583693916E0 -0.23649907120959E0 -0.2362623947665E0 -0.23602580761686E0 -0.23578930976819E0 --0.23555290122813E0 -0.23531658200444E0 -0.23508035210397E0 -0.23484421153366E0 -0.2346081603008E0 --0.23437219841323E0 -0.23413632587831E0 -0.23390054270393E0 -0.23366484889797E0 -0.23342924446826E0 --0.23319372942173E0 -0.23295830376522E0 -0.23272296750661E0 -0.23248772065399E0 -0.23225256321527E0 --0.23201749519813E0 -0.23178251661016E0 -0.23154762745922E0 -0.23131282775343E0 -0.23107811750115E0 --0.23084349670962E0 -0.2306089653861E0 -0.23037452353798E0 -0.23014017117392E0 -0.22990590830211E0 --0.22967173493065E0 -0.22943765106762E0 -0.22920365672108E0 -0.22896975189924E0 -0.22873593661039E0 --0.22850221086258E0 -0.22826857466362E0 -0.22803502802103E0 -0.22780157094352E0 -0.22756820343958E0 --0.22733492551759E0 -0.22710173718589E0 -0.22686863845263E0 -0.22663562932643E0 -0.22640270981601E0 --0.22616987993023E0 -0.22593713967685E0 -0.22570448906361E0 -0.22547192809897E0 -0.22523945679207E0 --0.22500707515169E0 -0.22477478318653E0 -0.22454258090526E0 -0.22431046831661E0 -0.22407844542938E0 --0.22384651225247E0 -0.22361466879446E0 -0.22338291506387E0 -0.2231512510691E0 -0.22291967681942E0 --0.22268819232363E0 -0.22245679759083E0 -0.22222549263037E0 -0.22199427745181E0 -0.22176315206359E0 --0.22153211647419E0 -0.22130117069235E0 -0.22107031472782E0 -0.22083954858998E0 -0.22060887228804E0 --0.22037828583114E0 -0.22014778922826E0 -0.21991738248931E0 -0.21968706562421E0 -0.21945683864214E0 --0.21922670155173E0 -0.21899665436116E0 -0.21876669708086E0 
-0.2185368297207E0 -0.21830705229042E0 --0.21807736479951E0 -0.21784776725732E0 -0.21761825967382E0 -0.21738884205907E0 -0.2171595144232E0 --0.21693027677519E0 -0.21670112912399E0 -0.21647207147944E0 -0.21624310385198E0 -0.2160142262518E0 --0.21578543868871E0 -0.21555674117246E0 -0.21532813371298E0 -0.2150996163208E0 -0.21487118900652E0 --0.21464285177983E0 -0.21441460465033E0 -0.21418644762752E0 -0.21395838072242E0 -0.2137304039456E0 --0.21350251730736E0 -0.21327472081785E0 -0.21304701448678E0 -0.21281939832542E0 -0.21259187234484E0 --0.21236443655571E0 -0.21213709096759E0 -0.21190983558974E0 -0.21168267043347E0 -0.21145559550998E0 --0.21122861083027E0 -0.21100171640511E0 -0.21077491224558E0 -0.21054819836224E0 -0.21032157476541E0 --0.21009504146482E0 -0.20986859847242E0 -0.20964224579953E0 -0.20941598345732E0 -0.20918981145668E0 --0.20896372980835E0 -0.20873773852375E0 -0.20851183761433E0 -0.20828602709149E0 -0.20806030696598E0 --0.20783467724845E0 -0.20760913795029E0 -0.20738368908323E0 -0.20715833065875E0 -0.20693306268837E0 --0.20670788518352E0 -0.20648279815577E0 -0.20625780161702E0 -0.20603289557936E0 -0.20580808005395E0 --0.20558335505184E0 -0.20535872058413E0 -0.20513417666338E0 -0.20490972330163E0 -0.20468536051085E0 --0.20446108830301E0 -0.20423690668996E0 -0.20401281568392E0 -0.20378881529705E0 -0.2035649055414E0 --0.20334108642876E0 -0.2031173579706E0 -0.20289372017958E0 -0.20267017306822E0 -0.20244671664893E0 --0.20222335093407E0 -0.2020000759358E0 -0.20177689166678E0 -0.20155379813982E0 -0.20133079536793E0 --0.20110788336299E0 -0.20088506213689E0 -0.20066233170213E0 -0.20043969207204E0 -0.20021714325949E0 --0.19999468527754E0 -0.19977231813931E0 -0.1995500418579E0 -0.19932785644558E0 -0.19910576191439E0 --0.19888375827767E0 -0.19866184554914E0 -0.19844002374241E0 -0.19821829287034E0 -0.19799665294584E0 --0.1977751039822E0 -0.19755364599317E0 -0.19733227899266E0 -0.19711100299355E0 -0.19688981800868E0 --0.19666872405103E0 -0.19644772113493E0 
-0.19622680927428E0 -0.19600598848272E0 -0.19578525877382E0 --0.19556462016091E0 -0.19534407265844E0 -0.19512361628069E0 -0.19490325104154E0 -0.19468297695442E0 --0.19446279403242E0 -0.19424270229022E0 -0.19402270174219E0 -0.19380279240263E0 -0.19358297428566E0 --0.19336324740524E0 -0.19314361177588E0 -0.19292406741217E0 -0.19270461432873E0 -0.19248525253971E0 --0.19226598205922E0 -0.19204680290176E0 -0.19182771508222E0 -0.19160871861517E0 -0.19138981351555E0 --0.19117099979817E0 -0.19095227747785E0 -0.19073364656976E0 -0.19051510708917E0 -0.19029665905072E0 --0.19007830246888E0 -0.1898600373579E0 -0.18964186373372E0 -0.18942378161159E0 -0.18920579100699E0 --0.18898789193555E0 -0.18877008441325E0 -0.18855236845485E0 -0.18833474407518E0 -0.18811721128929E0 --0.18789977011333E0 -0.18768242056289E0 -0.18746516265386E0 -0.18724799640215E0 -0.18703092182358E0 --0.18681393893412E0 -0.18659704774975E0 -0.18638024828634E0 -0.18616354055965E0 -0.18594692458511E0 --0.1857304003793E0 -0.1855139679585E0 -0.18529762733894E0 -0.18508137853706E0 -0.18486522156899E0 --0.18464915645144E0 -0.18443318320122E0 -0.18421730183521E0 -0.18400151236932E0 -0.18378581481938E0 --0.18357020920212E0 -0.18335469553488E0 -0.18313927383486E0 -0.18292394411869E0 -0.18270870640291E0 --0.18249356070434E0 -0.1822785070408E0 -0.18206354543017E0 -0.18184867588904E0 -0.18163389843385E0 --0.18141921308088E0 -0.18120461984857E0 -0.18099011875472E0 -0.18077570981688E0 -0.18056139305243E0 --0.18034716847851E0 -0.18013303611325E0 -0.17991899597462E0 -0.17970504808044E0 -0.17949119244807E0 --0.17927742909462E0 -0.17906375803844E0 -0.17885017929784E0 -0.17863669289094E0 -0.17842329883617E0 --0.17820999715206E0 -0.17799678785669E0 -0.1777836709679E0 -0.17757064650307E0 -0.17735771448144E0 --0.1771448749217E0 -0.17693212784251E0 -0.17671947326262E0 -0.1765069112006E0 -0.17629444167541E0 --0.17608206470601E0 -0.1758697803114E0 -0.17565758851009E0 -0.17544548932031E0 -0.17523348276133E0 --0.17502156885288E0 
-0.17480974761458E0 -0.17459801906548E0 -0.17438638322461E0 -0.17417484011129E0 --0.17396338974544E0 -0.17375203214692E0 -0.17354076733501E0 -0.17332959532891E0 -0.17311851614774E0 --0.17290752981211E0 -0.1726966363422E0 -0.17248583575793E0 -0.17227512807907E0 -0.17206451332499E0 --0.1718539915166E0 -0.17164356267448E0 -0.17143322681896E0 -0.17122298396993E0 -0.17101283414704E0 --0.17080277737118E0 -0.17059281366307E0 -0.17038294304333E0 -0.17017316553279E0 -0.16996348115207E0 --0.16975388992213E0 -0.16954439186406E0 -0.1693349869991E0 -0.16912567534776E0 -0.1689164569304E0 --0.16870733176802E0 -0.16849829988261E0 -0.16828936129581E0 -0.168080516029E0 -0.16787176410361E0 --0.16766310554109E0 -0.16745454036285E0 -0.16724606858998E0 -0.16703769024441E0 -0.16682940534833E0 --0.16662121392385E0 -0.16641311599269E0 -0.16620511157639E0 -0.16599720069703E0 -0.16578938337752E0 --0.16558165964092E0 -0.1653740295088E0 -0.16516649300274E0 -0.16495905014445E0 -0.16475170095734E0 --0.16454444546414E0 -0.16433728368766E0 -0.16413021565072E0 -0.16392324137598E0 -0.16371636088668E0 --0.16350957420603E0 -0.16330288135699E0 -0.16309628236224E0 -0.16288977724398E0 -0.16268336602617E0 --0.16247704873249E0 -0.16227082538649E0 -0.16206469601161E0 -0.16185866063104E0 -0.1616527192687E0 --0.16144687194867E0 -0.1612411186951E0 -0.16103545953131E0 -0.16082989448058E0 -0.16062442356681E0 --0.16041904681464E0 -0.16021376424826E0 -0.16000857589212E0 -0.1598034817706E0 -0.15959848190808E0 --0.15939357632925E0 -0.1591887650587E0 -0.15898404812102E0 -0.15877942554077E0 -0.1585748973423E0 --0.15837046355088E0 -0.1581661241912E0 -0.15796187928844E0 -0.15775772886826E0 -0.15755367295644E0 --0.15734971157769E0 -0.15714584475672E0 -0.15694207251847E0 -0.15673839488917E0 -0.15653481189441E0 --0.15633132356012E0 -0.15612792991225E0 -0.15592463097669E0 -0.15572142677945E0 -0.15551831734642E0 --0.15531530270364E0 -0.1551123828773E0 -0.15490955789325E0 -0.1547068277781E0 -0.15450419255816E0 
--0.15430165225983E0 -0.15409920691004E0 -0.1538968565353E0 -0.15369460116267E0 -0.15349244081929E0 --0.15329037553239E0 -0.15308840532842E0 -0.15288653023371E0 -0.15268475027544E0 -0.15248306548152E0 --0.15228147587954E0 -0.15207998149691E0 -0.15187858236096E0 -0.15167727849916E0 -0.15147606993968E0 --0.15127495671062E0 -0.15107393883959E0 -0.15087301635409E0 -0.15067218928143E0 -0.1504714576506E0 --0.15027082149004E0 -0.15007028082811E0 -0.14986983569314E0 -0.1496694861131E0 -0.1494692321171E0 --0.14926907373402E0 -0.14906901099262E0 -0.14886904392142E0 -0.14866917254852E0 -0.14846939690342E0 --0.14826971701566E0 -0.14807013291465E0 -0.14787064462963E0 -0.14767125218987E0 -0.14747195562479E0 --0.14727275496393E0 -0.14707365023654E0 -0.1468746414726E0 -0.14667572870172E0 -0.14647691195372E0 --0.1462781912591E0 -0.14607956664796E0 -0.14588103815074E0 -0.14568260579791E0 -0.14548426961995E0 --0.14528602964698E0 -0.1450878859089E0 -0.1448898384364E0 -0.14469188726059E0 -0.14449403241215E0 --0.1442962739223E0 -0.14409861182191E0 -0.14390104614212E0 -0.14370357691467E0 -0.14350620417134E0 --0.14330892794314E0 -0.14311174826101E0 -0.14291466515583E0 -0.14271767866002E0 -0.14252078880536E0 --0.14232399562387E0 -0.14212729914771E0 -0.14193069940871E0 -0.14173419643951E0 -0.14153779027258E0 --0.14134148094028E0 -0.14114526847475E0 -0.14094915290781E0 -0.1407531342725E0 -0.14055721260177E0 --0.14036138792842E0 -0.14016566028554E0 -0.1399700297058E0 -0.13977449622262E0 -0.13957905986978E0 --0.13938372068129E0 -0.13918847868978E0 -0.13899333392796E0 -0.13879828642915E0 -0.13860333622802E0 --0.13840848335857E0 -0.138213727855E0 -0.13801906975157E0 -0.13782450908255E0 -0.13763004588164E0 --0.13743568018215E0 -0.13724141201882E0 -0.13704724142693E0 -0.13685316844148E0 -0.13665919309713E0 --0.13646531542848E0 -0.13627153547045E0 -0.13607785325849E0 -0.13588426882793E0 -0.13569078221386E0 --0.13549739345124E0 -0.1353041025751E0 -0.13511090962169E0 -0.13491781462686E0 
-0.13472481762641E0 --0.13453191865616E0 -0.13433911775146E0 -0.13414641494932E0 -0.13395381028636E0 -0.13376130379894E0 --0.13356889552301E0 -0.13337658549416E0 -0.13318437374959E0 -0.13299226032623E0 -0.13280024526093E0 --0.1326083285909E0 -0.13241651035304E0 -0.13222479058473E0 -0.13203316932347E0 -0.13184164660674E0 --0.13165022247179E0 -0.13145889695558E0 -0.13126767009578E0 -0.13107654193091E0 -0.13088551249921E0 --0.13069458183863E0 -0.13050374998698E0 -0.13031301698222E0 -0.13012238286341E0 -0.12993184766937E0 --0.12974141143856E0 -0.12955107420931E0 -0.1293608360196E0 -0.12917069690924E0 -0.12898065691732E0 --0.12879071608313E0 -0.12860087444609E0 -0.12841113204553E0 -0.12822148892092E0 -0.12803194511149E0 --0.12784250065663E0 -0.12765315559686E0 -0.12746390997233E0 -0.12727476382307E0 -0.12708571718906E0 --0.12689677010994E0 -0.126707922627E0 -0.12651917478121E0 -0.12633052661315E0 -0.12614197816311E0 --0.1259535294708E0 -0.12576518057814E0 -0.12557693152663E0 -0.12538878235764E0 -0.12520073311237E0 --0.12501278383161E0 -0.12482493455739E0 -0.12463718533192E0 -0.12444953619738E0 -0.12426198719517E0 --0.12407453836651E0 -0.12388718975354E0 -0.12369994139916E0 -0.1235127933458E0 -0.1233257456362E0 --0.12313879831298E0 -0.1229519514188E0 -0.12276520499686E0 -0.12257855909016E0 -0.12239201374171E0 --0.12220556899443E0 -0.12201922489091E0 -0.12183298147558E0 -0.12164683879225E0 -0.12146079688473E0 --0.12127485579675E0 -0.12108901557151E0 -0.12090327625397E0 -0.12071763788874E0 -0.12053210052032E0 --0.12034666419276E0 -0.12016132894988E0 -0.11997609483668E0 -0.11979096189814E0 -0.11960593017891E0 --0.11942099972486E0 -0.11923617058165E0 -0.11905144279456E0 -0.1188668164086E0 -0.1186822914683E0 --0.11849786802019E0 -0.11831354611018E0 -0.11812932578427E0 -0.11794520708875E0 -0.11776119006954E0 --0.11757727477335E0 -0.11739346124693E0 -0.11720974953697E0 -0.11702613969001E0 -0.11684263175228E0 --0.11665922577086E0 -0.11647592179339E0 -0.1162927198672E0 
-0.11610962003964E0 -0.11592662235782E0 --0.1157437268692E0 -0.11556093362238E0 -0.11537824266575E0 -0.11519565404707E0 -0.11501316781396E0 --0.11483078401394E0 -0.11464850269636E0 -0.11446632390986E0 -0.11428424770324E0 -0.11410227412532E0 --0.11392040322446E0 -0.11373863505049E0 -0.11355696965287E0 -0.11337540708098E0 -0.11319394738406E0 --0.11301259061086E0 -0.11283133681168E0 -0.11265018603676E0 -0.11246913833617E0 -0.11228819376017E0 --0.11210735235855E0 -0.11192661418206E0 -0.11174597928188E0 -0.11156544770934E0 -0.11138501951453E0 --0.1112046947475E0 -0.111024473459E0 -0.11084435570138E0 -0.11066434152628E0 -0.11048443098546E0 --0.1103046241307E0 -0.11012492101378E0 -0.10994532168647E0 -0.10976582620007E0 -0.10958643460701E0 --0.10940714696021E0 -0.1092279633121E0 -0.10904888371564E0 -0.10886990822347E0 -0.10869103688851E0 --0.10851226976426E0 -0.10833360690402E0 -0.10815504836103E0 -0.10797659418841E0 -0.10779824443927E0 --0.10761999916815E0 -0.10744185842893E0 -0.10726382227587E0 -0.10708589076336E0 -0.1069080639454E0 --0.10673034187709E0 -0.10655272461324E0 -0.10637521220861E0 -0.10619780471793E0 -0.10602050219539E0 --0.10584330469686E0 -0.10566621227794E0 -0.10548922499413E0 -0.10531234290141E0 -0.10513556605539E0 --0.10495889451226E0 -0.10478232832839E0 -0.10460586756E0 -0.1044295122635E0 -0.10425326249502E0 --0.10407711831117E0 -0.1039010797693E0 -0.1037251469262E0 -0.10354931983938E0 -0.10337359856616E0 --0.10319798316387E0 -0.10302247369062E0 -0.10284707020427E0 -0.10267177276266E0 -0.10249658142362E0 --0.10232149624447E0 -0.10214651728455E0 -0.10197164460238E0 -0.1017968782568E0 -0.10162221830699E0 --0.10144766481205E0 -0.10127321783099E0 -0.10109887742263E0 -0.1009246436459E0 -0.10075051656126E0 --0.10057649622849E0 -0.10040258270767E0 -0.10022877605895E0 -0.10005507634214E0 -0.99881483618255E-1 --0.9970799794796E-1 -0.99534619391916E-1 -0.99361348010803E-1 -0.99188183864639E-1 -0.99015127015451E-1 --0.98842177524877E-1 -0.98669335454488E-1 
-0.98496600866026E-1 -0.98323973820788E-1 -0.98151454381128E-1 --0.97979042609592E-1 -0.97806738568599E-1 -0.97634542320406E-1 -0.97462453926958E-1 -0.97290473451003E-1 --0.97118600956112E-1 -0.96946836505297E-1 -0.96775180162113E-1 -0.96603631989881E-1 -0.96432192052037E-1 --0.96260860412857E-1 -0.96089637136278E-1 -0.9591852228637E-1 -0.95747515927168E-1 -0.95576618122383E-1 --0.95405828937344E-1 -0.95235148436654E-1 -0.95064576685339E-1 -0.94894113748816E-1 -0.94723759692022E-1 --0.94553514580843E-1 -0.94383378480951E-1 -0.94213351458033E-1 -0.94043433577913E-1 -0.93873624905836E-1 --0.93703925508512E-1 -0.93534335452749E-1 -0.93364854805018E-1 -0.93195483632544E-1 -0.93026222002308E-1 --0.92857069981357E-1 -0.92688027636797E-1 -0.92519095035031E-1 -0.92350272244591E-1 -0.92181559333431E-1 --0.92012956369541E-1 -0.91844463421238E-1 -0.91676080556357E-1 -0.91507807843769E-1 -0.91339645352403E-1 --0.91171593151048E-1 -0.9100365130868E-1 -0.90835819893847E-1 -0.90668098975965E-1 -0.90500488625067E-1 --0.90332988910656E-1 -0.90165599902925E-1 -0.89998321671689E-1 -0.89831154287039E-1 -0.89664097820026E-1 --0.89497152341387E-1 -0.89330317921772E-1 -0.89163594631721E-1 -0.88996982541564E-1 -0.88830481723491E-1 --0.88664092248925E-1 -0.88497814189628E-1 -0.88331647617583E-1 -0.8816559260413E-1 -0.87999649222323E-1 --0.87833817544804E-1 -0.87668097644163E-1 -0.87502489592831E-1 -0.87336993462691E-1 -0.87171609327415E-1 --0.87006337260617E-1 -0.86841177335668E-1 -0.86676129626523E-1 -0.86511194206635E-1 -0.8634637115022E-1 --0.86181660531919E-1 -0.86017062426117E-1 -0.85852576907352E-1 -0.85688204049875E-1 -0.85523943928354E-1 --0.85359796618506E-1 -0.85195762195252E-1 -0.85031840734587E-1 -0.84868032312484E-1 -0.84704337004818E-1 - -0.845407548876E-1 -0.84377286036433E-1 -0.8421393052786E-1 -0.84050688438882E-1 -0.83887559845806E-1 --0.8372454482633E-1 -0.8356164345759E-1 -0.83398855816978E-1 -0.83236181982417E-1 -0.83073622031502E-1 --0.82911176042248E-1 
-0.82748844092513E-1 -0.82586626260056E-1 -0.82424522624197E-1 -0.82262533263527E-1 --0.82100658257128E-1 -0.81938897684292E-1 -0.81777251623727E-1 -0.8161572015572E-1 -0.81454303360046E-1 --0.81293001316573E-1 -0.81131814105318E-1 -0.80970741805674E-1 -0.8080978449874E-1 -0.80648942265399E-1 --0.80488215186405E-1 -0.80327603343133E-1 -0.80167106816356E-1 -0.80006725687858E-1 -0.79846460039795E-1 --0.79686309954081E-1 -0.79526275512729E-1 -0.79366356797439E-1 -0.79206553890482E-1 -0.79046866875083E-1 --0.78887295833707E-1 -0.78727840849885E-1 -0.78568502006916E-1 -0.78409279388047E-1 -0.78250173077601E-1 --0.78091183159526E-1 -0.77932309717851E-1 -0.77773552836607E-1 -0.77614912599182E-1 -0.77456389091371E-1 --0.77297982398016E-1 -0.77139692604327E-1 -0.76981519796035E-1 -0.76823464058604E-1 -0.76665525477753E-1 --0.76507704138961E-1 -0.7635000012778E-1 -0.76192413531504E-1 -0.76034944436661E-1 -0.7587759293018E-1 --0.75720359099071E-1 -0.75563243029875E-1 -0.75406244810791E-1 -0.75249364529407E-1 -0.75092602273527E-1 --0.74935958131218E-1 -0.74779432189906E-1 -0.74623024538671E-1 -0.7446673526621E-1 -0.74310564461218E-1 --0.74154512213285E-1 -0.73998578611398E-1 -0.73842763745341E-1 -0.73687067705062E-1 -0.73531490580223E-1 --0.7337603246121E-1 -0.73220693437842E-1 -0.7306547360067E-1 -0.72910373041091E-1 -0.72755391849835E-1 --0.72600530118547E-1 -0.72445787938543E-1 -0.7229116540125E-1 -0.72136662599254E-1 -0.71982279624691E-1 --0.71828016569861E-1 -0.71673873527024E-1 -0.71519850587926E-1 -0.71365947846713E-1 -0.71212165396635E-1 --0.71058503331217E-1 -0.70904961744253E-1 -0.7075154072887E-1 -0.70598240379939E-1 -0.7044506079194E-1 --0.70292002059361E-1 -0.70139064277045E-1 -0.6998624753922E-1 -0.69833551941487E-1 -0.69680977579569E-1 --0.69528524548705E-1 -0.69376192945584E-1 -0.69223982866404E-1 -0.6907189440749E-1 -0.6891992766528E-1 --0.68768082735369E-1 -0.68616359715867E-1 -0.68464758704263E-1 -0.68313279798063E-1 -0.68161923095151E-1 
--0.68010688692744E-1 -0.67859576689563E-1 -0.67708587184465E-1 -0.67557720276082E-1 -0.67406976063284E-1 --0.67256354644435E-1 -0.67105856118919E-1 -0.6695548058694E-1 -0.66805228147993E-1 -0.66655098902566E-1 - -0.665050929507E-1 -0.66355210392706E-1 -0.66205451330057E-1 -0.66055815863719E-1 -0.65906304094932E-1 --0.65756916124846E-1 -0.65607652054278E-1 -0.65458511986198E-1 -0.6530949602264E-1 -0.65160604266162E-1 --0.65011836819699E-1 -0.64863193785514E-1 -0.64714675267467E-1 -0.64566281368923E-1 -0.64418012193384E-1 --0.64269867844857E-1 -0.64121848426648E-1 -0.63973954043556E-1 -0.63826184800335E-1 -0.63678540801425E-1 --0.63531022152534E-1 -0.63383628958658E-1 -0.63236361325521E-1 -0.63089219359318E-1 -0.62942203165767E-1 --0.62795312851428E-1 -0.62648548522347E-1 -0.62501910285021E-1 -0.62355398247241E-1 -0.62209012515895E-1 --0.62062753199023E-1 -0.61916620404653E-1 -0.61770614240682E-1 -0.61624734815416E-1 -0.61478982236522E-1 --0.61333356612928E-1 -0.61187858054265E-1 -0.61042486669362E-1 -0.60897242568402E-1 -0.60752125860973E-1 --0.60607136657026E-1 -0.60462275067404E-1 -0.60317541202422E-1 -0.60172935173002E-1 -0.60028457089895E-1 --0.5988410706367E-1 -0.59739885206873E-1 -0.5959579163112E-1 -0.59451826448679E-1 -0.59307989772151E-1 --0.59164281713426E-1 -0.59020702386174E-1 -0.58877251903392E-1 -0.58733930378354E-1 -0.58590737924901E-1 --0.58447674656122E-1 -0.58304740686706E-1 -0.58161936131116E-1 -0.58019261103629E-1 -0.57876715719931E-1 --0.57734300094944E-1 -0.57592014344343E-1 -0.57449858584139E-1 -0.57307832929833E-1 -0.57165937498084E-1 --0.5702417240486E-1 -0.56882537766759E-1 -0.56741033701543E-1 -0.56599660326135E-1 -0.56458417758561E-1 --0.56317306116624E-1 -0.56176325518031E-1 -0.56035476081941E-1 -0.55894757926853E-1 -0.55754171171749E-1 --0.55613715935787E-1 -0.55473392337338E-1 -0.55333200497235E-1 -0.55193140535282E-1 -0.55053212571787E-1 --0.54913416727869E-1 -0.54773753124115E-1 -0.54634221881804E-1 -0.54494823121941E-1 
-0.54355556965528E-1 --0.54216423535446E-1 -0.5407742295362E-1 -0.5393855534277E-1 -0.53799820825843E-1 -0.53661219525116E-1 --0.53522751564845E-1 -0.53384417068551E-1 -0.53246216160046E-1 -0.53108148963561E-1 -0.52970215602453E-1 --0.52832416202239E-1 -0.52694750888017E-1 -0.5255721978482E-1 -0.52419823018748E-1 -0.52282560715103E-1 --0.52145433000354E-1 -0.52008440001264E-1 -0.51871581844155E-1 -0.51734858656334E-1 -0.51598270564415E-1 --0.51461817695838E-1 -0.51325500179107E-1 -0.51189318141834E-1 -0.51053271712973E-1 -0.50917361021086E-1 --0.50781586194803E-1 -0.50645947364156E-1 -0.5051044465848E-1 -0.50375078207722E-1 -0.50239848141914E-1 --0.5010475459044E-1 -0.49969797685133E-1 -0.49834977556796E-1 -0.49700294336783E-1 -0.49565748157072E-1 --0.49431339148793E-1 -0.49297067444904E-1 -0.49162933177946E-1 -0.49028936480494E-1 -0.48895077485854E-1 --0.48761356326483E-1 -0.4862777313648E-1 -0.48494328050175E-1 -0.48361021201277E-1 -0.48227852725203E-1 --0.48094822756662E-1 -0.47961931430791E-1 -0.47829178883159E-1 -0.47696565248404E-1 -0.47564090663511E-1 --0.47431755264821E-1 -0.4729955918878E-1 -0.47167502572933E-1 -0.47035585553947E-1 -0.46903808269847E-1 --0.46772170858809E-1 -0.46640673458629E-1 -0.46509316208176E-1 -0.46378099245529E-1 -0.46247022709802E-1 --0.46116086741053E-1 -0.45985291478414E-1 -0.45854637062568E-1 -0.45724123633639E-1 -0.45593751331993E-1 --0.45463520299339E-1 -0.45333430676632E-1 -0.45203482605634E-1 -0.45073676228091E-1 -0.4494401168527E-1 --0.44814489120762E-1 -0.44685108677094E-1 -0.44555870497522E-1 -0.44426774725893E-1 -0.44297821505134E-1 --0.44169010980252E-1 -0.44040343295637E-1 -0.43911818595842E-1 -0.43783437026142E-1 -0.43655198730869E-1 --0.43527103856308E-1 -0.43399152548786E-1 -0.43271344954192E-1 -0.4314368121983E-1 -0.43016161492129E-1 --0.42888785918529E-1 -0.42761554647199E-1 -0.42634467825596E-1 -0.42507525602391E-1 -0.42380728125629E-1 --0.42254075543822E-1 -0.4212756800706E-1 -0.42001205664321E-1 
-0.4187498866606E-1 -0.41748917162726E-1 --0.41622991304509E-1 -0.41497211242638E-1 -0.41371577127475E-1 -0.41246089110683E-1 -0.41120747344746E-1 --0.4099555198118E-1 -0.40870503173274E-1 -0.40745601073551E-1 -0.40620845834989E-1 -0.40496237611867E-1 --0.4037177655766E-1 -0.40247462826804E-1 -0.40123296573583E-1 -0.39999277951982E-1 -0.39875407118283E-1 --0.39751684227641E-1 -0.39628109436114E-1 -0.3950468290027E-1 -0.39381404775706E-1 -0.39258275220356E-1 --0.39135294391312E-1 -0.39012462445991E-1 -0.38889779542575E-1 -0.3876724583823E-1 -0.38644861492276E-1 --0.38522626663846E-1 -0.38400541511784E-1 -0.38278606196418E-1 -0.38156820877101E-1 -0.38035185714414E-1 --0.3791370086954E-1 -0.37792366502907E-1 -0.37671182776409E-1 -0.37550149851118E-1 -0.37429267888807E-1 --0.3730853705271E-1 -0.37187957504968E-1 -0.3706752940926E-1 -0.3694725292902E-1 -0.36827128227505E-1 --0.36707155469772E-1 -0.36587334819973E-1 -0.36467666443088E-1 -0.36348150504463E-1 -0.36228787168409E-1 --0.36109576602108E-1 -0.35990518971544E-1 -0.35871614443301E-1 -0.35752863185063E-1 -0.35634265363689E-1 --0.35515821147273E-1 -0.35397530703599E-1 -0.35279394200352E-1 -0.35161411807381E-1 -0.35043583693357E-1 --0.34925910028096E-1 -0.34808390981827E-1 -0.3469102672387E-1 -0.34573817425942E-1 -0.34456763258799E-1 --0.34339864393684E-1 -0.34223121002591E-1 -0.34106533256407E-1 -0.33990101328483E-1 -0.33873825391736E-1 --0.33757705618965E-1 -0.33641742184438E-1 -0.33525935261368E-1 -0.33410285024449E-1 -0.3329479164882E-1 --0.33179455308941E-1 -0.33064276180804E-1 -0.32949254439479E-1 -0.3283439026097E-1 -0.32719683822615E-1 --0.32605135300613E-1 -0.32490744872951E-1 -0.32376512717167E-1 -0.3226243901081E-1 -0.32148523933252E-1 --0.32034767662892E-1 -0.31921170379101E-1 -0.31807732261456E-1 -0.31694453488649E-1 -0.31581334242332E-1 --0.31468374702902E-1 -0.31355575051454E-1 -0.31242935469973E-1 -0.31130456139335E-1 -0.31018137242712E-1 --0.3090597896277E-1 -0.30793981482194E-1 -0.30682144984848E-1 
-0.30570469653482E-1 -0.30458955672778E-1 --0.30347603227776E-1 -0.30236412502702E-1 -0.30125383683925E-1 -0.3001451695686E-1 -0.29903812507597E-1 --0.29793270522984E-1 -0.29682891188679E-1 -0.29572674693065E-1 -0.29462621223823E-1 -0.29352730968721E-1 --0.29243004117005E-1 -0.29133440856785E-1 -0.29024041377881E-1 -0.28914805870362E-1 -0.28805734523706E-1 --0.28696827529071E-1 -0.28588085076565E-1 -0.28479507357478E-1 -0.28371094564288E-1 -0.28262846888285E-1 --0.28154764522828E-1 -0.28046847660595E-1 -0.27939096494502E-1 -0.27831511219196E-1 -0.27724092028276E-1 --0.27616839116594E-1 -0.27509752679074E-1 -0.27402832909958E-1 -0.27296080006278E-1 -0.27189494163756E-1 --0.27083075579076E-1 -0.26976824449796E-1 -0.268707409723E-1 -0.2676482534547E-1 -0.26659077767455E-1 --0.26553498436605E-1 -0.26448087552488E-1 -0.26342845313461E-1 -0.26237771920083E-1 -0.26132867573032E-1 --0.26028132472373E-1 -0.25923566820195E-1 -0.25819170817429E-1 -0.25714944666218E-1 -0.25610888569685E-1 --0.25507002729938E-1 -0.25403287350917E-1 -0.25299742635773E-1 -0.25196368788121E-1 -0.25093166013538E-1 --0.24990134516227E-1 -0.24887274502234E-1 -0.24784586177637E-1 -0.24682069748069E-1 -0.24579725420934E-1 --0.24477553402472E-1 -0.24375553900356E-1 -0.24273727123273E-1 -0.24172073278662E-1 -0.24070592576392E-1 --0.23969285225387E-1 -0.23868151435045E-1 -0.23767191416411E-1 -0.23666405379404E-1 -0.23565793535479E-1 --0.2346535609598E-1 -0.23365093271775E-1 -0.23265005276474E-1 -0.23165092322306E-1 -0.23065354622682E-1 --0.22965792391772E-1 -0.22866405842485E-1 -0.2276719519062E-1 -0.22668160650938E-1 -0.22569302438627E-1 --0.22470620770081E-1 -0.22372115860399E-1 -0.2227378792717E-1 -0.22175637187827E-1 -0.22077663859382E-1 --0.2197986816091E-1 -0.21882250310218E-1 -0.21784810526604E-1 -0.21687549030194E-1 -0.21590466040048E-1 --0.21493561777374E-1 -0.21396836462323E-1 -0.21300290315814E-1 -0.2120392356061E-1 -0.21107736418103E-1 --0.21011729111639E-1 -0.20915901864314E-1 
-0.20820254898907E-1 -0.20724788440649E-1 -0.20629502713519E-1 --0.20534397942646E-1 -0.2043947435374E-1 -0.20344732171142E-1 -0.20250171622715E-1 -0.20155792934854E-1 --0.2006159633468E-1 -0.19967582050795E-1 -0.19873750310629E-1 -0.1978010134345E-1 -0.19686635378192E-1 --0.19593352643555E-1 -0.19500253370898E-1 -0.19407337790118E-1 -0.19314606132573E-1 -0.19222058630232E-1 --0.19129695513816E-1 -0.19037517017182E-1 -0.18945523372908E-1 -0.18853714814234E-1 -0.18762091575579E-1 --0.18670653889962E-1 -0.1857940199324E-1 -0.18488336120816E-1 -0.18397456507891E-1 -0.18306763391743E-1 --0.18216257008281E-1 -0.1812593759518E-1 -0.18035805390748E-1 -0.17945860632259E-1 -0.17856103559389E-1 --0.17766534410557E-1 -0.17677153425233E-1 -0.17587960844558E-1 -0.1749895690823E-1 -0.17410141858277E-1 --0.17321515936216E-1 -0.1723307938349E-1 -0.17144832443905E-1 -0.17056775359926E-1 -0.16968908375475E-1 --0.16881231734886E-1 -0.1679374568131E-1 -0.1670645046139E-1 -0.16619346320248E-1 -0.1653243350392E-1 --0.16445712259735E-1 -0.16359182833582E-1 -0.16272845474163E-1 -0.16186700429588E-1 -0.16100747947943E-1 --0.16014988279135E-1 -0.1592942167164E-1 -0.15844048376113E-1 -0.15758868643716E-1 -0.15673882724491E-1 --0.15589090871456E-1 -0.15504493336298E-1 -0.15420090371615E-1 -0.15335882231149E-1 -0.15251869167143E-1 --0.15168051435022E-1 -0.15084429289435E-1 -0.15001002985088E-1 -0.14917772778737E-1 -0.14834738925653E-1 --0.14751901683223E-1 -0.14669261309223E-1 -0.14586818060533E-1 -0.1450457219655E-1 -0.14422523975283E-1 --0.14340673656075E-1 -0.14259021499761E-1 -0.14177567765669E-1 -0.1409631271583E-1 -0.14015256611462E-1 --0.13934399713992E-1 -0.13853742287145E-1 -0.13773284593205E-1 -0.13693026896243E-1 -0.13612969460541E-1 --0.13533112549428E-1 -0.13453456429632E-1 -0.13374001366267E-1 -0.13294747625663E-1 -0.13215695475347E-1 --0.13136845181332E-1 -0.13058197012776E-1 -0.1297975123793E-1 -0.12901508125277E-1 -0.12823467945162E-1 --0.12745630966379E-1 -0.12667997460231E-1 
-0.12590567698238E-1 -0.12513341951052E-1 -0.12436320492178E-1 --0.12359503593606E-1 -0.12282891528748E-1 -0.12206484572278E-1 -0.12130282997446E-1 -0.12054287080265E-1 --0.11978497095747E-1 -0.11902913319345E-1 -0.11827536028876E-1 -0.11752365500465E-1 -0.11677402012599E-1 --0.11602645843877E-1 -0.115280972722E-1 -0.11453756578059E-1 -0.11379624040417E-1 -0.11305699939897E-1 --0.11231984558431E-1 -0.11158478176347E-1 -0.11085181077152E-1 -0.110120935432E-1 -0.10939215857354E-1 --0.10866548304625E-1 -0.10794091168489E-1 -0.10721844734613E-1 -0.10649809288624E-1 -0.10577985115448E-1 --0.10506372503329E-1 -0.10434971738796E-1 -0.10363783109921E-1 -0.10292806905871E-1 -0.10222043414196E-1 --0.10151492925988E-1 -0.10081155731075E-1 -0.10011032119816E-1 -0.99411223843916E-2 -0.98714268153058E-2 --0.98019457059869E-2 -0.97326793497653E-2 -0.96636280393476E-2 -0.9594792070268E-2 -0.95261717364224E-2 --0.94577673334865E-2 -0.93895791582458E-2 -0.93216075059667E-2 -0.92538526751067E-2 -0.91863149627669E-2 --0.91189946668395E-2 -0.90518920874418E-2 -0.89850075229411E-2 -0.8918341274373E-2 -0.88518936425522E-2 --0.87856649277817E-2 -0.87196554334094E-2 -0.86538654611327E-2 -0.85882953143085E-2 -0.85229452972182E-2 --0.84578157124263E-2 -0.8392906866587E-2 -0.83282190646842E-2 -0.82637526125457E-2 -0.8199507817989E-2 --0.81354849872057E-2 -0.8071684428996E-2 -0.80081064518274E-2 -0.79447513637513E-2 -0.78816194760488E-2 --0.78187110981859E-2 -0.77560265415225E-2 -0.7693566118342E-2 -0.7631330139224E-2 -0.75693189187635E-2 - -0.750753276988E-2 -0.74459720063764E-2 -0.73846369438196E-2 -0.73235278959913E-2 -0.72626451799881E-2 --0.72019891124506E-2 -0.7141560009677E-2 -0.70813581908106E-2 -0.70213839732225E-2 -0.69616376764267E-2 --0.69021196208248E-2 -0.68428301253034E-2 -0.67837695122858E-2 -0.672493810252E-2 -0.66663362179233E-2 --0.66079641825044E-2 -0.65498223184311E-2 -0.64919109509276E-2 -0.64342304046587E-2 -0.63767810040769E-2 --0.63195630766745E-2 -0.62625769481602E-2 
-0.62058229462606E-2 -0.61493013994065E-2 -0.60930126344403E-2 --0.60369569824404E-2 -0.59811347726094E-2 -0.59255463353105E-2 -0.58701920027361E-2 -0.58150721052304E-2 --0.57601869765454E-2 -0.5705536949763E-2 -0.56511223578497E-2 -0.55969435364683E-2 -0.55430008194189E-2 --0.54892945430302E-2 -0.54358250443567E-2 -0.53825926589493E-2 -0.53295977262603E-2 -0.52768405839694E-2 --0.5224321570971E-2 -0.51720410278663E-2 -0.51199992933473E-2 -0.50681967098436E-2 -0.50166336189356E-2 --0.49653103621922E-2 -0.49142272840079E-2 -0.48633847268691E-2 -0.48127830358068E-2 -0.4762422556441E-2 - -0.471230363307E-2 -0.4662426613645E-2 -0.46127918442799E-2 -0.4563399672608E-2 -0.45142504481635E-2 --0.44653445185353E-2 -0.44166822348735E-2 -0.43682639473838E-2 -0.43200900064021E-2 -0.42721607652032E-2 --0.42244765751456E-2 -0.41770377900601E-2 -0.41298447642027E-2 -0.40828978505068E-2 -0.40361974060823E-2 --0.3989743786037E-2 -0.39435373470098E-2 -0.38975784473373E-2 -0.38518674433957E-2 -0.3806404695438E-2 --0.3761190562638E-2 -0.3716225404397E-2 -0.36715095828078E-2 -0.3627043457958E-2 -0.35828273928826E-2 --0.35388617509931E-2 -0.34951468944828E-2 -0.34516831894087E-2 -0.34084709998559E-2 -0.33655106916246E-2 --0.33228026322158E-2 -0.32803471871992E-2 -0.32381447259118E-2 -0.31961956164694E-2 -0.31545002273991E-2 --0.3113058930206E-2 -0.30718720942826E-2 -0.30309400919635E-2 -0.29902632958295E-2 -0.29498420773923E-2 --0.2909676811895E-2 -0.28697678725756E-2 -0.28301156345865E-2 -0.27907204747678E-2 -0.27515827679157E-2 --0.27127028929136E-2 -0.26740812272646E-2 -0.26357181489942E-2 -0.2597614038967E-2 -0.25597692759878E-2 --0.25221842418255E-2 -0.24848593183363E-2 -0.24477948863602E-2 -0.24109913308536E-2 -0.23744490346383E-2 --0.23381683824739E-2 -0.23021497606445E-2 -0.22663935533626E-2 -0.22309001492386E-2 -0.21956699353456E-2 --0.21607032993868E-2 -0.21260006317063E-2 -0.20915623205E-2 -0.20573887573766E-2 -0.20234803339328E-2 --0.19898374408541E-2 -0.19564604727016E-2 
-0.19233498219054E-2 -0.18905058830361E-2 -0.18579290521592E-2 --0.18256197232867E-2 -0.17935782947833E-2 -0.17618051633236E-2 -0.17303007264211E-2 -0.16990653844146E-2 --0.16680995354233E-2 -0.16374035809723E-2 -0.16069779224277E-2 -0.15768229603685E-2 -0.15469390993403E-2 --0.15173267417187E-2 -0.14879862920661E-2 -0.14589181563082E-2 -0.14301227382033E-2 -0.14016004463895E-2 --0.13733516875783E-2 -0.13453768694367E-2 -0.13176764023128E-2 -0.12902506943617E-2 -0.12631001572428E-2 --0.12362252023122E-2 -0.12096262402571E-2 -0.11833036858108E-2 -0.11572579514239E-2 -0.11314894519422E-2 --0.11059986035264E-2 -0.10807858201516E-2 -0.10558515207616E-2 -0.10311961222483E-2 -0.1006820042596E-2 --0.98272370234785E-3 -0.95890751975361E-3 -0.93537191697686E-3 -0.91211731574287E-3 -0.88914413721881E-3 --0.86645280643732E-3 -0.84404374613503E-3 -0.82191738165305E-3 -0.80007413958656E-3 -0.77851444436237E-3 --0.75723872538819E-3 -0.73624740986458E-3 -0.71554092630809E-3 -0.69511970587808E-3 -0.67498417738758E-3 --0.65513477361069E-3 -0.6355719267318E-3 -0.61629606852172E-3 -0.59730763466131E-3 -0.57860705848872E-3 --0.56019477604117E-3 -0.54207122446238E-3 -0.52423683876497E-3 -0.50669205919079E-3 -0.48943732366139E-3 --0.47247307152323E-3 -0.45579974462836E-3 -0.43941778245359E-3 -0.42332762868491E-3 -0.40752972626449E-3 --0.39202451785544E-3 -0.37681244992344E-3 -0.36189396652617E-3 -0.34726951469522E-3 -0.33293954248135E-3 --0.31890449590944E-3 -0.30516482613451E-3 -0.29172098197806E-3 -0.27857341381609E-3 -0.26572257446433E-3 --0.25316891430202E-3 -0.24091288816095E-3 -0.22895494995228E-3 -0.21729555344612E-3 -0.20593515623797E-3 --0.19487421345853E-3 -0.18411318335227E-3 -0.17365252504424E-3 -0.16349269575673E-3 -0.15363415780791E-3 --0.14407737111306E-3 -0.13482279733315E-3 -0.1258709005646E-3 -0.11722214242093E-3 -0.1088769890951E-3 --0.10083590569591E-3 -0.93099357348157E-4 -0.85667813001108E-4 -0.78541739083955E-4 -0.71721605295356E-4 --0.65207882068843E-4 
-0.59001038022916E-4 -0.53101546975722E-4 -0.47509880257601E-4 -0.42226511086741E-4 --0.37251915015477E-4 -0.32586565073856E-4 -0.28230939106284E-4 -0.24185513693527E-4 -0.20450765585832E-4 --0.17027175308443E-4 -0.1391522080043E-4 -0.11115383496019E-4 -0.86281454348713E-5 -0.64539869572879E-5 --0.45933935663466E-5 -0.30468482236608E-5 -0.18148359506597E-5 -0.89784403731179E-6 -0.29635889385556E-6 diff --git a/examples/ttm/Fe_mm.eam.fs b/examples/ttm/Fe_mm.eam.fs new file mode 120000 index 00000000000..65026101540 --- /dev/null +++ b/examples/ttm/Fe_mm.eam.fs @@ -0,0 +1 @@ +../../potentials/Fe_mm.eam.fs \ No newline at end of file diff --git a/examples/ttm/in.ttm.thermal b/examples/ttm/in.ttm.thermal index 4c768ac921d..8d26524e112 100644 --- a/examples/ttm/in.ttm.thermal +++ b/examples/ttm/in.ttm.thermal @@ -21,7 +21,7 @@ create_atoms 1 region atom_box mass 1 55.845 pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe +pair_coeff * * Fe_mm_eam.fs Fe neighbor 2.0 bin neigh_modify every 5 delay 0 check yes From 1aa40e660fcf0f7fe29961e4f9eb377a581db909 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:37:29 -0400 Subject: [PATCH 039/604] whitespace and permissions --- doc/src/fix_ttm.rst | 50 ++++----- examples/ttm/in.ttm.thermal | 22 ++-- src/EXTRA-FIX/fix_ttm_thermal.cpp | 162 +++++++++++++++--------------- src/EXTRA-FIX/fix_ttm_thermal.h | 0 4 files changed, 117 insertions(+), 117 deletions(-) mode change 100755 => 100644 doc/src/fix_ttm.rst mode change 100755 => 100644 src/EXTRA-FIX/fix_ttm_thermal.cpp mode change 100755 => 100644 src/EXTRA-FIX/fix_ttm_thermal.h diff --git a/doc/src/fix_ttm.rst b/doc/src/fix_ttm.rst old mode 100755 new mode 100644 index 14af7a6b5c7..1fce1936405 --- a/doc/src/fix_ttm.rst +++ b/doc/src/fix_ttm.rst @@ -49,7 +49,7 @@ Syntax Nx = number of thermal solve grid points in the x-direction (positive integer) Ny = number of thermal solve grid points in the y-direction (positive integer) Nz = number of thermal 
solve grid points in the z-direction (positive integer) - + * remaining arguments for fix ttm/thermal: .. parsed-literal:: @@ -72,7 +72,7 @@ Syntax file.out = filename to write grid temperatures to * fix ttm/thermal supports an additional keyword: *source* - + .. parsed-literal:: *source* value = source source = volumetric heating term applied to electrons (energy/(time\*volume) units) @@ -86,7 +86,7 @@ Examples fix 3 all ttm/grid 123456 1.0 1.0 1.0 1.0 1.0 5.0 5 5 5 infile Te.in fix 4 all ttm/mod 34277 parameters.txt 5 5 5 infile T_init outfile 10 T_out fix 5 all ttm/thermal 11111 properties.in 10 10 10 source 0.1 infile temps.in outfile 10 temps.out - + Example input scripts using these commands can be found in examples/ttm. Description @@ -123,9 +123,9 @@ expensive computationally than fix *ttm* because it treats the thermal diffusion equation as non-linear. More details on fix *ttm/mod* are given below. -Fix *ttm/thermal* allows for electronic properties to be assigned -independently to each TTM grid point and supports external heat sources -to the electronic subsystem. More details on fix *ttm/thermal* are +Fix *ttm/thermal* allows for electronic properties to be assigned +independently to each TTM grid point and supports external heat sources +to the electronic subsystem. More details on fix *ttm/thermal* are given below. Heat transfer between the electronic and atomic subsystems is carried @@ -268,10 +268,10 @@ units setting in use, grid size and the current timestep. reads. The file has the same format as the file the *infile* option reads. 
-For the fix ttm, fix ttm/mod, and fix ttm/thermal commands, the -corresponding atomic temperature for atoms in each grid cell can -be computed and output by the :doc:`fix ave/chunk ` -command using the:doc:`compute chunk/atom ` command +For the fix ttm, fix ttm/mod, and fix ttm/thermal commands, the +corresponding atomic temperature for atoms in each grid cell can +be computed and output by the :doc:`fix ave/chunk ` +command using the:doc:`compute chunk/atom ` command to create a 3d array of chunks consistent with the grid used by this fix. For the fix ttm/grid command the same thing can be done using the @@ -392,14 +392,14 @@ have been removed: \bigtriangledown (\kappa_\mathrm{eff} \bigtriangledown T_e) - g_p (T_e - T_a) + \eta s -where :math:`s` is the applied heating power density and :math:`\eta` is -the absorption efficiency (0-1) defined for each ttm grid cell in the -*properties.in* file. Also note that compared to the original *fix ttm*, -it uses use a volumetric specific heat, :math:`C_\mathrm{vol}` , which +where :math:`s` is the applied heating power density and :math:`\eta` is +the absorption efficiency (0-1) defined for each ttm grid cell in the +*properties.in* file. Also note that compared to the original *fix ttm*, +it uses use a volumetric specific heat, :math:`C_\mathrm{vol}` , which represents the product of :math:`C_e \rho_e`. -:ref:`(Baer) ` defined :math:`\kappa_\mathrm{eff}` as an effective -electronic thermal conductivity when two adjacent TTM cells (denoted by +:ref:`(Baer) ` defined :math:`\kappa_\mathrm{eff}` as an effective +electronic thermal conductivity when two adjacent TTM cells (denoted by the subscripts *a* and *b*) have different conductivities as: .. 
math:: @@ -407,22 +407,22 @@ the subscripts *a* and *b*) have different conductivities as: \kappa_\mathrm{eff} = \frac{2 \kappa_a \kappa_b}{\kappa_a + \kappa_b} The current fix *ttm/thermal* implementation allows TTM simulations with -TTM cells that do not contain electrons (vacuum or insulators). Similar -to *ttm/mod*, the absence of electrons is defined as the grid cells with -zero electronic temperature.The numerical scheme does not allow energy +TTM cells that do not contain electrons (vacuum or insulators). Similar +to *ttm/mod*, the absence of electrons is defined as the grid cells with +zero electronic temperature.The numerical scheme does not allow energy exchange with such cells. -The fix *ttm/thermal* parameter file *properties_file* uses a similar syntax -as the keyword *infile*. The file is read in line by line and each ttm cell's -properties are set. Comment lines are allowed and each line should have +The fix *ttm/thermal* parameter file *properties_file* uses a similar syntax +as the keyword *infile*. The file is read in line by line and each ttm cell's +properties are set. Comment lines are allowed and each line should have properties listed in the order: .. parsed-literal:: ix iy iz C_vol kappa_e gamma_p eta -the grid must match the one declared in the fix and all grid points must have +the grid must match the one declared in the fix and all grid points must have all properties set or *ttm/thermal* will exit with an error. ---------- @@ -430,8 +430,8 @@ all properties set or *ttm/thermal* will exit with an error. 
Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -The fix ttm, fix ttm/mod, and fix ttm/thermal commands write the state -of the electronic subsystem and the energy exchange between the subsystems +The fix ttm, fix ttm/mod, and fix ttm/thermal commands write the state +of the electronic subsystem and the energy exchange between the subsystems to :doc:`binary restart files `. The fix ttm/grid command does not yet support writing of its distributed grid to a restart file. diff --git a/examples/ttm/in.ttm.thermal b/examples/ttm/in.ttm.thermal index 8d26524e112..3a32e6bb5ec 100644 --- a/examples/ttm/in.ttm.thermal +++ b/examples/ttm/in.ttm.thermal @@ -4,19 +4,19 @@ boundary p p p variable latc equal 2.87 lattice bcc ${latc} -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_box 1 sim_box -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +create_box 1 sim_box +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_atoms 1 region atom_box +create_atoms 1 region atom_box mass 1 55.845 diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp old mode 100755 new mode 100644 index 695a4460f2a..4a744b0b9dc --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -16,11 +16,11 @@ Contributing authors: Original fix ttm Paul Crozier (SNL) Carolyn Phillips (University of Michigan) - + ttm/thermal Bradly Baer (Vanderbilt University) D. 
Greg Walker (Vanderbilt University) - + ------------------------------------------------------------------------- */ @@ -65,7 +65,7 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : net_energy_transfer(nullptr), net_energy_transfer_all(nullptr) , gamma_p_grid(nullptr), inductive_response_grid(nullptr), c_e_grid(nullptr), k_e_grid(nullptr) - + { if (narg < 8) error->all(FLERR,"Illegal fix ttm command"); vector_flag = 1; @@ -75,16 +75,16 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : nevery = 1; restart_peratom = 1; restart_global = 1; - + e_property_file = nullptr; - + seed = utils::inumeric(FLERR,arg[3],false,lmp); e_property_file = utils::strdup(arg[4]); nxgrid = utils::inumeric(FLERR,arg[5],false,lmp); nygrid = utils::inumeric(FLERR,arg[6],false,lmp); nzgrid = utils::inumeric(FLERR,arg[7],false,lmp); - - + + inductive_power = 0.0; tinit = 0.0; infile = outfile = nullptr; @@ -112,7 +112,7 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : iarg += 3; } else error->all(FLERR,"Illegal fix ttm command"); } - + // error check @@ -158,7 +158,7 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); - + // determines which class deallocate_grid() is called from deallocate_flag = 0; @@ -202,17 +202,17 @@ void FixTTMThermal::post_constructor() for (iy = 0; iy < nygrid; iy++) for (ix = 0; ix < nxgrid; ix++) T_electron[iz][iy][ix] = tinit; - - + + // zero net_energy_transfer_all // in case compute_vector accesses it on timestep 0 outflag = 0; memset(&net_energy_transfer_all[0][0][0],0,ngridtotal*sizeof(double)); - + // set electron grid properties from file read_electron_properties(e_property_file); - + // set initial electron temperatures from user input file if (infile) read_electron_temperatures(infile); @@ -320,25 +320,25 @@ void FixTTMThermal::post_force(int /*vflag*/) if (T_electron[iz][iy][ix] < 0) error->one(FLERR,"Electronic 
temperature dropped below zero"); - //Come back and check this for scaling + //Come back and check this for scaling for (int i = 1; i <= atom->ntypes; i++) { - gfactor1[i] = - gamma_p_grid[iz][iy][ix] / force->ftm2v; - gfactor2[i] = sqrt(24.0*force->boltz*gamma_p_grid[iz][iy][ix]/update->dt/force->mvv2e) / force->ftm2v; - } + gfactor1[i] = - gamma_p_grid[iz][iy][ix] / force->ftm2v; + gfactor2[i] = sqrt(24.0*force->boltz*gamma_p_grid[iz][iy][ix]/update->dt/force->mvv2e) / force->ftm2v; + } double tsqrt = sqrt(T_electron[iz][iy][ix]); gamma1 = gfactor1[type[i]]; gamma2 = gfactor2[type[i]] * tsqrt; if (T_electron[iz][iy][ix] > 1e-5) { - flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); - flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); - flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); - - f[i][0] += flangevin[i][0]; - f[i][1] += flangevin[i][1]; - f[i][2] += flangevin[i][2]; - } + flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); + flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); + flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); + + f[i][0] += flangevin[i][0]; + f[i][1] += flangevin[i][1]; + f[i][2] += flangevin[i][2]; + } } } } @@ -372,13 +372,13 @@ void FixTTMThermal::end_of_step() double dxinv = nxgrid/domain->xprd; double dyinv = nygrid/domain->yprd; double dzinv = nzgrid/domain->zprd; - + for (iz = 0; iz < nzgrid; iz++) for (iy = 0; iy < nygrid; iy++) for (ix = 0; ix < nxgrid; ix++) net_energy_transfer[iz][iy][ix] = 0.0; - + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -412,14 +412,14 @@ void FixTTMThermal::end_of_step() int num_inner_timesteps = 1; double inner_dt = update->dt; double voxel_coeff =(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz); - + std::vector grid_fourier(nzgrid * nygrid * nxgrid); int index = 0; // Location unimportant, only max value for (iz = 0; iz < nzgrid; iz++) for (iy = 0; iy < nygrid; iy++) for (ix = 0; ix < nxgrid; ix++) 
grid_fourier[index++] = 2.0/c_e_grid[iz][iy][ix]*(k_e_grid[iz][iy][ix]*voxel_coeff); - + double fourier_max = *std::max_element(grid_fourier.begin(), grid_fourier.end()); double stability_criterion = 1.0 - fourier_max*inner_dt; @@ -460,50 +460,50 @@ void FixTTMThermal::end_of_step() if (xleft == -1) xleft = nxgrid - 1; if (yleft == -1) yleft = nygrid - 1; if (zleft == -1) zleft = nzgrid - 1; - - // Initialize flags for vacuum - int left = 1; - int right =1; - int in = 1; - int out = 1; - int up = 1; - int down = 1; - - // Set flags to 0 if vaccum - if (T_electron[iz][iy][xleft] < 1e-5) left = 0; - if (T_electron[iz][iy][xright] < 1e-5) right = 0; - if (T_electron[iz][yright][ix] < 1e-5) in = 0; - if (T_electron[iz][yleft][ix] < 1e-5) out = 0; - if (T_electron[zright][iy][ix] < 1e-5) up = 0; - if (T_electron[zleft][iy][ix] < 1e-5) down = 0; - - if (T_electron[iz][iy][ix] > 1e-5) { + + // Initialize flags for vacuum + int left = 1; + int right =1; + int in = 1; + int out = 1; + int up = 1; + int down = 1; + + // Set flags to 0 if vaccum + if (T_electron[iz][iy][xleft] < 1e-5) left = 0; + if (T_electron[iz][iy][xright] < 1e-5) right = 0; + if (T_electron[iz][yright][ix] < 1e-5) in = 0; + if (T_electron[iz][yleft][ix] < 1e-5) out = 0; + if (T_electron[zright][iy][ix] < 1e-5) up = 0; + if (T_electron[zleft][iy][ix] < 1e-5) down = 0; + + if (T_electron[iz][iy][ix] > 1e-5) { T_electron[iz][iy][ix] = T_electron_old[iz][iy][ix] + inner_dt/c_e_grid[iz][iy][ix]*( - (safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + - - (safe_effective_kappa(k_e_grid[iz][iy][xright],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][iy][xright]-T_electron_old[iz][iy][ix])/dx/dx*right + - - (safe_effective_kappa(k_e_grid[iz][yleft][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][yleft][ix]-T_electron_old[iz][iy][ix])/dy/dy*out + - - (safe_effective_kappa(k_e_grid[iz][yright][ix],k_e_grid[iz][iy][ix]))* - 
(T_electron_old[iz][yright][ix]-T_electron_old[iz][iy][ix])/dy/dy*in + - - (safe_effective_kappa(k_e_grid[zleft][iy][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[zleft][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*down + - - (safe_effective_kappa(k_e_grid[zright][iy][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[zright][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*up - - -(net_energy_transfer_all[iz][iy][ix])/(del_vol) - +(inductive_power*inductive_response_grid[iz][iy][ix]));} - } - + (safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + + + (safe_effective_kappa(k_e_grid[iz][iy][xright],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xright]-T_electron_old[iz][iy][ix])/dx/dx*right + + + (safe_effective_kappa(k_e_grid[iz][yleft][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yleft][ix]-T_electron_old[iz][iy][ix])/dy/dy*out + + + (safe_effective_kappa(k_e_grid[iz][yright][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yright][ix]-T_electron_old[iz][iy][ix])/dy/dy*in + + + (safe_effective_kappa(k_e_grid[zleft][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zleft][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*down + + + (safe_effective_kappa(k_e_grid[zright][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zright][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*up + + -(net_energy_transfer_all[iz][iy][ix])/(del_vol) + +(inductive_power*inductive_response_grid[iz][iy][ix]));} + } + } - + // output of grid electron temperatures to file if (outfile && (update->ntimestep % outevery == 0)) write_electron_temperatures(fmt::format("{}.{}",outfile,update->ntimestep)); @@ -536,12 +536,12 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) int ix = values.next_int() - 1; int iy = values.next_int() - 1; int iz = values.next_int() - 1; - double c_e_tmp = values.next_double(); - double k_e_tmp = values.next_double(); + double c_e_tmp = values.next_double(); + double 
k_e_tmp = values.next_double(); double gamma_p_tmp = values.next_double(); double ind_tmp = values.next_double(); - + // check correctness of input data @@ -550,15 +550,15 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) if (c_e_tmp < 0.0) throw TokenizerException("Fix ttm electron specific heat must be > 0.0",""); - + if (k_e_tmp < 0.0) throw TokenizerException("Fix ttm electron conductivity must be > 0.0",""); - + if (gamma_p_tmp < 0.0) throw TokenizerException("Fix ttm electron coupling must be > 0.0",""); - + if (ind_tmp < 0.0) - throw TokenizerException("Fix ttm electron inductive response must be >= 0.0",""); + throw TokenizerException("Fix ttm electron inductive response must be >= 0.0",""); c_e_grid[iz][iy][ix] = c_e_tmp; k_e_grid[iz][iy][ix] = k_e_tmp; @@ -615,7 +615,7 @@ void FixTTMThermal::read_electron_temperatures(const std::string &filename) double T_tmp = values.next_double(); - + // check correctness of input data @@ -652,12 +652,12 @@ void FixTTMThermal::read_electron_temperatures(const std::string &filename) void FixTTMThermal::write_electron_temperatures(const std::string &filename) { if (comm->me) return; - + FILE *fp = fopen(filename.c_str(),"w"); if (!fp) error->one(FLERR,"Fix ttm could not open output file {}: {}", filename,utils::getsyserror()); fmt::print(fp,"# DATE: {} UNITS: {} COMMENT: Electron temperature " - "{}x{}x{} grid at step {}. Created by fix {}\n #Grid X,Y,Z Temperature\n", utils::current_date(), + "{}x{}x{} grid at step {}. 
Created by fix {}\n #Grid X,Y,Z Temperature\n", utils::current_date(), update->unit_style, nxgrid, nygrid, nzgrid, update->ntimestep, style); int ix,iy,iz; @@ -869,7 +869,7 @@ void FixTTMThermal::deallocate_grid() { memory->destroy(T_electron_old); memory->destroy(T_electron); - memory->destroy(c_e_grid); + memory->destroy(c_e_grid); memory->destroy(k_e_grid); memory->destroy(gamma_p_grid); memory->destroy(inductive_response_grid); diff --git a/src/EXTRA-FIX/fix_ttm_thermal.h b/src/EXTRA-FIX/fix_ttm_thermal.h old mode 100755 new mode 100644 From 352d7e7dddcdd48942928c3deda7d37f9fd7b838 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:44:35 -0400 Subject: [PATCH 040/604] programming style changes --- src/EXTRA-FIX/fix_ttm_thermal.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp index 4a744b0b9dc..700042ead76 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -40,7 +40,6 @@ #include #include #include -#include #include using namespace LAMMPS_NS; @@ -113,7 +112,6 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal fix ttm command"); } - // error check if (seed <= 0) @@ -169,12 +167,13 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : FixTTMThermal::~FixTTMThermal() { - delete [] infile; + delete[] infile; + delete[] e_propert_file; delete random; - delete [] gfactor1; - delete [] gfactor2; + delete[] gfactor1; + delete[] gfactor2; memory->destroy(flangevin); @@ -203,7 +202,6 @@ void FixTTMThermal::post_constructor() for (ix = 0; ix < nxgrid; ix++) T_electron[iz][iy][ix] = tinit; - // zero net_energy_transfer_all // in case compute_vector accesses it on timestep 0 @@ -373,13 +371,11 @@ void FixTTMThermal::end_of_step() double dyinv = nygrid/domain->yprd; double dzinv = nzgrid/domain->zprd; - for (iz = 0; iz < nzgrid; iz++) for 
(iy = 0; iy < nygrid; iy++) for (ix = 0; ix < nxgrid; ix++) net_energy_transfer[iz][iy][ix] = 0.0; - for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { ix = static_cast ((x[i][0]-boxlo[0])*dxinv + shift) - OFFSET; @@ -432,8 +428,6 @@ void FixTTMThermal::end_of_step() error->warning(FLERR,"Too many inner timesteps in fix ttm"); } - - // finite difference iterations to update T_electron for (int istep = 0; istep < num_inner_timesteps; istep++) { @@ -614,9 +608,6 @@ void FixTTMThermal::read_electron_temperatures(const std::string &filename) int iz = values.next_int() - 1; double T_tmp = values.next_double(); - - - // check correctness of input data if ((ix < 0) || (ix >= nxgrid) || (iy < 0) || (iy >= nygrid) || (iz < 0) || (iz >= nzgrid)) @@ -642,8 +633,10 @@ void FixTTMThermal::read_electron_temperatures(const std::string &filename) memory->destroy(T_initial_set); } + MPI_Bcast(&T_electron[0][0][0],ngridtotal,MPI_DOUBLE,0,world); } + /* ---------------------------------------------------------------------- write out current electron temperatures to user-specified file only written by proc 0 @@ -656,7 +649,7 @@ void FixTTMThermal::write_electron_temperatures(const std::string &filename) FILE *fp = fopen(filename.c_str(),"w"); if (!fp) error->one(FLERR,"Fix ttm could not open output file {}: {}", filename,utils::getsyserror()); - fmt::print(fp,"# DATE: {} UNITS: {} COMMENT: Electron temperature " + utils::print(fp,"# DATE: {} UNITS: {} COMMENT: Electron temperature " "{}x{}x{} grid at step {}. 
Created by fix {}\n #Grid X,Y,Z Temperature\n", utils::current_date(), update->unit_style, nxgrid, nygrid, nzgrid, update->ntimestep, style); @@ -714,7 +707,7 @@ void FixTTMThermal::write_restart(FILE *fp) void FixTTMThermal::restart(char *buf) { int n = 0; - auto rlist = (double *) buf; + auto *rlist = (double *) buf; // check that restart grid size is same as current grid size From 795b91e2d36525f6448637cc153a9ec44949bd2c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:45:14 -0400 Subject: [PATCH 041/604] add version tag --- doc/src/fix_ttm.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/fix_ttm.rst b/doc/src/fix_ttm.rst index 1fce1936405..87147362a81 100644 --- a/doc/src/fix_ttm.rst +++ b/doc/src/fix_ttm.rst @@ -123,6 +123,8 @@ expensive computationally than fix *ttm* because it treats the thermal diffusion equation as non-linear. More details on fix *ttm/mod* are given below. +.. versionadded:: TBD + Fix *ttm/thermal* allows for electronic properties to be assigned independently to each TTM grid point and supports external heat sources to the electronic subsystem. 
More details on fix *ttm/thermal* are From 37275316fb6afe4376023bc067b9503241408541 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 11 Aug 2025 19:58:18 -0400 Subject: [PATCH 042/604] fix typo --- src/EXTRA-FIX/fix_ttm_thermal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp index 700042ead76..3803204eb61 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -168,7 +168,7 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : FixTTMThermal::~FixTTMThermal() { delete[] infile; - delete[] e_propert_file; + delete[] e_property_file; delete random; From d30f24aa9bdfa918439d0f367b9bcd095540cbfa Mon Sep 17 00:00:00 2001 From: user Date: Tue, 12 Aug 2025 14:51:15 -0500 Subject: [PATCH 043/604] added missing example file --- examples/ttm/props.in | 1001 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1001 insertions(+) create mode 100755 examples/ttm/props.in diff --git a/examples/ttm/props.in b/examples/ttm/props.in new file mode 100755 index 00000000000..d87ca60e3a5 --- /dev/null +++ b/examples/ttm/props.in @@ -0,0 +1,1001 @@ +#Nx Ny Nz C_vol k_e Gama_p Eta +1 1 1 1.09E-06 0.005365 29.5917 1 +2 1 1 1.09E-06 0.005365 29.5917 1 +3 1 1 1.09E-06 0.005365 29.5917 1 +4 1 1 1.09E-06 0.005365 29.5917 1 +5 1 1 1.09E-06 0.005365 29.5917 1 +6 1 1 1.09E-06 0.005365 29.5917 1 +7 1 1 1.09E-06 0.005365 29.5917 1 +8 1 1 1.09E-06 0.005365 29.5917 1 +9 1 1 1.09E-06 0.005365 29.5917 1 +10 1 1 1.09E-06 0.005365 29.5917 1 +1 2 1 1.09E-06 0.005365 29.5917 1 +2 2 1 1.09E-06 0.005365 29.5917 1 +3 2 1 1.09E-06 0.005365 29.5917 1 +4 2 1 1.09E-06 0.005365 29.5917 1 +5 2 1 1.09E-06 0.005365 29.5917 1 +6 2 1 1.09E-06 0.005365 29.5917 1 +7 2 1 1.09E-06 0.005365 29.5917 1 +8 2 1 1.09E-06 0.005365 29.5917 1 +9 2 1 1.09E-06 0.005365 29.5917 1 +10 2 1 1.09E-06 0.005365 29.5917 1 +1 3 1 1.09E-06 0.005365 29.5917 1 +2 3 1 1.09E-06 0.005365 29.5917 1 
+3 3 1 1.09E-06 0.005365 29.5917 1 +4 3 1 1.09E-06 0.005365 29.5917 1 +5 3 1 1.09E-06 0.005365 29.5917 1 +6 3 1 1.09E-06 0.005365 29.5917 1 +7 3 1 1.09E-06 0.005365 29.5917 1 +8 3 1 1.09E-06 0.005365 29.5917 1 +9 3 1 1.09E-06 0.005365 29.5917 1 +10 3 1 1.09E-06 0.005365 29.5917 1 +1 4 1 1.09E-06 0.005365 29.5917 1 +2 4 1 1.09E-06 0.005365 29.5917 1 +3 4 1 1.09E-06 0.005365 29.5917 1 +4 4 1 1.09E-06 0.005365 29.5917 1 +5 4 1 1.09E-06 0.005365 29.5917 1 +6 4 1 1.09E-06 0.005365 29.5917 1 +7 4 1 1.09E-06 0.005365 29.5917 1 +8 4 1 1.09E-06 0.005365 29.5917 1 +9 4 1 1.09E-06 0.005365 29.5917 1 +10 4 1 1.09E-06 0.005365 29.5917 1 +1 5 1 1.09E-06 0.005365 29.5917 1 +2 5 1 1.09E-06 0.005365 29.5917 1 +3 5 1 1.09E-06 0.005365 29.5917 1 +4 5 1 1.09E-06 0.005365 29.5917 1 +5 5 1 1.09E-06 0.005365 29.5917 1 +6 5 1 1.09E-06 0.005365 29.5917 1 +7 5 1 1.09E-06 0.005365 29.5917 1 +8 5 1 1.09E-06 0.005365 29.5917 1 +9 5 1 1.09E-06 0.005365 29.5917 1 +10 5 1 1.09E-06 0.005365 29.5917 1 +1 6 1 1.09E-06 0.005365 29.5917 1 +2 6 1 1.09E-06 0.005365 29.5917 1 +3 6 1 1.09E-06 0.005365 29.5917 1 +4 6 1 1.09E-06 0.005365 29.5917 1 +5 6 1 1.09E-06 0.005365 29.5917 1 +6 6 1 1.09E-06 0.005365 29.5917 1 +7 6 1 1.09E-06 0.005365 29.5917 1 +8 6 1 1.09E-06 0.005365 29.5917 1 +9 6 1 1.09E-06 0.005365 29.5917 1 +10 6 1 1.09E-06 0.005365 29.5917 1 +1 7 1 1.09E-06 0.005365 29.5917 1 +2 7 1 1.09E-06 0.005365 29.5917 1 +3 7 1 1.09E-06 0.005365 29.5917 1 +4 7 1 1.09E-06 0.005365 29.5917 1 +5 7 1 1.09E-06 0.005365 29.5917 1 +6 7 1 1.09E-06 0.005365 29.5917 1 +7 7 1 1.09E-06 0.005365 29.5917 1 +8 7 1 1.09E-06 0.005365 29.5917 1 +9 7 1 1.09E-06 0.005365 29.5917 1 +10 7 1 1.09E-06 0.005365 29.5917 1 +1 8 1 1.09E-06 0.005365 29.5917 1 +2 8 1 1.09E-06 0.005365 29.5917 1 +3 8 1 1.09E-06 0.005365 29.5917 1 +4 8 1 1.09E-06 0.005365 29.5917 1 +5 8 1 1.09E-06 0.005365 29.5917 1 +6 8 1 1.09E-06 0.005365 29.5917 1 +7 8 1 1.09E-06 0.005365 29.5917 1 +8 8 1 1.09E-06 0.005365 29.5917 1 +9 8 1 1.09E-06 0.005365 29.5917 1 
+10 8 1 1.09E-06 0.005365 29.5917 1 +1 9 1 1.09E-06 0.005365 29.5917 1 +2 9 1 1.09E-06 0.005365 29.5917 1 +3 9 1 1.09E-06 0.005365 29.5917 1 +4 9 1 1.09E-06 0.005365 29.5917 1 +5 9 1 1.09E-06 0.005365 29.5917 1 +6 9 1 1.09E-06 0.005365 29.5917 1 +7 9 1 1.09E-06 0.005365 29.5917 1 +8 9 1 1.09E-06 0.005365 29.5917 1 +9 9 1 1.09E-06 0.005365 29.5917 1 +10 9 1 1.09E-06 0.005365 29.5917 1 +1 10 1 1.09E-06 0.005365 29.5917 1 +2 10 1 1.09E-06 0.005365 29.5917 1 +3 10 1 1.09E-06 0.005365 29.5917 1 +4 10 1 1.09E-06 0.005365 29.5917 1 +5 10 1 1.09E-06 0.005365 29.5917 1 +6 10 1 1.09E-06 0.005365 29.5917 1 +7 10 1 1.09E-06 0.005365 29.5917 1 +8 10 1 1.09E-06 0.005365 29.5917 1 +9 10 1 1.09E-06 0.005365 29.5917 1 +10 10 1 1.09E-06 0.005365 29.5917 1 +1 1 2 1.09E-06 0.005365 29.5917 1 +2 1 2 1.09E-06 0.005365 29.5917 1 +3 1 2 1.09E-06 0.005365 29.5917 1 +4 1 2 1.09E-06 0.005365 29.5917 1 +5 1 2 1.09E-06 0.005365 29.5917 1 +6 1 2 1.09E-06 0.005365 29.5917 1 +7 1 2 1.09E-06 0.005365 29.5917 1 +8 1 2 1.09E-06 0.005365 29.5917 1 +9 1 2 1.09E-06 0.005365 29.5917 1 +10 1 2 1.09E-06 0.005365 29.5917 1 +1 2 2 1.09E-06 0.005365 29.5917 1 +2 2 2 1.09E-06 0.005365 29.5917 1 +3 2 2 1.09E-06 0.005365 29.5917 1 +4 2 2 1.09E-06 0.005365 29.5917 1 +5 2 2 1.09E-06 0.005365 29.5917 1 +6 2 2 1.09E-06 0.005365 29.5917 1 +7 2 2 1.09E-06 0.005365 29.5917 1 +8 2 2 1.09E-06 0.005365 29.5917 1 +9 2 2 1.09E-06 0.005365 29.5917 1 +10 2 2 1.09E-06 0.005365 29.5917 1 +1 3 2 1.09E-06 0.005365 29.5917 1 +2 3 2 1.09E-06 0.005365 29.5917 1 +3 3 2 1.09E-06 0.005365 29.5917 1 +4 3 2 1.09E-06 0.005365 29.5917 1 +5 3 2 1.09E-06 0.005365 29.5917 1 +6 3 2 1.09E-06 0.005365 29.5917 1 +7 3 2 1.09E-06 0.005365 29.5917 1 +8 3 2 1.09E-06 0.005365 29.5917 1 +9 3 2 1.09E-06 0.005365 29.5917 1 +10 3 2 1.09E-06 0.005365 29.5917 1 +1 4 2 1.09E-06 0.005365 29.5917 1 +2 4 2 1.09E-06 0.005365 29.5917 1 +3 4 2 1.09E-06 0.005365 29.5917 1 +4 4 2 1.09E-06 0.005365 29.5917 1 +5 4 2 1.09E-06 0.005365 29.5917 1 +6 4 2 1.09E-06 
0.005365 29.5917 1 +7 4 2 1.09E-06 0.005365 29.5917 1 +8 4 2 1.09E-06 0.005365 29.5917 1 +9 4 2 1.09E-06 0.005365 29.5917 1 +10 4 2 1.09E-06 0.005365 29.5917 1 +1 5 2 1.09E-06 0.005365 29.5917 1 +2 5 2 1.09E-06 0.005365 29.5917 1 +3 5 2 1.09E-06 0.005365 29.5917 1 +4 5 2 1.09E-06 0.005365 29.5917 1 +5 5 2 1.09E-06 0.005365 29.5917 1 +6 5 2 1.09E-06 0.005365 29.5917 1 +7 5 2 1.09E-06 0.005365 29.5917 1 +8 5 2 1.09E-06 0.005365 29.5917 1 +9 5 2 1.09E-06 0.005365 29.5917 1 +10 5 2 1.09E-06 0.005365 29.5917 1 +1 6 2 1.09E-06 0.005365 29.5917 1 +2 6 2 1.09E-06 0.005365 29.5917 1 +3 6 2 1.09E-06 0.005365 29.5917 1 +4 6 2 1.09E-06 0.005365 29.5917 1 +5 6 2 1.09E-06 0.005365 29.5917 1 +6 6 2 1.09E-06 0.005365 29.5917 1 +7 6 2 1.09E-06 0.005365 29.5917 1 +8 6 2 1.09E-06 0.005365 29.5917 1 +9 6 2 1.09E-06 0.005365 29.5917 1 +10 6 2 1.09E-06 0.005365 29.5917 1 +1 7 2 1.09E-06 0.005365 29.5917 1 +2 7 2 1.09E-06 0.005365 29.5917 1 +3 7 2 1.09E-06 0.005365 29.5917 1 +4 7 2 1.09E-06 0.005365 29.5917 1 +5 7 2 1.09E-06 0.005365 29.5917 1 +6 7 2 1.09E-06 0.005365 29.5917 1 +7 7 2 1.09E-06 0.005365 29.5917 1 +8 7 2 1.09E-06 0.005365 29.5917 1 +9 7 2 1.09E-06 0.005365 29.5917 1 +10 7 2 1.09E-06 0.005365 29.5917 1 +1 8 2 1.09E-06 0.005365 29.5917 1 +2 8 2 1.09E-06 0.005365 29.5917 1 +3 8 2 1.09E-06 0.005365 29.5917 1 +4 8 2 1.09E-06 0.005365 29.5917 1 +5 8 2 1.09E-06 0.005365 29.5917 1 +6 8 2 1.09E-06 0.005365 29.5917 1 +7 8 2 1.09E-06 0.005365 29.5917 1 +8 8 2 1.09E-06 0.005365 29.5917 1 +9 8 2 1.09E-06 0.005365 29.5917 1 +10 8 2 1.09E-06 0.005365 29.5917 1 +1 9 2 1.09E-06 0.005365 29.5917 1 +2 9 2 1.09E-06 0.005365 29.5917 1 +3 9 2 1.09E-06 0.005365 29.5917 1 +4 9 2 1.09E-06 0.005365 29.5917 1 +5 9 2 1.09E-06 0.005365 29.5917 1 +6 9 2 1.09E-06 0.005365 29.5917 1 +7 9 2 1.09E-06 0.005365 29.5917 1 +8 9 2 1.09E-06 0.005365 29.5917 1 +9 9 2 1.09E-06 0.005365 29.5917 1 +10 9 2 1.09E-06 0.005365 29.5917 1 +1 10 2 1.09E-06 0.005365 29.5917 1 +2 10 2 1.09E-06 0.005365 29.5917 1 +3 10 2 
1.09E-06 0.005365 29.5917 1 +4 10 2 1.09E-06 0.005365 29.5917 1 +5 10 2 1.09E-06 0.005365 29.5917 1 +6 10 2 1.09E-06 0.005365 29.5917 1 +7 10 2 1.09E-06 0.005365 29.5917 1 +8 10 2 1.09E-06 0.005365 29.5917 1 +9 10 2 1.09E-06 0.005365 29.5917 1 +10 10 2 1.09E-06 0.005365 29.5917 1 +1 1 3 1.09E-06 0.005365 29.5917 1 +2 1 3 1.09E-06 0.005365 29.5917 1 +3 1 3 1.09E-06 0.005365 29.5917 1 +4 1 3 1.09E-06 0.005365 29.5917 1 +5 1 3 1.09E-06 0.005365 29.5917 1 +6 1 3 1.09E-06 0.005365 29.5917 1 +7 1 3 1.09E-06 0.005365 29.5917 1 +8 1 3 1.09E-06 0.005365 29.5917 1 +9 1 3 1.09E-06 0.005365 29.5917 1 +10 1 3 1.09E-06 0.005365 29.5917 1 +1 2 3 1.09E-06 0.005365 29.5917 1 +2 2 3 1.09E-06 0.005365 29.5917 1 +3 2 3 1.09E-06 0.005365 29.5917 1 +4 2 3 1.09E-06 0.005365 29.5917 1 +5 2 3 1.09E-06 0.005365 29.5917 1 +6 2 3 1.09E-06 0.005365 29.5917 1 +7 2 3 1.09E-06 0.005365 29.5917 1 +8 2 3 1.09E-06 0.005365 29.5917 1 +9 2 3 1.09E-06 0.005365 29.5917 1 +10 2 3 1.09E-06 0.005365 29.5917 1 +1 3 3 1.09E-06 0.005365 29.5917 1 +2 3 3 1.09E-06 0.005365 29.5917 1 +3 3 3 1.09E-06 0.005365 29.5917 1 +4 3 3 1.09E-06 0.005365 29.5917 1 +5 3 3 1.09E-06 0.005365 29.5917 1 +6 3 3 1.09E-06 0.005365 29.5917 1 +7 3 3 1.09E-06 0.005365 29.5917 1 +8 3 3 1.09E-06 0.005365 29.5917 1 +9 3 3 1.09E-06 0.005365 29.5917 1 +10 3 3 1.09E-06 0.005365 29.5917 1 +1 4 3 1.09E-06 0.005365 29.5917 1 +2 4 3 1.09E-06 0.005365 29.5917 1 +3 4 3 1.09E-06 0.005365 29.5917 1 +4 4 3 1.09E-06 0.005365 29.5917 1 +5 4 3 1.09E-06 0.005365 29.5917 1 +6 4 3 1.09E-06 0.005365 29.5917 1 +7 4 3 1.09E-06 0.005365 29.5917 1 +8 4 3 1.09E-06 0.005365 29.5917 1 +9 4 3 1.09E-06 0.005365 29.5917 1 +10 4 3 1.09E-06 0.005365 29.5917 1 +1 5 3 1.09E-06 0.005365 29.5917 1 +2 5 3 1.09E-06 0.005365 29.5917 1 +3 5 3 1.09E-06 0.005365 29.5917 1 +4 5 3 1.09E-06 0.005365 29.5917 1 +5 5 3 1.09E-06 0.005365 29.5917 1 +6 5 3 1.09E-06 0.005365 29.5917 1 +7 5 3 1.09E-06 0.005365 29.5917 1 +8 5 3 1.09E-06 0.005365 29.5917 1 +9 5 3 1.09E-06 0.005365 29.5917 1 
+10 5 3 1.09E-06 0.005365 29.5917 1 +1 6 3 1.09E-06 0.005365 29.5917 1 +2 6 3 1.09E-06 0.005365 29.5917 1 +3 6 3 1.09E-06 0.005365 29.5917 1 +4 6 3 1.09E-06 0.005365 29.5917 1 +5 6 3 1.09E-06 0.005365 29.5917 1 +6 6 3 1.09E-06 0.005365 29.5917 1 +7 6 3 1.09E-06 0.005365 29.5917 1 +8 6 3 1.09E-06 0.005365 29.5917 1 +9 6 3 1.09E-06 0.005365 29.5917 1 +10 6 3 1.09E-06 0.005365 29.5917 1 +1 7 3 1.09E-06 0.005365 29.5917 1 +2 7 3 1.09E-06 0.005365 29.5917 1 +3 7 3 1.09E-06 0.005365 29.5917 1 +4 7 3 1.09E-06 0.005365 29.5917 1 +5 7 3 1.09E-06 0.005365 29.5917 1 +6 7 3 1.09E-06 0.005365 29.5917 1 +7 7 3 1.09E-06 0.005365 29.5917 1 +8 7 3 1.09E-06 0.005365 29.5917 1 +9 7 3 1.09E-06 0.005365 29.5917 1 +10 7 3 1.09E-06 0.005365 29.5917 1 +1 8 3 1.09E-06 0.005365 29.5917 1 +2 8 3 1.09E-06 0.005365 29.5917 1 +3 8 3 1.09E-06 0.005365 29.5917 1 +4 8 3 1.09E-06 0.005365 29.5917 1 +5 8 3 1.09E-06 0.005365 29.5917 1 +6 8 3 1.09E-06 0.005365 29.5917 1 +7 8 3 1.09E-06 0.005365 29.5917 1 +8 8 3 1.09E-06 0.005365 29.5917 1 +9 8 3 1.09E-06 0.005365 29.5917 1 +10 8 3 1.09E-06 0.005365 29.5917 1 +1 9 3 1.09E-06 0.005365 29.5917 1 +2 9 3 1.09E-06 0.005365 29.5917 1 +3 9 3 1.09E-06 0.005365 29.5917 1 +4 9 3 1.09E-06 0.005365 29.5917 1 +5 9 3 1.09E-06 0.005365 29.5917 1 +6 9 3 1.09E-06 0.005365 29.5917 1 +7 9 3 1.09E-06 0.005365 29.5917 1 +8 9 3 1.09E-06 0.005365 29.5917 1 +9 9 3 1.09E-06 0.005365 29.5917 1 +10 9 3 1.09E-06 0.005365 29.5917 1 +1 10 3 1.09E-06 0.005365 29.5917 1 +2 10 3 1.09E-06 0.005365 29.5917 1 +3 10 3 1.09E-06 0.005365 29.5917 1 +4 10 3 1.09E-06 0.005365 29.5917 1 +5 10 3 1.09E-06 0.005365 29.5917 1 +6 10 3 1.09E-06 0.005365 29.5917 1 +7 10 3 1.09E-06 0.005365 29.5917 1 +8 10 3 1.09E-06 0.005365 29.5917 1 +9 10 3 1.09E-06 0.005365 29.5917 1 +10 10 3 1.09E-06 0.005365 29.5917 1 +1 1 4 1.09E-06 0.005365 29.5917 1 +2 1 4 1.09E-06 0.005365 29.5917 1 +3 1 4 1.09E-06 0.005365 29.5917 1 +4 1 4 1.09E-06 0.005365 29.5917 1 +5 1 4 1.09E-06 0.005365 29.5917 1 +6 1 4 1.09E-06 
0.005365 29.5917 1 +7 1 4 1.09E-06 0.005365 29.5917 1 +8 1 4 1.09E-06 0.005365 29.5917 1 +9 1 4 1.09E-06 0.005365 29.5917 1 +10 1 4 1.09E-06 0.005365 29.5917 1 +1 2 4 1.09E-06 0.005365 29.5917 1 +2 2 4 1.09E-06 0.005365 29.5917 1 +3 2 4 1.09E-06 0.005365 29.5917 1 +4 2 4 1.09E-06 0.005365 29.5917 1 +5 2 4 1.09E-06 0.005365 29.5917 1 +6 2 4 1.09E-06 0.005365 29.5917 1 +7 2 4 1.09E-06 0.005365 29.5917 1 +8 2 4 1.09E-06 0.005365 29.5917 1 +9 2 4 1.09E-06 0.005365 29.5917 1 +10 2 4 1.09E-06 0.005365 29.5917 1 +1 3 4 1.09E-06 0.005365 29.5917 1 +2 3 4 1.09E-06 0.005365 29.5917 1 +3 3 4 1.09E-06 0.005365 29.5917 1 +4 3 4 1.09E-06 0.005365 29.5917 1 +5 3 4 1.09E-06 0.005365 29.5917 1 +6 3 4 1.09E-06 0.005365 29.5917 1 +7 3 4 1.09E-06 0.005365 29.5917 1 +8 3 4 1.09E-06 0.005365 29.5917 1 +9 3 4 1.09E-06 0.005365 29.5917 1 +10 3 4 1.09E-06 0.005365 29.5917 1 +1 4 4 1.09E-06 0.005365 29.5917 1 +2 4 4 1.09E-06 0.005365 29.5917 1 +3 4 4 1.09E-06 0.005365 29.5917 1 +4 4 4 1.09E-06 0.005365 29.5917 1 +5 4 4 1.09E-06 0.005365 29.5917 1 +6 4 4 1.09E-06 0.005365 29.5917 1 +7 4 4 1.09E-06 0.005365 29.5917 1 +8 4 4 1.09E-06 0.005365 29.5917 1 +9 4 4 1.09E-06 0.005365 29.5917 1 +10 4 4 1.09E-06 0.005365 29.5917 1 +1 5 4 1.09E-06 0.005365 29.5917 1 +2 5 4 1.09E-06 0.005365 29.5917 1 +3 5 4 1.09E-06 0.005365 29.5917 1 +4 5 4 1.09E-06 0.005365 29.5917 1 +5 5 4 1.09E-06 0.005365 29.5917 1 +6 5 4 1.09E-06 0.005365 29.5917 1 +7 5 4 1.09E-06 0.005365 29.5917 1 +8 5 4 1.09E-06 0.005365 29.5917 1 +9 5 4 1.09E-06 0.005365 29.5917 1 +10 5 4 1.09E-06 0.005365 29.5917 1 +1 6 4 1.09E-06 0.005365 29.5917 1 +2 6 4 1.09E-06 0.005365 29.5917 1 +3 6 4 1.09E-06 0.005365 29.5917 1 +4 6 4 1.09E-06 0.005365 29.5917 1 +5 6 4 1.09E-06 0.005365 29.5917 1 +6 6 4 1.09E-06 0.005365 29.5917 1 +7 6 4 1.09E-06 0.005365 29.5917 1 +8 6 4 1.09E-06 0.005365 29.5917 1 +9 6 4 1.09E-06 0.005365 29.5917 1 +10 6 4 1.09E-06 0.005365 29.5917 1 +1 7 4 1.09E-06 0.005365 29.5917 1 +2 7 4 1.09E-06 0.005365 29.5917 1 +3 7 4 
1.09E-06 0.005365 29.5917 1 +4 7 4 1.09E-06 0.005365 29.5917 1 +5 7 4 1.09E-06 0.005365 29.5917 1 +6 7 4 1.09E-06 0.005365 29.5917 1 +7 7 4 1.09E-06 0.005365 29.5917 1 +8 7 4 1.09E-06 0.005365 29.5917 1 +9 7 4 1.09E-06 0.005365 29.5917 1 +10 7 4 1.09E-06 0.005365 29.5917 1 +1 8 4 1.09E-06 0.005365 29.5917 1 +2 8 4 1.09E-06 0.005365 29.5917 1 +3 8 4 1.09E-06 0.005365 29.5917 1 +4 8 4 1.09E-06 0.005365 29.5917 1 +5 8 4 1.09E-06 0.005365 29.5917 1 +6 8 4 1.09E-06 0.005365 29.5917 1 +7 8 4 1.09E-06 0.005365 29.5917 1 +8 8 4 1.09E-06 0.005365 29.5917 1 +9 8 4 1.09E-06 0.005365 29.5917 1 +10 8 4 1.09E-06 0.005365 29.5917 1 +1 9 4 1.09E-06 0.005365 29.5917 1 +2 9 4 1.09E-06 0.005365 29.5917 1 +3 9 4 1.09E-06 0.005365 29.5917 1 +4 9 4 1.09E-06 0.005365 29.5917 1 +5 9 4 1.09E-06 0.005365 29.5917 1 +6 9 4 1.09E-06 0.005365 29.5917 1 +7 9 4 1.09E-06 0.005365 29.5917 1 +8 9 4 1.09E-06 0.005365 29.5917 1 +9 9 4 1.09E-06 0.005365 29.5917 1 +10 9 4 1.09E-06 0.005365 29.5917 1 +1 10 4 1.09E-06 0.005365 29.5917 1 +2 10 4 1.09E-06 0.005365 29.5917 1 +3 10 4 1.09E-06 0.005365 29.5917 1 +4 10 4 1.09E-06 0.005365 29.5917 1 +5 10 4 1.09E-06 0.005365 29.5917 1 +6 10 4 1.09E-06 0.005365 29.5917 1 +7 10 4 1.09E-06 0.005365 29.5917 1 +8 10 4 1.09E-06 0.005365 29.5917 1 +9 10 4 1.09E-06 0.005365 29.5917 1 +10 10 4 1.09E-06 0.005365 29.5917 1 +1 1 5 1.09E-06 0.005365 29.5917 1 +2 1 5 1.09E-06 0.005365 29.5917 1 +3 1 5 1.09E-06 0.005365 29.5917 1 +4 1 5 1.09E-06 0.005365 29.5917 1 +5 1 5 1.09E-06 0.005365 29.5917 1 +6 1 5 1.09E-06 0.005365 29.5917 1 +7 1 5 1.09E-06 0.005365 29.5917 1 +8 1 5 1.09E-06 0.005365 29.5917 1 +9 1 5 1.09E-06 0.005365 29.5917 1 +10 1 5 1.09E-06 0.005365 29.5917 1 +1 2 5 1.09E-06 0.005365 29.5917 1 +2 2 5 1.09E-06 0.005365 29.5917 1 +3 2 5 1.09E-06 0.005365 29.5917 1 +4 2 5 1.09E-06 0.005365 29.5917 1 +5 2 5 1.09E-06 0.005365 29.5917 1 +6 2 5 1.09E-06 0.005365 29.5917 1 +7 2 5 1.09E-06 0.005365 29.5917 1 +8 2 5 1.09E-06 0.005365 29.5917 1 +9 2 5 1.09E-06 0.005365 
29.5917 1 +10 2 5 1.09E-06 0.005365 29.5917 1 +1 3 5 1.09E-06 0.005365 29.5917 1 +2 3 5 1.09E-06 0.005365 29.5917 1 +3 3 5 1.09E-06 0.005365 29.5917 1 +4 3 5 1.09E-06 0.005365 29.5917 1 +5 3 5 1.09E-06 0.005365 29.5917 1 +6 3 5 1.09E-06 0.005365 29.5917 1 +7 3 5 1.09E-06 0.005365 29.5917 1 +8 3 5 1.09E-06 0.005365 29.5917 1 +9 3 5 1.09E-06 0.005365 29.5917 1 +10 3 5 1.09E-06 0.005365 29.5917 1 +1 4 5 1.09E-06 0.005365 29.5917 1 +2 4 5 1.09E-06 0.005365 29.5917 1 +3 4 5 1.09E-06 0.005365 29.5917 1 +4 4 5 1.09E-06 0.005365 29.5917 1 +5 4 5 1.09E-06 0.005365 29.5917 1 +6 4 5 1.09E-06 0.005365 29.5917 1 +7 4 5 1.09E-06 0.005365 29.5917 1 +8 4 5 1.09E-06 0.005365 29.5917 1 +9 4 5 1.09E-06 0.005365 29.5917 1 +10 4 5 1.09E-06 0.005365 29.5917 1 +1 5 5 1.09E-06 0.005365 29.5917 1 +2 5 5 1.09E-06 0.005365 29.5917 1 +3 5 5 1.09E-06 0.005365 29.5917 1 +4 5 5 1.09E-06 0.005365 29.5917 1 +5 5 5 1.09E-06 0.005365 29.5917 1 +6 5 5 1.09E-06 0.005365 29.5917 1 +7 5 5 1.09E-06 0.005365 29.5917 1 +8 5 5 1.09E-06 0.005365 29.5917 1 +9 5 5 1.09E-06 0.005365 29.5917 1 +10 5 5 1.09E-06 0.005365 29.5917 1 +1 6 5 1.09E-06 0.005365 29.5917 1 +2 6 5 1.09E-06 0.005365 29.5917 1 +3 6 5 1.09E-06 0.005365 29.5917 1 +4 6 5 1.09E-06 0.005365 29.5917 1 +5 6 5 1.09E-06 0.005365 29.5917 1 +6 6 5 1.09E-06 0.005365 29.5917 1 +7 6 5 1.09E-06 0.005365 29.5917 1 +8 6 5 1.09E-06 0.005365 29.5917 1 +9 6 5 1.09E-06 0.005365 29.5917 1 +10 6 5 1.09E-06 0.005365 29.5917 1 +1 7 5 1.09E-06 0.005365 29.5917 1 +2 7 5 1.09E-06 0.005365 29.5917 1 +3 7 5 1.09E-06 0.005365 29.5917 1 +4 7 5 1.09E-06 0.005365 29.5917 1 +5 7 5 1.09E-06 0.005365 29.5917 1 +6 7 5 1.09E-06 0.005365 29.5917 1 +7 7 5 1.09E-06 0.005365 29.5917 1 +8 7 5 1.09E-06 0.005365 29.5917 1 +9 7 5 1.09E-06 0.005365 29.5917 1 +10 7 5 1.09E-06 0.005365 29.5917 1 +1 8 5 1.09E-06 0.005365 29.5917 1 +2 8 5 1.09E-06 0.005365 29.5917 1 +3 8 5 1.09E-06 0.005365 29.5917 1 +4 8 5 1.09E-06 0.005365 29.5917 1 +5 8 5 1.09E-06 0.005365 29.5917 1 +6 8 5 1.09E-06 
0.005365 29.5917 1 +7 8 5 1.09E-06 0.005365 29.5917 1 +8 8 5 1.09E-06 0.005365 29.5917 1 +9 8 5 1.09E-06 0.005365 29.5917 1 +10 8 5 1.09E-06 0.005365 29.5917 1 +1 9 5 1.09E-06 0.005365 29.5917 1 +2 9 5 1.09E-06 0.005365 29.5917 1 +3 9 5 1.09E-06 0.005365 29.5917 1 +4 9 5 1.09E-06 0.005365 29.5917 1 +5 9 5 1.09E-06 0.005365 29.5917 1 +6 9 5 1.09E-06 0.005365 29.5917 1 +7 9 5 1.09E-06 0.005365 29.5917 1 +8 9 5 1.09E-06 0.005365 29.5917 1 +9 9 5 1.09E-06 0.005365 29.5917 1 +10 9 5 1.09E-06 0.005365 29.5917 1 +1 10 5 1.09E-06 0.005365 29.5917 1 +2 10 5 1.09E-06 0.005365 29.5917 1 +3 10 5 1.09E-06 0.005365 29.5917 1 +4 10 5 1.09E-06 0.005365 29.5917 1 +5 10 5 1.09E-06 0.005365 29.5917 1 +6 10 5 1.09E-06 0.005365 29.5917 1 +7 10 5 1.09E-06 0.005365 29.5917 1 +8 10 5 1.09E-06 0.005365 29.5917 1 +9 10 5 1.09E-06 0.005365 29.5917 1 +10 10 5 1.09E-06 0.005365 29.5917 1 +1 1 6 1.09E-06 0.005365 29.5917 1 +2 1 6 1.09E-06 0.005365 29.5917 1 +3 1 6 1.09E-06 0.005365 29.5917 1 +4 1 6 1.09E-06 0.005365 29.5917 1 +5 1 6 1.09E-06 0.005365 29.5917 1 +6 1 6 1.09E-06 0.005365 29.5917 1 +7 1 6 1.09E-06 0.005365 29.5917 1 +8 1 6 1.09E-06 0.005365 29.5917 1 +9 1 6 1.09E-06 0.005365 29.5917 1 +10 1 6 1.09E-06 0.005365 29.5917 1 +1 2 6 1.09E-06 0.005365 29.5917 1 +2 2 6 1.09E-06 0.005365 29.5917 1 +3 2 6 1.09E-06 0.005365 29.5917 1 +4 2 6 1.09E-06 0.005365 29.5917 1 +5 2 6 1.09E-06 0.005365 29.5917 1 +6 2 6 1.09E-06 0.005365 29.5917 1 +7 2 6 1.09E-06 0.005365 29.5917 1 +8 2 6 1.09E-06 0.005365 29.5917 1 +9 2 6 1.09E-06 0.005365 29.5917 1 +10 2 6 1.09E-06 0.005365 29.5917 1 +1 3 6 1.09E-06 0.005365 29.5917 1 +2 3 6 1.09E-06 0.005365 29.5917 1 +3 3 6 1.09E-06 0.005365 29.5917 1 +4 3 6 1.09E-06 0.005365 29.5917 1 +5 3 6 1.09E-06 0.005365 29.5917 1 +6 3 6 1.09E-06 0.005365 29.5917 1 +7 3 6 1.09E-06 0.005365 29.5917 1 +8 3 6 1.09E-06 0.005365 29.5917 1 +9 3 6 1.09E-06 0.005365 29.5917 1 +10 3 6 1.09E-06 0.005365 29.5917 1 +1 4 6 1.09E-06 0.005365 29.5917 1 +2 4 6 1.09E-06 0.005365 29.5917 1 +3 4 
6 1.09E-06 0.005365 29.5917 1 +4 4 6 1.09E-06 0.005365 29.5917 1 +5 4 6 1.09E-06 0.005365 29.5917 1 +6 4 6 1.09E-06 0.005365 29.5917 1 +7 4 6 1.09E-06 0.005365 29.5917 1 +8 4 6 1.09E-06 0.005365 29.5917 1 +9 4 6 1.09E-06 0.005365 29.5917 1 +10 4 6 1.09E-06 0.005365 29.5917 1 +1 5 6 1.09E-06 0.005365 29.5917 1 +2 5 6 1.09E-06 0.005365 29.5917 1 +3 5 6 1.09E-06 0.005365 29.5917 1 +4 5 6 1.09E-06 0.005365 29.5917 1 +5 5 6 1.09E-06 0.005365 29.5917 1 +6 5 6 1.09E-06 0.005365 29.5917 1 +7 5 6 1.09E-06 0.005365 29.5917 1 +8 5 6 1.09E-06 0.005365 29.5917 1 +9 5 6 1.09E-06 0.005365 29.5917 1 +10 5 6 1.09E-06 0.005365 29.5917 1 +1 6 6 1.09E-06 0.005365 29.5917 1 +2 6 6 1.09E-06 0.005365 29.5917 1 +3 6 6 1.09E-06 0.005365 29.5917 1 +4 6 6 1.09E-06 0.005365 29.5917 1 +5 6 6 1.09E-06 0.005365 29.5917 1 +6 6 6 1.09E-06 0.005365 29.5917 1 +7 6 6 1.09E-06 0.005365 29.5917 1 +8 6 6 1.09E-06 0.005365 29.5917 1 +9 6 6 1.09E-06 0.005365 29.5917 1 +10 6 6 1.09E-06 0.005365 29.5917 1 +1 7 6 1.09E-06 0.005365 29.5917 1 +2 7 6 1.09E-06 0.005365 29.5917 1 +3 7 6 1.09E-06 0.005365 29.5917 1 +4 7 6 1.09E-06 0.005365 29.5917 1 +5 7 6 1.09E-06 0.005365 29.5917 1 +6 7 6 1.09E-06 0.005365 29.5917 1 +7 7 6 1.09E-06 0.005365 29.5917 1 +8 7 6 1.09E-06 0.005365 29.5917 1 +9 7 6 1.09E-06 0.005365 29.5917 1 +10 7 6 1.09E-06 0.005365 29.5917 1 +1 8 6 1.09E-06 0.005365 29.5917 1 +2 8 6 1.09E-06 0.005365 29.5917 1 +3 8 6 1.09E-06 0.005365 29.5917 1 +4 8 6 1.09E-06 0.005365 29.5917 1 +5 8 6 1.09E-06 0.005365 29.5917 1 +6 8 6 1.09E-06 0.005365 29.5917 1 +7 8 6 1.09E-06 0.005365 29.5917 1 +8 8 6 1.09E-06 0.005365 29.5917 1 +9 8 6 1.09E-06 0.005365 29.5917 1 +10 8 6 1.09E-06 0.005365 29.5917 1 +1 9 6 1.09E-06 0.005365 29.5917 1 +2 9 6 1.09E-06 0.005365 29.5917 1 +3 9 6 1.09E-06 0.005365 29.5917 1 +4 9 6 1.09E-06 0.005365 29.5917 1 +5 9 6 1.09E-06 0.005365 29.5917 1 +6 9 6 1.09E-06 0.005365 29.5917 1 +7 9 6 1.09E-06 0.005365 29.5917 1 +8 9 6 1.09E-06 0.005365 29.5917 1 +9 9 6 1.09E-06 0.005365 29.5917 1 +10 
9 6 1.09E-06 0.005365 29.5917 1 +1 10 6 1.09E-06 0.005365 29.5917 1 +2 10 6 1.09E-06 0.005365 29.5917 1 +3 10 6 1.09E-06 0.005365 29.5917 1 +4 10 6 1.09E-06 0.005365 29.5917 1 +5 10 6 1.09E-06 0.005365 29.5917 1 +6 10 6 1.09E-06 0.005365 29.5917 1 +7 10 6 1.09E-06 0.005365 29.5917 1 +8 10 6 1.09E-06 0.005365 29.5917 1 +9 10 6 1.09E-06 0.005365 29.5917 1 +10 10 6 1.09E-06 0.005365 29.5917 1 +1 1 7 1.09E-06 0.005365 29.5917 1 +2 1 7 1.09E-06 0.005365 29.5917 1 +3 1 7 1.09E-06 0.005365 29.5917 1 +4 1 7 1.09E-06 0.005365 29.5917 1 +5 1 7 1.09E-06 0.005365 29.5917 1 +6 1 7 1.09E-06 0.005365 29.5917 1 +7 1 7 1.09E-06 0.005365 29.5917 1 +8 1 7 1.09E-06 0.005365 29.5917 1 +9 1 7 1.09E-06 0.005365 29.5917 1 +10 1 7 1.09E-06 0.005365 29.5917 1 +1 2 7 1.09E-06 0.005365 29.5917 1 +2 2 7 1.09E-06 0.005365 29.5917 1 +3 2 7 1.09E-06 0.005365 29.5917 1 +4 2 7 1.09E-06 0.005365 29.5917 1 +5 2 7 1.09E-06 0.005365 29.5917 1 +6 2 7 1.09E-06 0.005365 29.5917 1 +7 2 7 1.09E-06 0.005365 29.5917 1 +8 2 7 1.09E-06 0.005365 29.5917 1 +9 2 7 1.09E-06 0.005365 29.5917 1 +10 2 7 1.09E-06 0.005365 29.5917 1 +1 3 7 1.09E-06 0.005365 29.5917 1 +2 3 7 1.09E-06 0.005365 29.5917 1 +3 3 7 1.09E-06 0.005365 29.5917 1 +4 3 7 1.09E-06 0.005365 29.5917 1 +5 3 7 1.09E-06 0.005365 29.5917 1 +6 3 7 1.09E-06 0.005365 29.5917 1 +7 3 7 1.09E-06 0.005365 29.5917 1 +8 3 7 1.09E-06 0.005365 29.5917 1 +9 3 7 1.09E-06 0.005365 29.5917 1 +10 3 7 1.09E-06 0.005365 29.5917 1 +1 4 7 1.09E-06 0.005365 29.5917 1 +2 4 7 1.09E-06 0.005365 29.5917 1 +3 4 7 1.09E-06 0.005365 29.5917 1 +4 4 7 1.09E-06 0.005365 29.5917 1 +5 4 7 1.09E-06 0.005365 29.5917 1 +6 4 7 1.09E-06 0.005365 29.5917 1 +7 4 7 1.09E-06 0.005365 29.5917 1 +8 4 7 1.09E-06 0.005365 29.5917 1 +9 4 7 1.09E-06 0.005365 29.5917 1 +10 4 7 1.09E-06 0.005365 29.5917 1 +1 5 7 1.09E-06 0.005365 29.5917 1 +2 5 7 1.09E-06 0.005365 29.5917 1 +3 5 7 1.09E-06 0.005365 29.5917 1 +4 5 7 1.09E-06 0.005365 29.5917 1 +5 5 7 1.09E-06 0.005365 29.5917 1 +6 5 7 1.09E-06 0.005365 
29.5917 1 +7 5 7 1.09E-06 0.005365 29.5917 1 +8 5 7 1.09E-06 0.005365 29.5917 1 +9 5 7 1.09E-06 0.005365 29.5917 1 +10 5 7 1.09E-06 0.005365 29.5917 1 +1 6 7 1.09E-06 0.005365 29.5917 1 +2 6 7 1.09E-06 0.005365 29.5917 1 +3 6 7 1.09E-06 0.005365 29.5917 1 +4 6 7 1.09E-06 0.005365 29.5917 1 +5 6 7 1.09E-06 0.005365 29.5917 1 +6 6 7 1.09E-06 0.005365 29.5917 1 +7 6 7 1.09E-06 0.005365 29.5917 1 +8 6 7 1.09E-06 0.005365 29.5917 1 +9 6 7 1.09E-06 0.005365 29.5917 1 +10 6 7 1.09E-06 0.005365 29.5917 1 +1 7 7 1.09E-06 0.005365 29.5917 1 +2 7 7 1.09E-06 0.005365 29.5917 1 +3 7 7 1.09E-06 0.005365 29.5917 1 +4 7 7 1.09E-06 0.005365 29.5917 1 +5 7 7 1.09E-06 0.005365 29.5917 1 +6 7 7 1.09E-06 0.005365 29.5917 1 +7 7 7 1.09E-06 0.005365 29.5917 1 +8 7 7 1.09E-06 0.005365 29.5917 1 +9 7 7 1.09E-06 0.005365 29.5917 1 +10 7 7 1.09E-06 0.005365 29.5917 1 +1 8 7 1.09E-06 0.005365 29.5917 1 +2 8 7 1.09E-06 0.005365 29.5917 1 +3 8 7 1.09E-06 0.005365 29.5917 1 +4 8 7 1.09E-06 0.005365 29.5917 1 +5 8 7 1.09E-06 0.005365 29.5917 1 +6 8 7 1.09E-06 0.005365 29.5917 1 +7 8 7 1.09E-06 0.005365 29.5917 1 +8 8 7 1.09E-06 0.005365 29.5917 1 +9 8 7 1.09E-06 0.005365 29.5917 1 +10 8 7 1.09E-06 0.005365 29.5917 1 +1 9 7 1.09E-06 0.005365 29.5917 1 +2 9 7 1.09E-06 0.005365 29.5917 1 +3 9 7 1.09E-06 0.005365 29.5917 1 +4 9 7 1.09E-06 0.005365 29.5917 1 +5 9 7 1.09E-06 0.005365 29.5917 1 +6 9 7 1.09E-06 0.005365 29.5917 1 +7 9 7 1.09E-06 0.005365 29.5917 1 +8 9 7 1.09E-06 0.005365 29.5917 1 +9 9 7 1.09E-06 0.005365 29.5917 1 +10 9 7 1.09E-06 0.005365 29.5917 1 +1 10 7 1.09E-06 0.005365 29.5917 1 +2 10 7 1.09E-06 0.005365 29.5917 1 +3 10 7 1.09E-06 0.005365 29.5917 1 +4 10 7 1.09E-06 0.005365 29.5917 1 +5 10 7 1.09E-06 0.005365 29.5917 1 +6 10 7 1.09E-06 0.005365 29.5917 1 +7 10 7 1.09E-06 0.005365 29.5917 1 +8 10 7 1.09E-06 0.005365 29.5917 1 +9 10 7 1.09E-06 0.005365 29.5917 1 +10 10 7 1.09E-06 0.005365 29.5917 1 +1 1 8 1.09E-06 0.005365 29.5917 1 +2 1 8 1.09E-06 0.005365 29.5917 1 +3 1 8 
1.09E-06 0.005365 29.5917 1 +4 1 8 1.09E-06 0.005365 29.5917 1 +5 1 8 1.09E-06 0.005365 29.5917 1 +6 1 8 1.09E-06 0.005365 29.5917 1 +7 1 8 1.09E-06 0.005365 29.5917 1 +8 1 8 1.09E-06 0.005365 29.5917 1 +9 1 8 1.09E-06 0.005365 29.5917 1 +10 1 8 1.09E-06 0.005365 29.5917 1 +1 2 8 1.09E-06 0.005365 29.5917 1 +2 2 8 1.09E-06 0.005365 29.5917 1 +3 2 8 1.09E-06 0.005365 29.5917 1 +4 2 8 1.09E-06 0.005365 29.5917 1 +5 2 8 1.09E-06 0.005365 29.5917 1 +6 2 8 1.09E-06 0.005365 29.5917 1 +7 2 8 1.09E-06 0.005365 29.5917 1 +8 2 8 1.09E-06 0.005365 29.5917 1 +9 2 8 1.09E-06 0.005365 29.5917 1 +10 2 8 1.09E-06 0.005365 29.5917 1 +1 3 8 1.09E-06 0.005365 29.5917 1 +2 3 8 1.09E-06 0.005365 29.5917 1 +3 3 8 1.09E-06 0.005365 29.5917 1 +4 3 8 1.09E-06 0.005365 29.5917 1 +5 3 8 1.09E-06 0.005365 29.5917 1 +6 3 8 1.09E-06 0.005365 29.5917 1 +7 3 8 1.09E-06 0.005365 29.5917 1 +8 3 8 1.09E-06 0.005365 29.5917 1 +9 3 8 1.09E-06 0.005365 29.5917 1 +10 3 8 1.09E-06 0.005365 29.5917 1 +1 4 8 1.09E-06 0.005365 29.5917 1 +2 4 8 1.09E-06 0.005365 29.5917 1 +3 4 8 1.09E-06 0.005365 29.5917 1 +4 4 8 1.09E-06 0.005365 29.5917 1 +5 4 8 1.09E-06 0.005365 29.5917 1 +6 4 8 1.09E-06 0.005365 29.5917 1 +7 4 8 1.09E-06 0.005365 29.5917 1 +8 4 8 1.09E-06 0.005365 29.5917 1 +9 4 8 1.09E-06 0.005365 29.5917 1 +10 4 8 1.09E-06 0.005365 29.5917 1 +1 5 8 1.09E-06 0.005365 29.5917 1 +2 5 8 1.09E-06 0.005365 29.5917 1 +3 5 8 1.09E-06 0.005365 29.5917 1 +4 5 8 1.09E-06 0.005365 29.5917 1 +5 5 8 1.09E-06 0.005365 29.5917 1 +6 5 8 1.09E-06 0.005365 29.5917 1 +7 5 8 1.09E-06 0.005365 29.5917 1 +8 5 8 1.09E-06 0.005365 29.5917 1 +9 5 8 1.09E-06 0.005365 29.5917 1 +10 5 8 1.09E-06 0.005365 29.5917 1 +1 6 8 1.09E-06 0.005365 29.5917 1 +2 6 8 1.09E-06 0.005365 29.5917 1 +3 6 8 1.09E-06 0.005365 29.5917 1 +4 6 8 1.09E-06 0.005365 29.5917 1 +5 6 8 1.09E-06 0.005365 29.5917 1 +6 6 8 1.09E-06 0.005365 29.5917 1 +7 6 8 1.09E-06 0.005365 29.5917 1 +8 6 8 1.09E-06 0.005365 29.5917 1 +9 6 8 1.09E-06 0.005365 29.5917 1 +10 6 
8 1.09E-06 0.005365 29.5917 1 +1 7 8 1.09E-06 0.005365 29.5917 1 +2 7 8 1.09E-06 0.005365 29.5917 1 +3 7 8 1.09E-06 0.005365 29.5917 1 +4 7 8 1.09E-06 0.005365 29.5917 1 +5 7 8 1.09E-06 0.005365 29.5917 1 +6 7 8 1.09E-06 0.005365 29.5917 1 +7 7 8 1.09E-06 0.005365 29.5917 1 +8 7 8 1.09E-06 0.005365 29.5917 1 +9 7 8 1.09E-06 0.005365 29.5917 1 +10 7 8 1.09E-06 0.005365 29.5917 1 +1 8 8 1.09E-06 0.005365 29.5917 1 +2 8 8 1.09E-06 0.005365 29.5917 1 +3 8 8 1.09E-06 0.005365 29.5917 1 +4 8 8 1.09E-06 0.005365 29.5917 1 +5 8 8 1.09E-06 0.005365 29.5917 1 +6 8 8 1.09E-06 0.005365 29.5917 1 +7 8 8 1.09E-06 0.005365 29.5917 1 +8 8 8 1.09E-06 0.005365 29.5917 1 +9 8 8 1.09E-06 0.005365 29.5917 1 +10 8 8 1.09E-06 0.005365 29.5917 1 +1 9 8 1.09E-06 0.005365 29.5917 1 +2 9 8 1.09E-06 0.005365 29.5917 1 +3 9 8 1.09E-06 0.005365 29.5917 1 +4 9 8 1.09E-06 0.005365 29.5917 1 +5 9 8 1.09E-06 0.005365 29.5917 1 +6 9 8 1.09E-06 0.005365 29.5917 1 +7 9 8 1.09E-06 0.005365 29.5917 1 +8 9 8 1.09E-06 0.005365 29.5917 1 +9 9 8 1.09E-06 0.005365 29.5917 1 +10 9 8 1.09E-06 0.005365 29.5917 1 +1 10 8 1.09E-06 0.005365 29.5917 1 +2 10 8 1.09E-06 0.005365 29.5917 1 +3 10 8 1.09E-06 0.005365 29.5917 1 +4 10 8 1.09E-06 0.005365 29.5917 1 +5 10 8 1.09E-06 0.005365 29.5917 1 +6 10 8 1.09E-06 0.005365 29.5917 1 +7 10 8 1.09E-06 0.005365 29.5917 1 +8 10 8 1.09E-06 0.005365 29.5917 1 +9 10 8 1.09E-06 0.005365 29.5917 1 +10 10 8 1.09E-06 0.005365 29.5917 1 +1 1 9 1.09E-06 0.005365 29.5917 1 +2 1 9 1.09E-06 0.005365 29.5917 1 +3 1 9 1.09E-06 0.005365 29.5917 1 +4 1 9 1.09E-06 0.005365 29.5917 1 +5 1 9 1.09E-06 0.005365 29.5917 1 +6 1 9 1.09E-06 0.005365 29.5917 1 +7 1 9 1.09E-06 0.005365 29.5917 1 +8 1 9 1.09E-06 0.005365 29.5917 1 +9 1 9 1.09E-06 0.005365 29.5917 1 +10 1 9 1.09E-06 0.005365 29.5917 1 +1 2 9 1.09E-06 0.005365 29.5917 1 +2 2 9 1.09E-06 0.005365 29.5917 1 +3 2 9 1.09E-06 0.005365 29.5917 1 +4 2 9 1.09E-06 0.005365 29.5917 1 +5 2 9 1.09E-06 0.005365 29.5917 1 +6 2 9 1.09E-06 0.005365 
29.5917 1 +7 2 9 1.09E-06 0.005365 29.5917 1 +8 2 9 1.09E-06 0.005365 29.5917 1 +9 2 9 1.09E-06 0.005365 29.5917 1 +10 2 9 1.09E-06 0.005365 29.5917 1 +1 3 9 1.09E-06 0.005365 29.5917 1 +2 3 9 1.09E-06 0.005365 29.5917 1 +3 3 9 1.09E-06 0.005365 29.5917 1 +4 3 9 1.09E-06 0.005365 29.5917 1 +5 3 9 1.09E-06 0.005365 29.5917 1 +6 3 9 1.09E-06 0.005365 29.5917 1 +7 3 9 1.09E-06 0.005365 29.5917 1 +8 3 9 1.09E-06 0.005365 29.5917 1 +9 3 9 1.09E-06 0.005365 29.5917 1 +10 3 9 1.09E-06 0.005365 29.5917 1 +1 4 9 1.09E-06 0.005365 29.5917 1 +2 4 9 1.09E-06 0.005365 29.5917 1 +3 4 9 1.09E-06 0.005365 29.5917 1 +4 4 9 1.09E-06 0.005365 29.5917 1 +5 4 9 1.09E-06 0.005365 29.5917 1 +6 4 9 1.09E-06 0.005365 29.5917 1 +7 4 9 1.09E-06 0.005365 29.5917 1 +8 4 9 1.09E-06 0.005365 29.5917 1 +9 4 9 1.09E-06 0.005365 29.5917 1 +10 4 9 1.09E-06 0.005365 29.5917 1 +1 5 9 1.09E-06 0.005365 29.5917 1 +2 5 9 1.09E-06 0.005365 29.5917 1 +3 5 9 1.09E-06 0.005365 29.5917 1 +4 5 9 1.09E-06 0.005365 29.5917 1 +5 5 9 1.09E-06 0.005365 29.5917 1 +6 5 9 1.09E-06 0.005365 29.5917 1 +7 5 9 1.09E-06 0.005365 29.5917 1 +8 5 9 1.09E-06 0.005365 29.5917 1 +9 5 9 1.09E-06 0.005365 29.5917 1 +10 5 9 1.09E-06 0.005365 29.5917 1 +1 6 9 1.09E-06 0.005365 29.5917 1 +2 6 9 1.09E-06 0.005365 29.5917 1 +3 6 9 1.09E-06 0.005365 29.5917 1 +4 6 9 1.09E-06 0.005365 29.5917 1 +5 6 9 1.09E-06 0.005365 29.5917 1 +6 6 9 1.09E-06 0.005365 29.5917 1 +7 6 9 1.09E-06 0.005365 29.5917 1 +8 6 9 1.09E-06 0.005365 29.5917 1 +9 6 9 1.09E-06 0.005365 29.5917 1 +10 6 9 1.09E-06 0.005365 29.5917 1 +1 7 9 1.09E-06 0.005365 29.5917 1 +2 7 9 1.09E-06 0.005365 29.5917 1 +3 7 9 1.09E-06 0.005365 29.5917 1 +4 7 9 1.09E-06 0.005365 29.5917 1 +5 7 9 1.09E-06 0.005365 29.5917 1 +6 7 9 1.09E-06 0.005365 29.5917 1 +7 7 9 1.09E-06 0.005365 29.5917 1 +8 7 9 1.09E-06 0.005365 29.5917 1 +9 7 9 1.09E-06 0.005365 29.5917 1 +10 7 9 1.09E-06 0.005365 29.5917 1 +1 8 9 1.09E-06 0.005365 29.5917 1 +2 8 9 1.09E-06 0.005365 29.5917 1 +3 8 9 1.09E-06 
0.005365 29.5917 1 +4 8 9 1.09E-06 0.005365 29.5917 1 +5 8 9 1.09E-06 0.005365 29.5917 1 +6 8 9 1.09E-06 0.005365 29.5917 1 +7 8 9 1.09E-06 0.005365 29.5917 1 +8 8 9 1.09E-06 0.005365 29.5917 1 +9 8 9 1.09E-06 0.005365 29.5917 1 +10 8 9 1.09E-06 0.005365 29.5917 1 +1 9 9 1.09E-06 0.005365 29.5917 1 +2 9 9 1.09E-06 0.005365 29.5917 1 +3 9 9 1.09E-06 0.005365 29.5917 1 +4 9 9 1.09E-06 0.005365 29.5917 1 +5 9 9 1.09E-06 0.005365 29.5917 1 +6 9 9 1.09E-06 0.005365 29.5917 1 +7 9 9 1.09E-06 0.005365 29.5917 1 +8 9 9 1.09E-06 0.005365 29.5917 1 +9 9 9 1.09E-06 0.005365 29.5917 1 +10 9 9 1.09E-06 0.005365 29.5917 1 +1 10 9 1.09E-06 0.005365 29.5917 1 +2 10 9 1.09E-06 0.005365 29.5917 1 +3 10 9 1.09E-06 0.005365 29.5917 1 +4 10 9 1.09E-06 0.005365 29.5917 1 +5 10 9 1.09E-06 0.005365 29.5917 1 +6 10 9 1.09E-06 0.005365 29.5917 1 +7 10 9 1.09E-06 0.005365 29.5917 1 +8 10 9 1.09E-06 0.005365 29.5917 1 +9 10 9 1.09E-06 0.005365 29.5917 1 +10 10 9 1.09E-06 0.005365 29.5917 1 +1 1 10 1.09E-06 0.005365 29.5917 1 +2 1 10 1.09E-06 0.005365 29.5917 1 +3 1 10 1.09E-06 0.005365 29.5917 1 +4 1 10 1.09E-06 0.005365 29.5917 1 +5 1 10 1.09E-06 0.005365 29.5917 1 +6 1 10 1.09E-06 0.005365 29.5917 1 +7 1 10 1.09E-06 0.005365 29.5917 1 +8 1 10 1.09E-06 0.005365 29.5917 1 +9 1 10 1.09E-06 0.005365 29.5917 1 +10 1 10 1.09E-06 0.005365 29.5917 1 +1 2 10 1.09E-06 0.005365 29.5917 1 +2 2 10 1.09E-06 0.005365 29.5917 1 +3 2 10 1.09E-06 0.005365 29.5917 1 +4 2 10 1.09E-06 0.005365 29.5917 1 +5 2 10 1.09E-06 0.005365 29.5917 1 +6 2 10 1.09E-06 0.005365 29.5917 1 +7 2 10 1.09E-06 0.005365 29.5917 1 +8 2 10 1.09E-06 0.005365 29.5917 1 +9 2 10 1.09E-06 0.005365 29.5917 1 +10 2 10 1.09E-06 0.005365 29.5917 1 +1 3 10 1.09E-06 0.005365 29.5917 1 +2 3 10 1.09E-06 0.005365 29.5917 1 +3 3 10 1.09E-06 0.005365 29.5917 1 +4 3 10 1.09E-06 0.005365 29.5917 1 +5 3 10 1.09E-06 0.005365 29.5917 1 +6 3 10 1.09E-06 0.005365 29.5917 1 +7 3 10 1.09E-06 0.005365 29.5917 1 +8 3 10 1.09E-06 0.005365 29.5917 1 +9 3 10 
1.09E-06 0.005365 29.5917 1 +10 3 10 1.09E-06 0.005365 29.5917 1 +1 4 10 1.09E-06 0.005365 29.5917 1 +2 4 10 1.09E-06 0.005365 29.5917 1 +3 4 10 1.09E-06 0.005365 29.5917 1 +4 4 10 1.09E-06 0.005365 29.5917 1 +5 4 10 1.09E-06 0.005365 29.5917 1 +6 4 10 1.09E-06 0.005365 29.5917 1 +7 4 10 1.09E-06 0.005365 29.5917 1 +8 4 10 1.09E-06 0.005365 29.5917 1 +9 4 10 1.09E-06 0.005365 29.5917 1 +10 4 10 1.09E-06 0.005365 29.5917 1 +1 5 10 1.09E-06 0.005365 29.5917 1 +2 5 10 1.09E-06 0.005365 29.5917 1 +3 5 10 1.09E-06 0.005365 29.5917 1 +4 5 10 1.09E-06 0.005365 29.5917 1 +5 5 10 1.09E-06 0.005365 29.5917 1 +6 5 10 1.09E-06 0.005365 29.5917 1 +7 5 10 1.09E-06 0.005365 29.5917 1 +8 5 10 1.09E-06 0.005365 29.5917 1 +9 5 10 1.09E-06 0.005365 29.5917 1 +10 5 10 1.09E-06 0.005365 29.5917 1 +1 6 10 1.09E-06 0.005365 29.5917 1 +2 6 10 1.09E-06 0.005365 29.5917 1 +3 6 10 1.09E-06 0.005365 29.5917 1 +4 6 10 1.09E-06 0.005365 29.5917 1 +5 6 10 1.09E-06 0.005365 29.5917 1 +6 6 10 1.09E-06 0.005365 29.5917 1 +7 6 10 1.09E-06 0.005365 29.5917 1 +8 6 10 1.09E-06 0.005365 29.5917 1 +9 6 10 1.09E-06 0.005365 29.5917 1 +10 6 10 1.09E-06 0.005365 29.5917 1 +1 7 10 1.09E-06 0.005365 29.5917 1 +2 7 10 1.09E-06 0.005365 29.5917 1 +3 7 10 1.09E-06 0.005365 29.5917 1 +4 7 10 1.09E-06 0.005365 29.5917 1 +5 7 10 1.09E-06 0.005365 29.5917 1 +6 7 10 1.09E-06 0.005365 29.5917 1 +7 7 10 1.09E-06 0.005365 29.5917 1 +8 7 10 1.09E-06 0.005365 29.5917 1 +9 7 10 1.09E-06 0.005365 29.5917 1 +10 7 10 1.09E-06 0.005365 29.5917 1 +1 8 10 1.09E-06 0.005365 29.5917 1 +2 8 10 1.09E-06 0.005365 29.5917 1 +3 8 10 1.09E-06 0.005365 29.5917 1 +4 8 10 1.09E-06 0.005365 29.5917 1 +5 8 10 1.09E-06 0.005365 29.5917 1 +6 8 10 1.09E-06 0.005365 29.5917 1 +7 8 10 1.09E-06 0.005365 29.5917 1 +8 8 10 1.09E-06 0.005365 29.5917 1 +9 8 10 1.09E-06 0.005365 29.5917 1 +10 8 10 1.09E-06 0.005365 29.5917 1 +1 9 10 1.09E-06 0.005365 29.5917 1 +2 9 10 1.09E-06 0.005365 29.5917 1 +3 9 10 1.09E-06 0.005365 29.5917 1 +4 9 10 1.09E-06 
0.005365 29.5917 1 +5 9 10 1.09E-06 0.005365 29.5917 1 +6 9 10 1.09E-06 0.005365 29.5917 1 +7 9 10 1.09E-06 0.005365 29.5917 1 +8 9 10 1.09E-06 0.005365 29.5917 1 +9 9 10 1.09E-06 0.005365 29.5917 1 +10 9 10 1.09E-06 0.005365 29.5917 1 +1 10 10 1.09E-06 0.005365 29.5917 1 +2 10 10 1.09E-06 0.005365 29.5917 1 +3 10 10 1.09E-06 0.005365 29.5917 1 +4 10 10 1.09E-06 0.005365 29.5917 1 +5 10 10 1.09E-06 0.005365 29.5917 1 +6 10 10 1.09E-06 0.005365 29.5917 1 +7 10 10 1.09E-06 0.005365 29.5917 1 +8 10 10 1.09E-06 0.005365 29.5917 1 +9 10 10 1.09E-06 0.005365 29.5917 1 +10 10 10 1.09E-06 0.005365 29.5917 1 From 794caf02c2c230f5437d153515a392c220ea1852 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 12 Aug 2025 17:41:26 -0500 Subject: [PATCH 044/604] fixes in response to comments --- src/EXTRA-FIX/fix_ttm_thermal.cpp | 88 +++++++++++++++---------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp index 3803204eb61..4abe3247439 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -328,7 +328,7 @@ void FixTTMThermal::post_force(int /*vflag*/) gamma1 = gfactor1[type[i]]; gamma2 = gfactor2[type[i]] * tsqrt; - if (T_electron[iz][iy][ix] > 1e-5) { + if (T_electron[iz][iy][ix] > 0) { flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); @@ -455,46 +455,48 @@ void FixTTMThermal::end_of_step() if (yleft == -1) yleft = nygrid - 1; if (zleft == -1) zleft = nzgrid - 1; - // Initialize flags for vacuum - int left = 1; - int right =1; - int in = 1; - int out = 1; - int up = 1; - int down = 1; - - // Set flags to 0 if vaccum - if (T_electron[iz][iy][xleft] < 1e-5) left = 0; - if (T_electron[iz][iy][xright] < 1e-5) right = 0; - if (T_electron[iz][yright][ix] < 1e-5) in = 0; - if (T_electron[iz][yleft][ix] < 1e-5) out = 0; 
- if (T_electron[zright][iy][ix] < 1e-5) up = 0; - if (T_electron[zleft][iy][ix] < 1e-5) down = 0; - - if (T_electron[iz][iy][ix] > 1e-5) { - T_electron[iz][iy][ix] = - T_electron_old[iz][iy][ix] + inner_dt/c_e_grid[iz][iy][ix]*( - (safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + - - (safe_effective_kappa(k_e_grid[iz][iy][xright],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][iy][xright]-T_electron_old[iz][iy][ix])/dx/dx*right + - - (safe_effective_kappa(k_e_grid[iz][yleft][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][yleft][ix]-T_electron_old[iz][iy][ix])/dy/dy*out + - - (safe_effective_kappa(k_e_grid[iz][yright][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[iz][yright][ix]-T_electron_old[iz][iy][ix])/dy/dy*in + - - (safe_effective_kappa(k_e_grid[zleft][iy][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[zleft][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*down + - - (safe_effective_kappa(k_e_grid[zright][iy][ix],k_e_grid[iz][iy][ix]))* - (T_electron_old[zright][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*up - - -(net_energy_transfer_all[iz][iy][ix])/(del_vol) - +(inductive_power*inductive_response_grid[iz][iy][ix]));} - } + // Initialize flags for vacuum + int left = 1; + int right =1; + int in = 1; + int out = 1; + int up = 1; + int down = 1; + + // Set flags to 0 if vacuum + if (T_electron[iz][iy][xleft] == 0) left = 0; + if (T_electron[iz][iy][xright] == 0) right = 0; + if (T_electron[iz][yright][ix] == 0) in = 0; + if (T_electron[iz][yleft][ix] == 0) out = 0; + if (T_electron[zright][iy][ix] == 0) up = 0; + if (T_electron[zleft][iy][ix] == 0) down = 0; + + if (T_electron[iz][iy][ix] > 0) { + T_electron[iz][iy][ix] = + T_electron_old[iz][iy][ix] + + inner_dt/c_e_grid[iz][iy][ix] * + ((safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + + + 
(safe_effective_kappa(k_e_grid[iz][iy][xright],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][iy][xright]-T_electron_old[iz][iy][ix])/dx/dx*right + + + (safe_effective_kappa(k_e_grid[iz][yleft][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yleft][ix]-T_electron_old[iz][iy][ix])/dy/dy*out + + + (safe_effective_kappa(k_e_grid[iz][yright][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[iz][yright][ix]-T_electron_old[iz][iy][ix])/dy/dy*in + + + (safe_effective_kappa(k_e_grid[zleft][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zleft][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*down + + + (safe_effective_kappa(k_e_grid[zright][iy][ix],k_e_grid[iz][iy][ix]))* + (T_electron_old[zright][iy][ix]-T_electron_old[iz][iy][ix])/dz/dz*up + + -(net_energy_transfer_all[iz][iy][ix])/(del_vol) + +(inductive_power*inductive_response_grid[iz][iy][ix])); + } + } } @@ -516,7 +518,7 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) memory->create(prop_initial_set,nzgrid,nygrid,nxgrid,"ttm:prop_initial_set"); memset(&prop_initial_set[0][0][0],0,ngridtotal*sizeof(int)); - // read initial electron temperature values from file + // read electron properties from file bigint nread = 0; try { @@ -811,10 +813,8 @@ double FixTTMThermal::compute_vector(int n) T_electron[iz][iy][ix]*c_e_grid[iz][iy][ix]*del_vol; transfer_energy += net_energy_transfer_all[iz][iy][ix]*update->dt; - //printf("TRANSFER %d %d %d %g\n",ix,iy,iz,transfer_energy); } - //printf("TRANSFER %g\n",transfer_energy); outflag = 1; } From b905c8aa68ac14e08e55524c84423efd7e6390d3 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Thu, 28 Aug 2025 13:24:16 -0600 Subject: [PATCH 045/604] Adding exception to special flags --- src/BPM/bond_bpm.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/BPM/bond_bpm.cpp b/src/BPM/bond_bpm.cpp index 5d13c554897..f28b76785ef 100644 --- a/src/BPM/bond_bpm.cpp +++ b/src/BPM/bond_bpm.cpp @@ -60,7 +60,7 @@ BondBPM::BondBPM(LAMMPS *_lmp) : overlay_flag 
= 0; property_atom_flag = 0; break_flag = 1; - ignore_special_flag = 1; + ignore_special_flag = 0; nvalues = 0; writedata = 0; @@ -338,6 +338,10 @@ void BondBPM::settings(int narg, char **arg) delete[] id_fix_dummy_history; id_fix_dummy_history = nullptr; } + + // If bonds don't break and there's no overlay, can ignore special requirements + if (break_flag == 0 && overlay_flag == 0) + ignore_special_flag = 1; } /* ---------------------------------------------------------------------- From 8f4cb2fac0d4b4bcdb742b3cf6adbc7515b90939 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Thu, 28 Aug 2025 19:49:05 -0600 Subject: [PATCH 046/604] Catching handling of NONE --- src/EXTRA-FIX/fix_deform_pressure.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/EXTRA-FIX/fix_deform_pressure.cpp b/src/EXTRA-FIX/fix_deform_pressure.cpp index 50dba19408e..634d1f01289 100644 --- a/src/EXTRA-FIX/fix_deform_pressure.cpp +++ b/src/EXTRA-FIX/fix_deform_pressure.cpp @@ -306,12 +306,11 @@ FixDeformPressure::FixDeformPressure(LAMMPS *lmp, int narg, char **arg) : } } - // set strain_flag + // set strain_flag, also sets defaults for NONE strain_flag = 0; for (int i = 0; i < 6; i++) - if (set[i].style != NONE && set[i].style != VOLUME && - set[i].style != PRESSURE && set[i].style != PMEAN) + if (set[i].style != VOLUME && set[i].style != PRESSURE && set[i].style != PMEAN) strain_flag = 1; // set pressure_flag From 785abf874252231e8c7b91446df608a50eabec40 Mon Sep 17 00:00:00 2001 From: "Dan S. 
Bolintineanu" Date: Fri, 29 Aug 2025 09:20:40 -0600 Subject: [PATCH 047/604] Fixed minor bug for cutoff calculations with fix freeze --- src/GRANULAR/pair_granular.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index a28139df46d..d26f0dc2569 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -609,6 +609,8 @@ double PairGranular::init_one(int i, int j) // radius info about both i and j exist ((maxrad_frozen[i] > 0.0) && (maxrad_dynamic[j] > 0.0))) { cutoff = maxrad_dynamic[i] + maxrad_dynamic[j]; + cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]); + cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]); pulloff = 0.0; if (model->beyond_contact) { pulloff = model->pulloff_distance(maxrad_dynamic[i], maxrad_dynamic[j]); From 436a9bd5d3b9066e738266ee13308647d8ef7a40 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 29 Aug 2025 13:10:50 -0600 Subject: [PATCH 048/604] typo --- doc/src/pair_granular.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/pair_granular.rst b/doc/src/pair_granular.rst index 599f23844e0..3c6e6fcefba 100644 --- a/doc/src/pair_granular.rst +++ b/doc/src/pair_granular.rst @@ -175,7 +175,7 @@ lose contact. Typically, neighbor lists are constructed for pair granular by testing whether finite sized particles overlap (using their radii). However, - this is not the case for normal normals which can interact beyond + this is not the case for normal models which can interact beyond contact, e.g. *jkr*. Instead, the maximum radius for each particle type is first calculated then used to calculate a maximum per-type cutoff distance. For polydisperse systems, this affects the performance From 8ef0005dbf62486a5dc176ab4ad56614e3083c8a Mon Sep 17 00:00:00 2001 From: chogene Date: Sun, 31 Aug 2025 15:14:42 +0900 Subject: [PATCH 049/604] In compute stress, use virial. Removed repeated line. 
--- examples/ELASTIC_T/BORN_MATRIX/Silicon/output.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/ELASTIC_T/BORN_MATRIX/Silicon/output.in b/examples/ELASTIC_T/BORN_MATRIX/Silicon/output.in index 05e007fab6b..19021c71667 100644 --- a/examples/ELASTIC_T/BORN_MATRIX/Silicon/output.in +++ b/examples/ELASTIC_T/BORN_MATRIX/Silicon/output.in @@ -2,7 +2,7 @@ # Stress fluctuation term F -compute stress all pressure thermo_temp +compute stress all pressure NULL virial variable s1 equal c_stress[1] variable s2 equal c_stress[2] variable s3 equal c_stress[3] @@ -16,7 +16,6 @@ variable s33 equal v_s3*v_s3 variable s44 equal v_s4*v_s4 variable s55 equal v_s5*v_s5 variable s66 equal v_s6*v_s6 -variable s33 equal v_s3*v_s3 variable s12 equal v_s1*v_s2 variable s13 equal v_s1*v_s3 variable s14 equal v_s1*v_s4 From 2cf584b92b590f245393fe912243e84e4ffc9b43 Mon Sep 17 00:00:00 2001 From: chogene Date: Wed, 3 Sep 2025 18:52:28 +0900 Subject: [PATCH 050/604] Updated log files --- .../BORN_MATRIX/Silicon/log.elastic.ortho | 664 +++++++++++++++++ .../BORN_MATRIX/Silicon/log.elastic.tri | 666 ++++++++++++++++++ 2 files changed, 1330 insertions(+) create mode 100644 examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.ortho create mode 100644 examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.tri diff --git a/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.ortho b/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.ortho new file mode 100644 index 00000000000..f26ab1c4ad9 --- /dev/null +++ b/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.ortho @@ -0,0 +1,664 @@ + +# select temperature and pressure (lattice constant) + +variable temp index 1477.0 # temperature of initial sample +variable a index 5.457 # lattice constant + +# select sampling parameters, important for speed/convergence + +variable nthermo index 1500 # interval for thermo output +variable nevery index 10 # stress sampling interval +variable neveryborn index 100 # Born sampling interval 
+variable timestep index 0.000766 # timestep +variable nlat index 3 # number of lattice unit cells + +# other settings + +variable mass1 index 28.06 # mass +variable tdamp index 0.01 # time constant for thermostat +variable seed index 123457 # seed for thermostat +variable thermostat index 1 # 0 if NVE, 1 if NVT +variable delta index 1.0e-6 # Born numdiff strain magnitude + +# hard-coded rules-of-thumb for run length, etc. + +variable nfreq equal ${nthermo} # interval for averaging output +variable nfreq equal 1500 +variable nrepeat equal floor(${nfreq}/${nevery}) # number of samples +variable nrepeat equal floor(1500/${nevery}) +variable nrepeat equal floor(1500/10) +variable nrepeatborn equal floor(${nfreq}/${neveryborn}) # number of samples +variable nrepeatborn equal floor(1500/${neveryborn}) +variable nrepeatborn equal floor(1500/100) +variable nequil equal 10*${nthermo} # length of equilibration run +variable nequil equal 10*1500 +variable nrun equal 100*${nthermo} # length of equilibrated run +variable nrun equal 100*1500 + +# this generates a general triclinic cell +# conforming to LAMMPS cell (upper triangular) + +units metal +box tilt large + +The 'box' command has been removed and will be ignored + + +# unit lattice vectors are +# a1 = (a1x 0 0) +# a2 = (a2x a2y 0) +# a3 = (a3x a3y a3z) + +variable a1x index 1 +variable a2x index 0 +variable a2y index 1 +variable a3x index 0 +variable a3y index 0 +variable a3z index 1 +variable atmp equal $a +variable atmp equal 5.457 +variable l index $a +variable l index 5.457 +variable basis index "basis 0 0 0 basis 0.25 0.25 0.25 basis 0 0.5 0.5 basis 0.25 0.75 0.75 basis 0.5 0 0.5 basis 0.75 0.25 0.75 basis 0.5 0.5 0 basis 0.75 0.75 0.25" +lattice custom ${l} a1 ${a1x} 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 ${a1x} 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} 
${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 1.0 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 1.0 0 a3 0.0 ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 1.0 0 a3 0.0 0.0 ${a3z} ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 1.0 0 a3 0.0 0.0 1.0 ${basis} spacing 1 1 1 +lattice custom 5.457 a1 1.0 0 0 a2 0.0 1.0 0 a3 0.0 0.0 1.0 basis 0 0 0 basis 0.25 0.25 0.25 basis 0 0.5 0.5 basis 0.25 0.75 0.75 basis 0.5 0 0.5 basis 0.75 0.25 0.75 basis 0.5 0.5 0 basis 0.75 0.75 0.25 spacing 1 1 1 +Lattice spacing in x,y,z = 5.457 5.457 5.457 + +region box prism 0 ${a1x} 0 ${a2y} 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 ${a2y} 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 1.0 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 1.0 0 1.0 ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 1.0 0 1.0 0.0 ${a3x} ${a3y} +region box prism 0 1.0 0 1.0 0 1.0 0.0 0.0 ${a3y} +region box prism 0 1.0 0 1.0 0 1.0 0.0 0.0 0.0 + +create_box 1 box +Created triclinic box = (0 0 0) to (5.457 5.457 5.457) with tilt (0 0 0) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 8 atoms + using lattice units in triclinic box = (0 0 0) to (5.457 5.457 5.457) with tilt (0 0 0) + create_atoms CPU = 0.001 seconds +mass 1 ${mass1} +mass 1 28.06 +replicate ${nlat} ${nlat} ${nlat} +replicate 3 ${nlat} ${nlat} +replicate 3 3 ${nlat} +replicate 3 3 3 +Replication is creating a 3x3x3 = 27 times larger system... + triclinic box = (0 0 0) to (16.371 16.371 16.371) with tilt (0 0 0) + 1 by 1 by 1 MPI processor grid + 216 atoms + replicate CPU = 0.001 seconds +velocity all create ${temp} 87287 +velocity all create 1477.0 87287 + + + +# Compute initial state + +include potential.in +# NOTE: This script can be modified for different pair styles +# See in.elastic for more info. 
+ +reset_timestep 0 + +# Choose potential +pair_style sw +pair_coeff * * Si.sw Si +Reading sw potential file Si.sw with DATE: 2007-06-11 + +# Setup neighbor style +neighbor 1.0 nsq +neigh_modify once no every 1 delay 0 check yes + +# Setup MD + +timestep ${timestep} +timestep 0.000766 +fix 4 all nve +if "${thermostat} == 1" then "fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed}" +fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 123457 + + +thermo_style custom step temp pe press density +run ${nequil} +run 15000 +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.77118 + ghost atom cutoff = 4.77118 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair sw, perpetual + attributes: full, newton on + pair build: full/nsq + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 3.063 | 3.063 | 3.063 Mbytes + Step Temp PotEng Press Density + 0 1477 -936.42473 -4264.7155 2.2938491 + 15000 1577.398 -896.04653 1913.3039 2.2938491 +Loop time of 20.3883 on 1 procs for 15000 steps with 216 atoms + +Performance: 48.691 ns/day, 0.493 hours/ns, 735.714 timesteps/s, 158.914 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 19.117 | 19.117 | 19.117 | 0.0 | 93.76 +Neigh | 0.41585 | 0.41585 | 0.41585 | 0.0 | 2.04 +Comm | 0.13829 | 0.13829 | 0.13829 | 0.0 | 0.68 +Output | 0.00010729 | 0.00010729 | 0.00010729 | 0.0 | 0.00 +Modify | 0.65829 | 0.65829 | 0.65829 | 0.0 | 3.23 +Other | | 0.0591 | | | 0.29 + +Nlocal: 216 ave 216 max 216 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 648 ave 
648 max 648 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 5772 ave 5772 max 5772 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 5772 +Ave neighs/atom = 26.722222 +Neighbor list builds = 258 +Dangerous builds = 0 + +# Run dynamics + +include potential.in +# NOTE: This script can be modified for different pair styles +# See in.elastic for more info. + +reset_timestep 0 + +# Choose potential +pair_style sw +pair_coeff * * Si.sw Si +Reading sw potential file Si.sw with DATE: 2007-06-11 + +# Setup neighbor style +neighbor 1.0 nsq +neigh_modify once no every 1 delay 0 check yes + +# Setup MD + +timestep ${timestep} +timestep 0.000766 +fix 4 all nve +if "${thermostat} == 1" then "fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed}" +fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 123457 + + +include output.in +# Setup output + +# Stress fluctuation term F + +compute stress all pressure NULL virial +variable s1 equal c_stress[1] +variable s2 equal c_stress[2] +variable s3 equal c_stress[3] +variable s4 equal c_stress[6] +variable s5 equal c_stress[5] +variable s6 equal c_stress[4] + +variable s11 equal v_s1*v_s1 +variable s22 equal v_s2*v_s2 +variable s33 equal v_s3*v_s3 +variable s44 equal v_s4*v_s4 +variable s55 equal v_s5*v_s5 +variable s66 equal v_s6*v_s6 +variable s12 equal v_s1*v_s2 +variable s13 equal v_s1*v_s3 +variable s14 equal v_s1*v_s4 +variable s15 equal v_s1*v_s5 +variable s16 equal v_s1*v_s6 +variable s23 equal v_s2*v_s3 +variable s24 equal v_s2*v_s4 +variable s25 equal v_s2*v_s5 +variable s26 equal v_s2*v_s6 +variable s34 equal v_s3*v_s4 +variable s35 equal v_s3*v_s5 +variable s36 equal v_s3*v_s6 +variable s45 equal v_s4*v_s5 +variable s46 equal v_s4*v_s6 +variable s56 equal v_s5*v_s6 + 
+variable mytemp equal temp +variable mypress equal press +variable mype equal pe/atoms +fix avt all ave/time ${nevery} ${nrepeat} ${nfreq} v_mytemp ave running +fix avt all ave/time 10 ${nrepeat} ${nfreq} v_mytemp ave running +fix avt all ave/time 10 150 ${nfreq} v_mytemp ave running +fix avt all ave/time 10 150 1500 v_mytemp ave running +fix avp all ave/time ${nevery} ${nrepeat} ${nfreq} v_mypress ave running +fix avp all ave/time 10 ${nrepeat} ${nfreq} v_mypress ave running +fix avp all ave/time 10 150 ${nfreq} v_mypress ave running +fix avp all ave/time 10 150 1500 v_mypress ave running +fix avpe all ave/time ${nevery} ${nrepeat} ${nfreq} v_mype ave running +fix avpe all ave/time 10 ${nrepeat} ${nfreq} v_mype ave running +fix avpe all ave/time 10 150 ${nfreq} v_mype ave running +fix avpe all ave/time 10 150 1500 v_mype ave running +fix avs all ave/time ${nevery} ${nrepeat} ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 ${nrepeat} ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 150 ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 150 1500 v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avssq all ave/time ${nevery} ${nrepeat} ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 ${nrepeat} ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 150 ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 150 1500 v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running + +# bar to GPa +variable pconv equal 1.0e5/1.0e9 +variable cunits index GPa +# metal 
unit constants from LAMMPS +# force->nktv2p = 1.6021765e6; +# force->boltz = 8.617343e-5; +variable boltz equal 8.617343e-5 +variable nktv2p equal 1.6021765e6 +variable vkt equal vol/(${boltz}*${temp})/${nktv2p} +variable vkt equal vol/(8.617343e-05*${temp})/${nktv2p} +variable vkt equal vol/(8.617343e-05*1477.0)/${nktv2p} +variable vkt equal vol/(8.617343e-05*1477.0)/1602176.5 +variable ffac equal ${pconv}*${vkt} +variable ffac equal 0.0001*${vkt} +variable ffac equal 0.0001*0.0215159929384811 + +variable F11 equal -(f_avssq[1]-f_avs[1]*f_avs[1])*${ffac} +variable F11 equal -(f_avssq[1]-f_avs[1]*f_avs[1])*2.15159929384811e-06 +variable F22 equal -(f_avssq[2]-f_avs[2]*f_avs[2])*${ffac} +variable F22 equal -(f_avssq[2]-f_avs[2]*f_avs[2])*2.15159929384811e-06 +variable F33 equal -(f_avssq[3]-f_avs[3]*f_avs[3])*${ffac} +variable F33 equal -(f_avssq[3]-f_avs[3]*f_avs[3])*2.15159929384811e-06 +variable F44 equal -(f_avssq[4]-f_avs[4]*f_avs[4])*${ffac} +variable F44 equal -(f_avssq[4]-f_avs[4]*f_avs[4])*2.15159929384811e-06 +variable F55 equal -(f_avssq[5]-f_avs[5]*f_avs[5])*${ffac} +variable F55 equal -(f_avssq[5]-f_avs[5]*f_avs[5])*2.15159929384811e-06 +variable F66 equal -(f_avssq[6]-f_avs[6]*f_avs[6])*${ffac} +variable F66 equal -(f_avssq[6]-f_avs[6]*f_avs[6])*2.15159929384811e-06 + +variable F12 equal -(f_avssq[7]-f_avs[1]*f_avs[2])*${ffac} +variable F12 equal -(f_avssq[7]-f_avs[1]*f_avs[2])*2.15159929384811e-06 +variable F13 equal -(f_avssq[8]-f_avs[1]*f_avs[3])*${ffac} +variable F13 equal -(f_avssq[8]-f_avs[1]*f_avs[3])*2.15159929384811e-06 +variable F14 equal -(f_avssq[9]-f_avs[1]*f_avs[4])*${ffac} +variable F14 equal -(f_avssq[9]-f_avs[1]*f_avs[4])*2.15159929384811e-06 +variable F15 equal -(f_avssq[10]-f_avs[1]*f_avs[5])*${ffac} +variable F15 equal -(f_avssq[10]-f_avs[1]*f_avs[5])*2.15159929384811e-06 +variable F16 equal -(f_avssq[11]-f_avs[1]*f_avs[6])*${ffac} +variable F16 equal -(f_avssq[11]-f_avs[1]*f_avs[6])*2.15159929384811e-06 + +variable F23 equal 
-(f_avssq[12]-f_avs[2]*f_avs[3])*${ffac} +variable F23 equal -(f_avssq[12]-f_avs[2]*f_avs[3])*2.15159929384811e-06 +variable F24 equal -(f_avssq[13]-f_avs[2]*f_avs[4])*${ffac} +variable F24 equal -(f_avssq[13]-f_avs[2]*f_avs[4])*2.15159929384811e-06 +variable F25 equal -(f_avssq[14]-f_avs[2]*f_avs[5])*${ffac} +variable F25 equal -(f_avssq[14]-f_avs[2]*f_avs[5])*2.15159929384811e-06 +variable F26 equal -(f_avssq[15]-f_avs[2]*f_avs[6])*${ffac} +variable F26 equal -(f_avssq[15]-f_avs[2]*f_avs[6])*2.15159929384811e-06 + +variable F34 equal -(f_avssq[16]-f_avs[3]*f_avs[4])*${ffac} +variable F34 equal -(f_avssq[16]-f_avs[3]*f_avs[4])*2.15159929384811e-06 +variable F35 equal -(f_avssq[17]-f_avs[3]*f_avs[5])*${ffac} +variable F35 equal -(f_avssq[17]-f_avs[3]*f_avs[5])*2.15159929384811e-06 +variable F36 equal -(f_avssq[18]-f_avs[3]*f_avs[6])*${ffac} +variable F36 equal -(f_avssq[18]-f_avs[3]*f_avs[6])*2.15159929384811e-06 + +variable F45 equal -(f_avssq[19]-f_avs[4]*f_avs[5])*${ffac} +variable F45 equal -(f_avssq[19]-f_avs[4]*f_avs[5])*2.15159929384811e-06 +variable F46 equal -(f_avssq[20]-f_avs[4]*f_avs[6])*${ffac} +variable F46 equal -(f_avssq[20]-f_avs[4]*f_avs[6])*2.15159929384811e-06 + +variable F56 equal -(f_avssq[21]-f_avs[5]*f_avs[6])*${ffac} +variable F56 equal -(f_avssq[21]-f_avs[5]*f_avs[6])*2.15159929384811e-06 + +# Born term + +compute virial all pressure NULL virial +compute born all born/matrix numdiff ${delta} virial +compute born all born/matrix numdiff 1.0e-6 virial +fix avborn all ave/time ${neveryborn} ${nrepeatborn} ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 ${nrepeatborn} ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 15 ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 15 1500 c_born[*] ave running + +variable bfac equal ${pconv}*${nktv2p}/vol +variable bfac equal 0.0001*${nktv2p}/vol +variable bfac equal 0.0001*1602176.5/vol +variable B vector f_avborn*${bfac} +variable B vector f_avborn*0.036516128938577 + +# 
Kinetic term + +variable kfac equal ${pconv}*${nktv2p}*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*${nktv2p}*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*8.617343e-05*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*8.617343e-05*1477.0/vol +variable K11 equal 4.0*${kfac} +variable K11 equal 4.0*1.00390440086865 +variable K22 equal 4.0*${kfac} +variable K22 equal 4.0*1.00390440086865 +variable K33 equal 4.0*${kfac} +variable K33 equal 4.0*1.00390440086865 +variable K44 equal 2.0*${kfac} +variable K44 equal 2.0*1.00390440086865 +variable K55 equal 2.0*${kfac} +variable K55 equal 2.0*1.00390440086865 +variable K66 equal 2.0*${kfac} +variable K66 equal 2.0*1.00390440086865 + +# Add F, K, and B together + +variable C11 equal v_F11+v_B[1]+v_K11 +variable C22 equal v_F22+v_B[2]+v_K22 +variable C33 equal v_F33+v_B[3]+v_K33 +variable C44 equal v_F44+v_B[4]+v_K44 +variable C55 equal v_F55+v_B[5]+v_K55 +variable C66 equal v_F66+v_B[6]+v_K66 + +variable C12 equal v_F12+v_B[7] +variable C13 equal v_F13+v_B[8] +variable C14 equal v_F14+v_B[9] +variable C15 equal v_F15+v_B[10] +variable C16 equal v_F16+v_B[11] + +variable C23 equal v_F23+v_B[12] +variable C24 equal v_F24+v_B[13] +variable C25 equal v_F25+v_B[14] +variable C26 equal v_F26+v_B[15] + +variable C34 equal v_F34+v_B[16] +variable C35 equal v_F35+v_B[17] +variable C36 equal v_F36+v_B[18] + +variable C45 equal v_F45+v_B[19] +variable C46 equal v_F46+v_B[20] + +variable C56 equal v_F56+v_B[21] + +thermo ${nthermo} +thermo 1500 +thermo_style custom step temp pe press density f_avt f_avp f_avpe v_F11 v_F22 v_F33 v_F44 v_F55 v_F66 v_F12 v_F13 v_F23 v_B[*8] v_B[12] + +thermo_modify norm no + +run ${nrun} +run 150000 +Per MPI rank memory allocation (min/avg/max) = 3.813 | 3.813 | 3.813 Mbytes + Step Temp PotEng Press Density f_avt f_avp f_avpe v_F11 v_F22 v_F33 v_F44 v_F55 v_F66 v_F12 v_F13 v_F23 v_B[1] v_B[2] v_B[3] 
v_B[4] v_B[5] v_B[6] v_B[7] v_B[8] v_B[12] + 0 1577.398 -896.04653 1913.3039 2.2938491 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1500 1508.6833 -893.01097 2095.5473 2.2938491 1487.8719 221.5416 -4.1309352 -4.6852596 -6.1556398 -4.0350182 -57.481061 -40.337975 -64.217351 -0.16264203 -1.4049443 -0.8890372 136.73748 136.64703 137.17994 99.936294 100.18308 99.679906 74.758307 75.141152 75.099604 + 3000 1499.9942 -889.72404 1925.1944 2.2938491 1491.7881 340.28005 -4.1310551 -5.5882895 -5.7348039 -4.5122348 -49.812218 -44.098707 -62.585519 -0.8083508 -1.5522774 -0.92917467 136.87769 136.73019 136.86382 99.854243 99.92941 99.973824 75.007409 74.896145 74.974413 + 4500 1427.0771 -892.60293 1041.3243 2.2938491 1492.5564 250.18266 -4.129748 -5.6169461 -5.4361638 -4.5706404 -57.86683 -47.619792 -58.133974 -0.74458352 -1.0278867 -0.65956771 136.58183 136.78193 136.59375 99.842989 99.683443 99.927903 75.016411 74.719965 74.911721 + 6000 1427.7275 -897.29135 244.54357 2.2938491 1488.1059 1.0132084 -4.1290482 -6.3070298 -5.1134506 -5.4904919 -57.333484 -49.370302 -57.365347 -0.93310451 -1.6793127 -0.90380319 136.53802 136.55894 136.53753 99.693027 99.667187 99.779485 74.92951 74.791078 74.848989 + 7500 1358.7766 -892.00345 -1097.0783 2.2938491 1490.2992 48.622202 -4.1294078 -6.1595244 -4.964991 -6.034334 -56.372457 -50.845904 -57.47128 -1.0099839 -1.4843848 -1.0382943 136.55123 136.58974 136.60484 99.752866 99.707683 99.789787 74.86056 74.817202 74.881881 + 9000 1509.9993 -890.91567 -572.61789 2.2938491 1487.6449 8.9278029 -4.1298889 -5.8332084 -4.8655263 -5.885691 -59.962325 -50.541803 -56.782728 -0.97386167 -1.2686688 -0.8689065 136.56355 136.6415 136.64995 99.762655 99.724313 99.832776 74.850562 74.792544 74.886482 + 10500 1573.0587 -896.33714 2340.6176 2.2938491 1486.4631 -44.933059 -4.1299065 -5.8889155 -5.3174439 -5.6485017 -58.305841 -49.418656 -57.98166 -1.1926334 -1.2673011 -0.92117463 136.51594 136.61989 136.54001 99.714657 99.68072 99.816168 74.829906 74.733646 
74.856239 + 12000 1422.534 -889.32187 -1437.4714 2.2938491 1485.7625 -49.389295 -4.1301115 -5.6338969 -5.1813547 -5.48745 -57.391036 -49.725709 -57.728417 -1.0965755 -1.2370956 -0.92565745 136.53362 136.57306 136.47623 99.683422 99.684726 99.81514 74.799336 74.712494 74.781304 + 13500 1499.6322 -894.48101 -105.05669 2.2938491 1484.9273 -7.3314588 -4.1304097 -5.6011843 -5.1098258 -5.2693604 -57.382655 -50.885102 -58.596297 -1.0035649 -1.2512907 -0.86374953 136.55834 136.5733 136.5036 99.724837 99.711413 99.83368 74.788074 74.721721 74.755172 + 15000 1573.082 -897.75766 765.554 2.2938491 1483.1715 -1.5236431 -4.1307534 -5.5303747 -5.0722101 -5.1966584 -57.042301 -51.276205 -57.77004 -0.98617123 -1.2294834 -0.99253067 136.56205 136.64636 136.53617 99.786115 99.742317 99.857134 74.766003 74.70471 74.753035 + 16500 1523.7965 -892.61351 570.84506 2.2938491 1483.025 20.014131 -4.1307057 -5.6491813 -5.1251214 -5.0431129 -58.923748 -53.004251 -57.380056 -0.9835262 -1.2559912 -0.93442295 136.54238 136.63878 136.48995 99.766671 99.724281 99.869177 74.782941 74.669084 74.727693 + 18000 1514.7372 -893.3318 664.66753 2.2938491 1483.7273 13.503533 -4.1306242 -5.5734219 -5.0149757 -5.1940202 -58.428801 -52.763515 -56.81655 -0.91546872 -1.246324 -0.91266601 136.52313 136.64046 136.48293 99.760241 99.705954 99.878222 74.788508 74.639318 74.731988 + 19500 1456.2333 -890.8736 -730.93027 2.2938491 1484.1039 4.6565591 -4.1304924 -5.539528 -4.9674431 -5.2876012 -59.204208 -51.795805 -57.671877 -0.98491973 -1.3478721 -0.92106888 136.53013 136.66747 136.45717 99.758734 99.680768 99.895346 74.834944 74.632876 74.749659 + 21000 1435.0838 -890.56397 -2261.0677 2.2938491 1484.4731 8.9317271 -4.1305363 -5.4798108 -5.0920748 -5.3720835 -58.880237 -53.634282 -56.940037 -1.0779835 -1.3376906 -0.97935283 136.56234 136.65869 136.44382 99.749007 99.68833 99.907263 74.850341 74.643174 74.728417 + 22500 1372.344 -891.22453 -671.57072 2.2938491 1484.4135 38.534277 -4.1306659 -5.4238581 -5.1327245 
-5.3145376 -57.724416 -54.029927 -55.758774 -1.0695574 -1.3576248 -0.93116283 136.58373 136.65318 136.45767 99.764219 99.714241 99.910279 74.848006 74.655344 74.705424 + 24000 1462.6233 -894.07981 -688.49609 2.2938491 1484.6962 23.703539 -4.1305814 -5.5353925 -5.0655061 -5.3531978 -57.356916 -53.958975 -55.612715 -1.0174505 -1.2537552 -0.91769412 136.56683 136.61099 136.44677 99.737652 99.715539 99.882449 74.833203 74.68036 74.705237 + 25500 1531.1544 -889.44134 665.66906 2.2938491 1484.7664 17.7273 -4.1304496 -5.4600113 -5.0234325 -5.3817699 -57.201079 -53.080408 -58.025614 -1.0044908 -1.2159433 -0.90921125 136.53137 136.59092 136.45057 99.744775 99.707432 99.851116 74.818971 74.68331 74.711832 + 27000 1564.7887 -892.33289 -5.1182694 2.2938491 1485.4464 56.491873 -4.1306786 -5.4677207 -5.0290555 -5.3811197 -56.912372 -53.534555 -58.079681 -1.058833 -1.2169505 -0.90127467 136.56281 136.61304 136.47784 99.759554 99.73906 99.875967 74.811876 74.686141 74.703767 + 28500 1370.6209 -893.84637 -2919.9871 2.2938491 1485.2779 54.602708 -4.130832 -5.5569778 -5.072526 -5.346815 -57.524202 -53.152122 -58.170576 -1.0810489 -1.2195006 -0.91043336 136.55699 136.6108 136.51338 99.776143 99.745795 99.864275 74.795104 74.685819 74.717177 + 30000 1522.4483 -889.44291 -192.64469 2.2938491 1485.2163 47.000092 -4.1307135 -5.5657899 -5.1144768 -5.2534691 -57.23935 -53.981806 -57.080422 -1.0808076 -1.1685982 -0.9037818 136.56379 136.60813 136.53282 99.788737 99.750357 99.848963 74.809344 74.70159 74.734697 + 31500 1505.8509 -892.21671 -47.865153 2.2938491 1485.1999 28.718777 -4.1306435 -5.5900256 -5.0977548 -5.3212457 -58.427002 -53.281182 -56.888082 -1.0418402 -1.1235974 -0.95135018 136.54844 136.59962 136.51956 99.788195 99.73104 99.839448 74.804681 74.698921 74.735945 + 33000 1524.7919 -891.77207 618.73448 2.2938491 1484.6501 26.102838 -4.1307073 -5.5699664 -5.0898554 -5.3235624 -58.46472 -52.991068 -56.762092 -1.0088978 -1.0416939 -0.91943889 136.54672 136.59683 136.54178 99.803042 
99.742969 99.824046 74.792853 74.704913 74.7412 + 34500 1318.8274 -895.33161 -238.41299 2.2938491 1484.1483 35.920822 -4.1310162 -5.5453151 -5.1834616 -5.226137 -58.786557 -53.69774 -56.685216 -0.92858056 -1.0575756 -0.91213687 136.56518 136.61647 136.55086 99.818916 99.752319 99.846502 74.785822 74.686117 74.729391 + 36000 1508.0854 -897.00819 386.87317 2.2938491 1484.2329 13.900448 -4.1310594 -5.5602357 -5.2287248 -5.2683084 -58.352487 -53.166842 -56.435242 -0.98489974 -1.1150209 -0.97806547 136.56761 136.62126 136.54672 99.817495 99.749793 99.850988 74.791287 74.690688 74.728757 + 37500 1337.0681 -892.21713 -1883.9082 2.2938491 1483.5496 17.638297 -4.1312038 -5.5498721 -5.1713809 -5.2294639 -58.108447 -52.926984 -57.086776 -0.99993713 -1.1188607 -0.98843922 136.60133 136.63789 136.57792 99.835059 99.76994 99.866613 74.797095 74.701683 74.736576 + 39000 1589.4728 -891.19625 995.08605 2.2938491 1483.3612 17.095551 -4.1312007 -5.5317838 -5.2105555 -5.1922304 -58.717403 -54.333186 -56.887211 -0.99220342 -1.1171617 -0.95284246 136.58154 136.64111 136.58061 99.838214 99.767981 99.856571 74.783126 74.695551 74.743596 + 40500 1407.6892 -888.26516 -121.42876 2.2938491 1483.1535 11.349342 -4.1310772 -5.5246834 -5.3227926 -5.2381696 -58.127567 -54.335491 -56.526183 -1.0068437 -1.1100906 -0.98314174 136.59514 136.66318 136.58654 99.842084 99.768622 99.864221 74.798038 74.707968 74.754262 + 42000 1393.215 -890.41289 -1400.2546 2.2938491 1483.0597 15.356167 -4.1310211 -5.5054579 -5.2432623 -5.2879064 -57.678427 -54.403924 -56.68041 -0.98703973 -1.1078252 -0.94202727 136.58046 136.64149 136.57589 99.82922 99.766313 99.859666 74.789686 74.706875 74.742296 + 43500 1431.4083 -891.46447 -1024.8736 2.2938491 1483.4383 29.914384 -4.1310967 -5.4419635 -5.2078433 -5.3109934 -57.530789 -54.26054 -56.720655 -0.97636739 -1.1157064 -0.91055522 136.58668 136.64834 136.57366 99.832542 99.770558 99.869414 74.790483 74.703825 74.734507 + 45000 1545.0867 -892.04857 45.904402 2.2938491 
1483.4405 35.718278 -4.1310755 -5.3637937 -5.2001808 -5.3090151 -57.528707 -54.843275 -56.861208 -0.96605649 -1.0920958 -0.91223069 136.57401 136.64521 136.57571 99.837416 99.765593 99.859626 74.781843 74.704887 74.745423 + 46500 1404.8024 -892.83829 1729.162 2.2938491 1483.2957 30.13961 -4.1310317 -5.3792417 -5.1971397 -5.3387475 -56.939572 -54.734022 -56.994231 -0.95147416 -1.082873 -0.92953395 136.57479 136.63964 136.57839 99.838336 99.764501 99.860117 74.781988 74.711464 74.752324 + 48000 1420.8187 -892.6689 133.76209 2.2938491 1483.8654 36.464676 -4.1310865 -5.3725257 -5.1968202 -5.2908604 -57.038948 -54.60648 -57.037008 -0.94217974 -1.078666 -0.9457433 136.57469 136.6391 136.58543 99.841124 99.773144 99.857788 74.770547 74.713982 74.749392 + 49500 1540.1916 -890.81013 1015.7364 2.2938491 1483.5331 29.460135 -4.1311806 -5.3616691 -5.2410763 -5.2770257 -57.367374 -54.511997 -56.983882 -0.90709283 -1.0699147 -0.95961795 136.57397 136.63461 136.58661 99.836988 99.7788 99.856292 74.76285 74.716298 74.744516 + 51000 1542.5599 -895.41592 2229.0521 2.2938491 1483.26 46.938163 -4.1313379 -5.3836423 -5.292679 -5.2421209 -57.493528 -54.976743 -56.742678 -0.92147515 -1.0661534 -0.97457604 136.58505 136.65502 136.60283 99.850684 99.789568 99.867021 74.764877 74.713412 74.744448 + 52500 1435.6508 -893.88278 1371.6894 2.2938491 1483.8611 45.297739 -4.131238 -5.3887756 -5.2631361 -5.2338548 -57.266674 -54.919107 -56.515263 -0.9246507 -1.0528053 -0.9646317 136.57178 136.64685 136.58608 99.843498 99.776109 99.861384 74.767288 74.705939 74.745305 + 54000 1498.4815 -894.04865 -215.48447 2.2938491 1484.2166 53.332306 -4.1312175 -5.3769715 -5.2364667 -5.2180631 -57.002797 -54.824514 -56.308177 -0.90553768 -1.0431561 -0.96206339 136.5736 136.64715 136.59018 99.843476 99.779814 99.864189 74.769353 74.712073 74.747675 + 55500 1488.4475 -892.25207 2223.1334 2.2938491 1484.3977 55.070516 -4.131301 -5.3995991 -5.2051608 -5.2413853 -56.968726 -54.60962 -56.452011 -0.88104887 -1.0454305 
-0.9279811 136.56939 136.65901 136.59008 99.852579 99.780226 99.868047 74.766727 74.699177 74.746307 + 57000 1391.7008 -891.05256 -745.9847 2.2938491 1484.9146 63.324757 -4.131193 -5.4361643 -5.2663032 -5.2741791 -56.541877 -54.805976 -56.413862 -0.94116397 -1.0542391 -0.97244806 136.55629 136.65894 136.58041 99.849107 99.769924 99.866092 74.771682 74.694441 74.749224 + 58500 1486.0763 -893.10237 1179.3108 2.2938491 1485.0711 68.933687 -4.1312419 -5.4515664 -5.2681872 -5.271143 -56.31131 -55.125308 -56.51786 -0.91426955 -1.0577479 -0.96344911 136.55446 136.65707 136.5822 99.851799 99.771362 99.862939 74.768592 74.696151 74.748444 + 60000 1415.0214 -889.70348 84.140965 2.2938491 1485.353 62.846002 -4.1310395 -5.4721102 -5.2335426 -5.2667395 -56.252286 -54.696253 -57.214115 -0.89024606 -1.0673422 -0.9615006 136.54445 136.64067 136.56218 99.83624 99.758766 99.857789 74.775278 74.698701 74.74693 + 61500 1505.049 -893.40906 -1171.237 2.2938491 1485.1339 48.041895 -4.1309521 -5.5296914 -5.2744515 -5.2725786 -56.305982 -54.635926 -57.073326 -0.89894036 -1.0697223 -0.98474946 136.53657 136.63496 136.54428 99.824026 99.751581 99.851701 74.778349 74.701739 74.742934 + 63000 1569.2181 -895.53708 692.18873 2.2938491 1485.2282 41.484214 -4.1308915 -5.5387462 -5.2570073 -5.2854566 -56.370165 -54.957322 -56.919632 -0.89333382 -1.0743002 -0.98291386 136.52592 136.6331 136.5364 99.82302 99.741873 99.845957 74.776842 74.696906 74.746058 + 64500 1652.5772 -893.24743 -355.00368 2.2938491 1485.1525 32.091082 -4.1308265 -5.5437295 -5.2405233 -5.2936204 -56.472581 -55.043571 -57.003878 -0.90958534 -1.0628781 -0.9755182 136.51804 136.62448 136.5367 99.824025 99.735602 99.829869 74.775673 74.699722 74.757833 + 66000 1412.7579 -889.50384 -1262.5614 2.2938491 1484.9061 30.389463 -4.1308158 -5.5492803 -5.2014866 -5.2725006 -56.400419 -55.087258 -57.031894 -0.90827746 -1.055947 -0.94908896 136.51429 136.63072 136.52764 99.822306 99.729263 99.834331 74.77472 74.692621 74.75778 + 67500 1419.3222 
-893.93609 455.71983 2.2938491 1484.4129 35.353925 -4.1309444 -5.5549734 -5.162612 -5.2500311 -56.643941 -55.139699 -56.939452 -0.89284559 -1.0497501 -0.94639045 136.53097 136.63928 136.52951 99.824253 99.74314 99.845735 74.772193 74.692431 74.744217 + 69000 1499.3927 -893.23657 2133.7507 2.2938491 1484.5924 42.519304 -4.1309453 -5.5579056 -5.193451 -5.2615012 -56.560776 -55.204165 -56.833812 -0.90658376 -1.0531785 -0.92704569 136.53588 136.64044 136.53357 99.823605 99.748719 99.848244 74.773252 74.694645 74.741088 + 70500 1453.7238 -898.50233 203.62655 2.2938491 1484.552 57.948334 -4.1311115 -5.5744016 -5.1846466 -5.300952 -56.485251 -55.244192 -56.490961 -0.92357514 -1.0736892 -0.94119448 136.55673 136.65773 136.54407 99.83592 99.760901 99.866666 74.774503 74.689701 74.734729 + 72000 1358.8426 -894.13404 -1515.1526 2.2938491 1484.5 70.388615 -4.1312383 -5.5720283 -5.1529312 -5.2902761 -56.23731 -55.151913 -56.492399 -0.92650973 -1.0780588 -0.94653221 136.56193 136.66864 136.55766 99.849979 99.766646 99.874771 74.771089 74.68335 74.737748 + 73500 1412.7478 -893.22472 -34.916301 2.2938491 1484.2354 72.906209 -4.1313674 -5.5326905 -5.1818027 -5.2845424 -56.228576 -55.230877 -56.699641 -0.91055509 -1.0746684 -0.92536812 136.56912 136.68454 136.56623 99.858956 99.772239 99.883649 74.771262 74.68041 74.738889 + 75000 1521.0392 -894.91307 1340.9582 2.2938491 1484.2032 74.766448 -4.1313961 -5.5364315 -5.1548586 -5.2790443 -55.950169 -55.216258 -56.575995 -0.92192446 -1.0775437 -0.9300686 136.56659 136.67193 136.5591 99.854992 99.772516 99.879592 74.763174 74.677378 74.726951 + 76500 1866.7885 -890.06096 1785.3366 2.2938491 1484.352 63.644864 -4.131344 -5.5533682 -5.1232457 -5.3066877 -55.720173 -55.30149 -56.379834 -0.92109492 -1.0766113 -0.93507202 136.55289 136.66675 136.55486 99.853031 99.761978 99.867309 74.761019 74.675063 74.735345 + 78000 1572.8414 -894.12523 519.31894 2.2938491 1484.4934 67.52993 -4.131372 -5.5480872 -5.1167882 -5.3058671 -55.460346 -55.428212 
-56.847408 -0.93186527 -1.0783747 -0.92266182 136.55552 136.66452 136.55931 99.853797 99.765952 99.866551 74.753266 74.67628 74.731835 + 79500 1582.808 -887.28736 32.310376 2.2938491 1484.6177 63.140876 -4.1313405 -5.5505979 -5.1464245 -5.3002088 -55.312065 -55.219255 -56.823945 -0.92840784 -1.0572644 -0.91602648 136.55089 136.66797 136.55202 99.848591 99.757191 99.866246 74.761506 74.679117 74.731727 + 81000 1505.9362 -893.99552 1168.3131 2.2938491 1484.7024 59.334222 -4.1313291 -5.542069 -5.1300515 -5.3480915 -55.533185 -54.873712 -56.883809 -0.91460171 -1.0589894 -0.91377243 136.54996 136.65777 136.55229 99.840801 99.761865 99.863413 74.757188 74.682251 74.725592 + 82500 1513.0707 -888.98451 468.72877 2.2938491 1484.7983 60.986906 -4.1313248 -5.5540464 -5.149159 -5.3766624 -55.373943 -54.991399 -56.916486 -0.91794449 -1.0742968 -0.91915128 136.54381 136.65842 136.55678 99.843299 99.762414 99.858714 74.754718 74.683161 74.732829 + 84000 1435.7843 -896.76295 1292.2215 2.2938491 1484.8116 65.13448 -4.1313418 -5.5285644 -5.1614459 -5.3656791 -55.769762 -55.222106 -56.728682 -0.90710664 -1.0676205 -0.90069803 136.54527 136.66086 136.55465 99.844049 99.761818 99.863536 74.753412 74.67849 74.730029 + 85500 1445.5087 -890.67024 780.30135 2.2938491 1484.727 63.467983 -4.1313813 -5.5229005 -5.1833069 -5.3792368 -55.856811 -55.430552 -56.799352 -0.9152213 -1.0420366 -0.87978826 136.54482 136.66158 136.56365 99.850674 99.764508 99.8615 74.748717 74.675375 74.731058 + 87000 1342.7982 -889.1216 379.52916 2.2938491 1484.4593 66.939669 -4.131412 -5.5199372 -5.1786341 -5.3635244 -56.027851 -55.31584 -56.624897 -0.9058517 -1.0317479 -0.88550092 136.5477 136.65932 136.5713 99.853041 99.771267 99.860293 74.743982 74.680111 74.730377 + 88500 1530.6474 -892.76936 193.19595 2.2938491 1484.3351 69.837059 -4.1315047 -5.4868964 -5.1609751 -5.347637 -56.087204 -55.67806 -56.587507 -0.89331183 -1.0203858 -0.87139348 136.55514 136.66128 136.57677 99.855617 99.779013 99.862832 74.744524 
74.681118 74.726008 + 90000 1463.3279 -893.70753 -138.2312 2.2938491 1484.2389 76.911527 -4.1315681 -5.4986503 -5.157615 -5.3245583 -55.951533 -55.590016 -56.366329 -0.90285076 -1.0012447 -0.86710558 136.55941 136.66571 136.57548 99.858464 99.781941 99.869985 74.741376 74.680681 74.721937 + 91500 1484.8173 -896.68003 -883.50284 2.2938491 1483.9368 85.172348 -4.131743 -5.5026578 -5.1328923 -5.3210247 -55.848809 -55.574797 -56.184176 -0.89680641 -1.0163825 -0.86407063 136.57822 136.67852 136.58472 99.867201 99.792669 99.882242 74.741455 74.679894 74.718352 + 93000 1510.7809 -892.76195 936.61597 2.2938491 1483.9875 80.729629 -4.1317039 -5.5175381 -5.1632049 -5.2864219 -55.73302 -55.431658 -56.046585 -0.87193033 -0.9931326 -0.85773406 136.5717 136.67384 136.58154 99.866807 99.789498 99.875475 74.736638 74.683342 74.720695 + 94500 1393.9894 -891.59461 993.30492 2.2938491 1483.6993 70.091013 -4.1316052 -5.561709 -5.2037584 -5.285637 -55.590758 -55.362502 -55.896423 -0.92450963 -1.0041412 -0.86154561 136.56723 136.66652 136.57885 99.861028 99.785569 99.869928 74.734029 74.687283 74.72289 + 96000 1399.5704 -891.83014 261.72498 2.2938491 1483.7185 68.255442 -4.1315881 -5.5407922 -5.1971596 -5.2792049 -55.587333 -55.227999 -56.044803 -0.92409667 -0.98176507 -0.85994917 136.56208 136.65115 136.57114 99.852303 99.784495 99.863218 74.728616 74.688882 74.718877 + 97500 1445.9358 -889.63053 1920.9089 2.2938491 1483.6555 69.422284 -4.1316201 -5.525585 -5.2069511 -5.2769853 -55.484607 -55.152178 -55.9703 -0.9118933 -0.9697173 -0.84362523 136.56383 136.64813 136.57673 99.854915 99.789999 99.86109 74.724746 74.691655 74.718887 + 99000 1481.7297 -894.53648 1920.3534 2.2938491 1483.5863 73.972032 -4.1316615 -5.5242295 -5.2114442 -5.2775307 -55.432781 -54.995518 -56.245576 -0.91117553 -0.95949418 -0.84367008 136.57319 136.65392 136.58244 99.863 99.796575 99.863558 74.721991 74.689604 74.720156 + 100500 1555.7331 -897.13697 510.64651 2.2938491 1483.5047 76.122656 -4.1317154 -5.5099943 
-5.1925986 -5.2544123 -55.41778 -55.049677 -56.321769 -0.91302945 -0.9391075 -0.83366115 136.58534 136.65679 136.58606 99.86412 99.803643 99.86978 74.723511 74.687658 74.714778 + 102000 1551.2618 -893.2161 -1914.7874 2.2938491 1483.4161 70.08278 -4.1316784 -5.5178506 -5.1822887 -5.2657073 -55.204959 -55.329963 -56.176949 -0.92464278 -0.93038408 -0.81303892 136.57969 136.65276 136.58376 99.862116 99.798391 99.863054 74.724843 74.686469 74.716462 + 103500 1503.2152 -892.61787 -171.54201 2.2938491 1483.4512 66.392778 -4.1316545 -5.5326792 -5.2130934 -5.2791861 -55.129762 -55.250793 -55.984826 -0.9399012 -0.93416889 -0.83124512 136.5761 136.64973 136.58549 99.862244 99.798251 99.85835 74.723785 74.686053 74.717098 + 105000 1416.3025 -889.82355 806.24617 2.2938491 1483.4875 69.85283 -4.1316289 -5.5149242 -5.2046481 -5.2772953 -55.459012 -55.076823 -55.803031 -0.93530733 -0.93313435 -0.82259178 136.57789 136.65184 136.58443 99.862493 99.797615 99.861346 74.726505 74.686711 74.717065 + 106500 1516.0186 -890.38072 -65.458566 2.2938491 1483.3376 69.89511 -4.131647 -5.5158107 -5.1842323 -5.2805172 -55.375546 -54.982077 -55.75833 -0.92803719 -0.94721189 -0.81537928 136.57863 136.652 136.58745 99.860995 99.800468 99.863634 74.727566 74.68632 74.715439 + 108000 1567.9081 -892.03482 1908.9334 2.2938491 1483.2035 74.24341 -4.1317296 -5.5155484 -5.2106475 -5.2702321 -55.431377 -54.742913 -56.041609 -0.93539002 -0.93931556 -0.80430502 136.58125 136.66613 136.5964 99.872586 99.802456 99.868313 74.727865 74.683836 74.721713 + 109500 1599.0673 -893.47835 342.02237 2.2938491 1483.2849 68.824989 -4.1316557 -5.523896 -5.2180404 -5.273406 -55.378539 -55.062731 -56.013815 -0.94064862 -0.94072295 -0.81493716 136.57947 136.66034 136.5911 99.864281 99.800955 99.865271 74.730217 74.689359 74.723598 + 111000 1480.7644 -891.25165 1530.0865 2.2938491 1483.2831 65.004031 -4.1316452 -5.5275345 -5.2520385 -5.285739 -55.43929 -54.960797 -55.898547 -0.93950413 -0.9528086 -0.81319868 136.58747 
136.66171 136.58828 99.858998 99.803442 99.870483 74.737449 74.695153 74.722286 + 112500 1337.0544 -892.10185 -3896.0383 2.2938491 1483.3685 62.132179 -4.1316107 -5.535292 -5.2915149 -5.2805575 -55.359185 -54.902898 -55.989168 -0.9555528 -0.97515859 -0.82132852 136.58119 136.65575 136.57814 99.851598 99.797813 99.868916 74.733796 74.691559 74.716189 + 114000 1549.365 -890.59132 835.28237 2.2938491 1483.1897 63.483134 -4.1316721 -5.5248067 -5.3082845 -5.2868684 -55.404099 -54.865852 -55.934931 -0.95815672 -0.9720129 -0.82107671 136.58611 136.66054 136.58137 99.854038 99.802922 99.872955 74.734597 74.691805 74.712332 + 115500 1689.6451 -889.57385 2786.9069 2.2938491 1483.1814 64.245855 -4.1316435 -5.4952356 -5.2987603 -5.2825244 -55.33878 -54.973026 -55.867906 -0.96022645 -0.96529264 -0.8132027 136.5867 136.65263 136.58381 99.849714 99.805133 99.871852 74.733362 74.695313 74.711809 + 117000 1634.2773 -888.66391 1792.3236 2.2938491 1483.3522 66.40694 -4.1316382 -5.4884642 -5.3091116 -5.316244 -55.277107 -54.885228 -55.968508 -0.96291328 -0.96260113 -0.80556018 136.5829 136.65144 136.58358 99.849795 99.804951 99.869368 74.733064 74.693782 74.714426 + 118500 1512.0852 -896.54819 2370.9642 2.2938491 1483.2487 65.256886 -4.1316498 -5.494636 -5.3104828 -5.3070371 -55.218404 -55.107709 -56.061175 -0.96759084 -0.96944707 -0.79468325 136.5767 136.6521 136.58024 99.85088 99.802239 99.868718 74.729889 74.68824 74.713734 + 120000 1534.8689 -890.67184 714.49782 2.2938491 1483.2202 64.847873 -4.1316146 -5.4810611 -5.3051601 -5.3037508 -55.058803 -55.070022 -55.991144 -0.95370773 -0.95429037 -0.78285759 136.57638 136.64582 136.57895 99.846406 99.801655 99.865982 74.73014 74.688346 74.711534 + 121500 1405.2475 -886.92812 138.4554 2.2938491 1483.2596 65.382284 -4.1316073 -5.4783038 -5.3149544 -5.3022137 -55.166874 -55.067296 -56.062302 -0.93595897 -0.94409398 -0.77706989 136.57903 136.64806 136.5792 99.846735 99.800512 99.869054 74.732824 74.68797 74.712968 + 123000 1456.6929 
-896.30834 1808.717 2.2938491 1483.2248 68.747431 -4.1315801 -5.4861645 -5.3197155 -5.2955242 -55.301202 -55.194601 -56.128254 -0.93713461 -0.95225278 -0.77674662 136.58156 136.65019 136.58142 99.848858 99.80404 99.87049 74.734827 74.688143 74.712702 + 124500 1443.9726 -891.23252 222.27539 2.2938491 1483.1769 75.409197 -4.1316495 -5.4733354 -5.3109443 -5.2989219 -55.218501 -55.085985 -56.04661 -0.93165139 -0.96391216 -0.78641018 136.58779 136.65558 136.5856 99.853842 99.809898 99.875019 74.732971 74.688765 74.711825 + 126000 1296.4452 -894.09532 -448.11759 2.2938491 1483.1966 76.448258 -4.1316248 -5.4477559 -5.3471263 -5.3184683 -55.179674 -55.12879 -55.938433 -0.9399255 -0.95448068 -0.79160918 136.58765 136.65371 136.58604 99.853091 99.808848 99.872848 74.732114 74.690756 74.712086 + 127500 1360.5356 -891.63797 -1307.3274 2.2938491 1483.1679 76.431813 -4.1316277 -5.4296635 -5.3399038 -5.3196976 -55.447145 -55.306027 -55.838932 -0.9375012 -0.93937749 -0.78345199 136.58993 136.65494 136.58746 99.851937 99.811636 99.873935 74.733406 74.692356 74.710989 + 129000 1537.4583 -893.81383 2456.5777 2.2938491 1483.0894 77.554082 -4.1316695 -5.4274905 -5.3227907 -5.31813 -55.417552 -55.163379 -55.956941 -0.93685174 -0.94043447 -0.78494862 136.59298 136.66134 136.59483 99.858058 99.814952 99.877233 74.731583 74.690107 74.712799 + 130500 1412.1121 -889.35505 -833.32241 2.2938491 1483.2416 76.556032 -4.1316238 -5.4280735 -5.3391786 -5.3417135 -55.590068 -55.143779 -56.000536 -0.93314216 -0.93445128 -0.78428675 136.59619 136.65856 136.59562 99.855834 99.817382 99.873711 74.73209 74.697813 74.715117 + 132000 1433.9422 -894.0571 -524.06657 2.2938491 1483.2286 76.667914 -4.1316034 -5.4326706 -5.334977 -5.3289222 -55.469147 -55.072003 -56.016126 -0.92185972 -0.93375605 -0.78028804 136.59546 136.65657 136.59811 99.855359 99.820147 99.871238 74.730791 74.6995 74.715765 + 133500 1469.8133 -891.12688 -1918.5105 2.2938491 1483.4627 69.808553 -4.131491 -5.4620411 -5.3929316 -5.3349188 
-55.549884 -55.093182 -56.020155 -0.93107405 -0.9285493 -0.80645679 136.58436 136.64994 136.58805 99.849345 99.809943 99.863673 74.734838 74.699938 74.719131 + 135000 1415.0773 -890.00202 -1744.3799 2.2938491 1483.5398 72.048137 -4.1314471 -5.4469424 -5.3848025 -5.3299729 -55.56373 -55.064652 -56.011445 -0.92533613 -0.91884324 -0.8053452 136.58261 136.64995 136.58368 99.847139 99.80704 99.865239 74.735199 74.698258 74.717296 + 136500 1612.6785 -887.07452 654.87115 2.2938491 1483.6529 67.437302 -4.1314192 -5.4498377 -5.3876296 -5.3538994 -55.692434 -55.067519 -56.031943 -0.91676435 -0.92734642 -0.8195822 136.57966 136.64577 136.57881 99.84415 99.804891 99.861444 74.735792 74.701869 74.712867 + 138000 1473.962 -890.53569 763.79994 2.2938491 1483.7705 71.369364 -4.1314061 -5.4426099 -5.3855117 -5.35429 -55.813141 -55.042337 -56.039437 -0.9165079 -0.91152423 -0.81876959 136.57326 136.65017 136.57649 99.848233 99.800459 99.860298 74.734293 74.697256 74.716098 + 139500 1596.0337 -893.74873 1757.7107 2.2938491 1483.7538 71.085198 -4.1314032 -5.4408424 -5.3821989 -5.3535306 -55.766616 -54.926992 -56.134318 -0.9104569 -0.90839802 -0.82099317 136.57238 136.65208 136.57395 99.84783 99.799153 99.860218 74.735968 74.696897 74.717128 + 141000 1437.4667 -895.36059 -1486.7579 2.2938491 1483.5831 64.471495 -4.1313687 -5.4443976 -5.3948513 -5.357086 -55.797397 -54.841839 -56.047812 -0.91971925 -0.90347628 -0.82595005 136.56972 136.64928 136.57002 99.844721 99.795279 99.856969 74.736631 74.695788 74.717885 + 142500 1602.076 -891.24649 -609.59533 2.2938491 1483.6377 65.582158 -4.1313775 -5.4218455 -5.3960455 -5.3546129 -55.729872 -55.028556 -55.931212 -0.91876041 -0.9037974 -0.83048707 136.57021 136.65503 136.5745 99.847536 99.797359 99.858638 74.735436 74.693773 74.71997 + 144000 1479.0007 -895.72537 35.234041 2.2938491 1483.8592 71.090866 -4.1313827 -5.4381104 -5.3843113 -5.367512 -55.69843 -55.127984 -55.96905 -0.91450582 -0.91079338 -0.82484257 136.57136 136.65505 136.57591 
99.84928 99.799229 99.8597 74.732505 74.692175 74.719442 + 145500 1422.1197 -895.40393 710.65923 2.2938491 1483.8925 77.830899 -4.1314429 -5.4493282 -5.3829713 -5.3816399 -55.666589 -54.944912 -55.969454 -0.91807472 -0.92530948 -0.83079113 136.58108 136.65693 136.58756 99.854539 99.809148 99.864025 74.729857 74.694806 74.717729 + 147000 1478.3429 -893.09781 488.31447 2.2938491 1483.912 78.951407 -4.1314331 -5.440132 -5.3646442 -5.3671282 -55.628451 -54.953971 -55.88551 -0.91725251 -0.92232089 -0.82393511 136.58018 136.65435 136.58553 99.8537 99.807293 99.8645 74.729604 74.691437 74.716811 + 148500 1473.0911 -895.35729 507.56314 2.2938491 1483.9115 80.697625 -4.1314511 -5.4348305 -5.3577535 -5.3879451 -55.596438 -54.893402 -56.269907 -0.90315239 -0.9332194 -0.83273692 136.57525 136.65116 136.58409 99.8543 99.805369 99.863434 74.725754 74.687316 74.714839 + 150000 1544.1047 -895.3599 604.58807 2.2938491 1483.9156 78.74187 -4.1314601 -5.4441678 -5.3514889 -5.3908589 -55.496372 -55.057119 -56.300056 -0.90082902 -0.93375763 -0.82244 136.57147 136.64767 136.58374 99.853221 99.805681 99.859476 74.722001 74.686388 74.715471 +Loop time of 232.036 on 1 procs for 150000 steps with 216 atoms + +Performance: 42.784 ns/day, 0.561 hours/ns, 646.451 timesteps/s, 139.633 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 188.56 | 188.56 | 188.56 | 0.0 | 81.26 +Neigh | 3.9739 | 3.9739 | 3.9739 | 0.0 | 1.71 +Comm | 1.3079 | 1.3079 | 1.3079 | 0.0 | 0.56 +Output | 0.018252 | 0.018252 | 0.018252 | 0.0 | 0.01 +Modify | 37.601 | 37.601 | 37.601 | 0.0 | 16.20 +Other | | 0.5802 | | | 0.25 + +Nlocal: 216 ave 216 max 216 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 650 ave 650 max 650 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 5730 ave 5730 max 5730 min 
+Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 5730 +Ave neighs/atom = 26.527778 +Neighbor list builds = 2557 +Dangerous builds = 0 + +# Output final values + +include final_output.in +# Average moduli for cubic crystals + +variable C11cubic equal (${C11}+${C22}+${C33})/3.0 +variable C11cubic equal (135.142919282103+${C22}+${C33})/3.0 +variable C11cubic equal (135.142919282103+135.3118001505+${C33})/3.0 +variable C11cubic equal (135.142919282103+135.3118001505+135.208494162214)/3.0 +variable C12cubic equal (${C12}+${C13}+${C23})/3.0 +variable C12cubic equal (73.8211718089398+${C13}+${C23})/3.0 +variable C12cubic equal (73.8211718089398+73.7526302317849+${C23})/3.0 +variable C12cubic equal (73.8211718089398+73.7526302317849+73.8930310663028)/3.0 +variable C44cubic equal (${C44}+${C55}+${C66})/3.0 +variable C44cubic equal (46.3646580442449+${C55}+${C66})/3.0 +variable C44cubic equal (46.3646580442449+46.7563709375539+${C66})/3.0 +variable C44cubic equal (46.3646580442449+46.7563709375539+45.5672286557687)/3.0 + +variable bulkmodulus equal (${C11cubic}+2*${C12cubic})/3.0 +variable bulkmodulus equal (135.221071198272+2*${C12cubic})/3.0 +variable bulkmodulus equal (135.221071198272+2*73.8222777023425)/3.0 +variable shearmodulus1 equal ${C44cubic} +variable shearmodulus1 equal 46.2294192125225 +variable shearmodulus2 equal (${C11cubic}-${C12cubic})/2.0 +variable shearmodulus2 equal (135.221071198272-${C12cubic})/2.0 +variable shearmodulus2 equal (135.221071198272-73.8222777023425)/2.0 +variable poissonratio equal 1.0/(1.0+${C11cubic}/${C12cubic}) +variable poissonratio equal 1.0/(1.0+135.221071198272/${C12cubic}) +variable poissonratio equal 1.0/(1.0+135.221071198272/73.8222777023425) + +# For Stillinger-Weber silicon, the analytical results +# are known to be (E. R. 
Cowley, 1988): +# C11 = 151.4 GPa +# C12 = 76.4 GPa +# C44 = 56.4 GPa + +#print "=========================================" +#print "Components of the Elastic Constant Tensor" +#print "=========================================" + +print "Elastic Constant C11 = ${C11} ${cunits}" +Elastic Constant C11 = 135.142919282103 GPa +print "Elastic Constant C22 = ${C22} ${cunits}" +Elastic Constant C22 = 135.3118001505 GPa +print "Elastic Constant C33 = ${C33} ${cunits}" +Elastic Constant C33 = 135.208494162214 GPa + +print "Elastic Constant C12 = ${C12} ${cunits}" +Elastic Constant C12 = 73.8211718089398 GPa +print "Elastic Constant C13 = ${C13} ${cunits}" +Elastic Constant C13 = 73.7526302317849 GPa +print "Elastic Constant C23 = ${C23} ${cunits}" +Elastic Constant C23 = 73.8930310663028 GPa + +print "Elastic Constant C44 = ${C44} ${cunits}" +Elastic Constant C44 = 46.3646580442449 GPa +print "Elastic Constant C55 = ${C55} ${cunits}" +Elastic Constant C55 = 46.7563709375539 GPa +print "Elastic Constant C66 = ${C66} ${cunits}" +Elastic Constant C66 = 45.5672286557687 GPa + +print "Elastic Constant C14 = ${C14} ${cunits}" +Elastic Constant C14 = -0.079944165480444 GPa +print "Elastic Constant C15 = ${C15} ${cunits}" +Elastic Constant C15 = 0.39457148260366 GPa +print "Elastic Constant C16 = ${C16} ${cunits}" +Elastic Constant C16 = 0.243679612313066 GPa + +print "Elastic Constant C24 = ${C24} ${cunits}" +Elastic Constant C24 = 0.0145821700806613 GPa +print "Elastic Constant C25 = ${C25} ${cunits}" +Elastic Constant C25 = 0.0483146702101452 GPa +print "Elastic Constant C26 = ${C26} ${cunits}" +Elastic Constant C26 = -0.0128689564409826 GPa + +print "Elastic Constant C34 = ${C34} ${cunits}" +Elastic Constant C34 = 0.0970392151677403 GPa +print "Elastic Constant C35 = ${C35} ${cunits}" +Elastic Constant C35 = 0.0271766723313842 GPa +print "Elastic Constant C36 = ${C36} ${cunits}" +Elastic Constant C36 = 0.482930510866424 GPa + +print "Elastic Constant C45 = ${C45} ${cunits}" 
+Elastic Constant C45 = -0.245809275986938 GPa +print "Elastic Constant C46 = ${C46} ${cunits}" +Elastic Constant C46 = 1.02647725654437 GPa +print "Elastic Constant C56 = ${C56} ${cunits}" +Elastic Constant C56 = -0.423764011839042 GPa + +print "=========================================" +========================================= +print "Average properties for a cubic crystal" +Average properties for a cubic crystal +print "=========================================" +========================================= + +print "Bulk Modulus = ${bulkmodulus} ${cunits}" +Bulk Modulus = 94.2885422009857 GPa +print "Shear Modulus 1 = ${shearmodulus1} ${cunits}" +Shear Modulus 1 = 46.2294192125225 GPa +print "Shear Modulus 2 = ${shearmodulus2} ${cunits}" +Shear Modulus 2 = 30.6993967479647 GPa +print "Poisson Ratio = ${poissonratio}" +Poisson Ratio = 0.353143393896927 + +# summarize sampling protocol + +variable tmp equal atoms +print "Number of atoms = ${tmp}" +Number of atoms = 216 +print "Stress sampling interval = ${nevery}" +Stress sampling interval = 10 +variable tmp equal ${nrun}/${nevery} +variable tmp equal 150000/${nevery} +variable tmp equal 150000/10 +print "Stress sample count = ${tmp}" +Stress sample count = 15000 +print "Born sampling interval = ${neveryborn}" +Born sampling interval = 100 +variable tmp equal ${nrun}/${neveryborn} +variable tmp equal 150000/${neveryborn} +variable tmp equal 150000/100 +print "Born sample count = ${tmp}" +Born sample count = 1500 +Total wall time: 0:04:12 diff --git a/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.tri b/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.tri new file mode 100644 index 00000000000..107a99cee26 --- /dev/null +++ b/examples/ELASTIC_T/BORN_MATRIX/Silicon/log.elastic.tri @@ -0,0 +1,666 @@ + +# select temperature and pressure (lattice constant) + +variable temp index 1477.0 # temperature of initial sample +variable a index 5.457 # lattice constant + +# select sampling parameters, important for 
speed/convergence + +variable nthermo index 1500 # interval for thermo output +variable nevery index 10 # stress sampling interval +variable neveryborn index 100 # Born sampling interval +variable timestep index 0.000766 # timestep +variable nlat index 3 # number of lattice unit cells + +# other settings + +variable mass1 index 28.06 # mass +variable tdamp index 0.01 # time constant for thermostat +variable seed index 123457 # seed for thermostat +variable thermostat index 1 # 0 if NVE, 1 if NVT +variable delta index 1.0e-6 # Born numdiff strain magnitude + +# hard-coded rules-of-thumb for run length, etc. + +variable nfreq equal ${nthermo} # interval for averaging output +variable nfreq equal 1500 +variable nrepeat equal floor(${nfreq}/${nevery}) # number of samples +variable nrepeat equal floor(1500/${nevery}) +variable nrepeat equal floor(1500/10) +variable nrepeatborn equal floor(${nfreq}/${neveryborn}) # number of samples +variable nrepeatborn equal floor(1500/${neveryborn}) +variable nrepeatborn equal floor(1500/100) +variable nequil equal 10*${nthermo} # length of equilibration run +variable nequil equal 10*1500 +variable nrun equal 100*${nthermo} # length of equilibrated run +variable nrun equal 100*1500 + +# this generates a general triclinic cell +# conforming to LAMMPS cell (upper triangular) + +units metal +box tilt large + +The 'box' command has been removed and will be ignored + + +# unit lattice vectors are +# a1 = (a1x 0 0) +# a2 = (a2x a2y 0) +# a3 = (a3x a3y a3z) + +variable a1x index 1 +variable a2x index 0 +variable a2y index 1 +variable a3x index 0 +variable a3y index 0 +variable a3z index 1 +variable atmp equal $a +variable atmp equal 5.457 +variable l index $a +variable l index 5.457 +variable basis index "basis 0 0 0 basis 0.25 0.25 0.25 basis 0 0.5 0.5 basis 0.25 0.75 0.75 basis 0.5 0 0.5 basis 0.75 0.25 0.75 basis 0.5 0.5 0 basis 0.75 0.75 0.25" +lattice custom ${l} a1 ${a1x} 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 
1 1 1 +lattice custom 3.8586817049349893 a1 ${a1x} 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 ${a2x} ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 ${a2y} 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 0.8660254037844385 0 a3 ${a3x} ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 0.8660254037844385 0 a3 0.5 ${a3y} ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 0.8660254037844385 0 a3 0.5 0.2886751345948129 ${a3z} ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 0.8660254037844385 0 a3 0.5 0.2886751345948129 0.8164965809277259 ${basis} spacing 1 1 1 +lattice custom 3.8586817049349893 a1 1.0 0 0 a2 0.4999999999999999 0.8660254037844385 0 a3 0.5 0.2886751345948129 0.8164965809277259 basis 0 0 0 basis 0.25 0.25 0.25 spacing 1 1 1 +Lattice spacing in x,y,z = 3.8586817 3.8586817 3.8586817 + +region box prism 0 ${a1x} 0 ${a2y} 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 ${a2y} 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 0.8660254037844385 0 ${a3z} ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 0.8660254037844385 0 0.8164965809277259 ${a2x} ${a3x} ${a3y} +region box prism 0 1.0 0 0.8660254037844385 0 0.8164965809277259 0.4999999999999999 ${a3x} ${a3y} +region box prism 0 1.0 0 0.8660254037844385 0 0.8164965809277259 0.4999999999999999 0.5 ${a3y} +region box prism 0 1.0 0 0.8660254037844385 0 0.8164965809277259 0.4999999999999999 0.5 0.2886751345948129 + +create_box 1 box +Created triclinic box = (0 0 0) to (3.8586817 3.3417164 3.1506004) with tilt (1.9293409 1.9293409 1.1139055) +WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. 
(src/domain.cpp:221) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 2 atoms + using lattice units in triclinic box = (0 0 0) to (3.8586817 3.3417164 3.1506004) with tilt (1.9293409 1.9293409 1.1139055) + create_atoms CPU = 0.000 seconds +mass 1 ${mass1} +mass 1 28.06 +replicate ${nlat} ${nlat} ${nlat} +replicate 5 ${nlat} ${nlat} +replicate 5 5 ${nlat} +replicate 5 5 5 +Replication is creating a 5x5x5 = 125 times larger system... + triclinic box = (0 0 0) to (19.293409 16.708582 15.753002) with tilt (9.6467043 9.6467043 5.5695273) +WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:221) + 1 by 1 by 1 MPI processor grid + 250 atoms + replicate CPU = 0.001 seconds +velocity all create ${temp} 87287 +velocity all create 1477.0 87287 + + + +# Compute initial state + +include potential.in +# NOTE: This script can be modified for different pair styles +# See in.elastic for more info. + +reset_timestep 0 + +# Choose potential +pair_style sw +pair_coeff * * Si.sw Si +Reading sw potential file Si.sw with DATE: 2007-06-11 + +# Setup neighbor style +neighbor 1.0 nsq +neigh_modify once no every 1 delay 0 check yes + +# Setup MD + +timestep ${timestep} +timestep 0.000766 +fix 4 all nve +if "${thermostat} == 1" then "fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed}" +fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 123457 + + +thermo_style custom step temp pe press density +run ${nequil} +run 15000 +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.77118 + ghost atom cutoff = 4.77118 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair sw, perpetual + attributes: full, newton on + pair build: full/nsq + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 3.073 | 3.073 | 3.073 Mbytes + Step Temp PotEng Press Density + 0 1477 -1083.8249 -4258.3947 2.2938491 + 15000 1457.1447 -1034.963 -461.03637 2.2938491 +Loop time of 22.8999 on 1 procs for 15000 steps with 250 atoms + +Performance: 43.351 ns/day, 0.554 hours/ns, 655.024 timesteps/s, 163.756 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 21.406 | 21.406 | 21.406 | 0.0 | 93.48 +Neigh | 0.55439 | 0.55439 | 0.55439 | 0.0 | 2.42 +Comm | 0.15225 | 0.15225 | 0.15225 | 0.0 | 0.66 +Output | 8.6707e-05 | 8.6707e-05 | 8.6707e-05 | 0.0 | 0.00 +Modify | 0.72198 | 0.72198 | 0.72198 | 0.0 | 3.15 +Other | | 0.06512 | | | 0.28 + +Nlocal: 250 ave 250 max 250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 774 ave 774 max 774 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 6616 ave 6616 max 6616 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 6616 +Ave neighs/atom = 26.464 +Neighbor list builds = 265 +Dangerous builds = 0 + +# Run dynamics + +include potential.in +# NOTE: This script can be modified for different pair styles +# See in.elastic for more info. 
+ +reset_timestep 0 + +# Choose potential +pair_style sw +pair_coeff * * Si.sw Si +Reading sw potential file Si.sw with DATE: 2007-06-11 + +# Setup neighbor style +neighbor 1.0 nsq +neigh_modify once no every 1 delay 0 check yes + +# Setup MD + +timestep ${timestep} +timestep 0.000766 +fix 4 all nve +if "${thermostat} == 1" then "fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed}" +fix 5 all langevin ${temp} ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 ${temp} ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 ${tdamp} ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 ${seed} +fix 5 all langevin 1477.0 1477.0 0.01 123457 + + +include output.in +# Setup output + +# Stress fluctuation term F + +compute stress all pressure NULL virial +variable s1 equal c_stress[1] +variable s2 equal c_stress[2] +variable s3 equal c_stress[3] +variable s4 equal c_stress[6] +variable s5 equal c_stress[5] +variable s6 equal c_stress[4] + +variable s11 equal v_s1*v_s1 +variable s22 equal v_s2*v_s2 +variable s33 equal v_s3*v_s3 +variable s44 equal v_s4*v_s4 +variable s55 equal v_s5*v_s5 +variable s66 equal v_s6*v_s6 +variable s12 equal v_s1*v_s2 +variable s13 equal v_s1*v_s3 +variable s14 equal v_s1*v_s4 +variable s15 equal v_s1*v_s5 +variable s16 equal v_s1*v_s6 +variable s23 equal v_s2*v_s3 +variable s24 equal v_s2*v_s4 +variable s25 equal v_s2*v_s5 +variable s26 equal v_s2*v_s6 +variable s34 equal v_s3*v_s4 +variable s35 equal v_s3*v_s5 +variable s36 equal v_s3*v_s6 +variable s45 equal v_s4*v_s5 +variable s46 equal v_s4*v_s6 +variable s56 equal v_s5*v_s6 + +variable mytemp equal temp +variable mypress equal press +variable mype equal pe/atoms +fix avt all ave/time ${nevery} ${nrepeat} ${nfreq} v_mytemp ave running +fix avt all ave/time 10 ${nrepeat} ${nfreq} v_mytemp ave running +fix avt all ave/time 10 150 ${nfreq} v_mytemp ave running +fix avt all ave/time 10 150 1500 v_mytemp ave running +fix avp all ave/time ${nevery} ${nrepeat} ${nfreq} v_mypress ave running +fix avp all 
ave/time 10 ${nrepeat} ${nfreq} v_mypress ave running +fix avp all ave/time 10 150 ${nfreq} v_mypress ave running +fix avp all ave/time 10 150 1500 v_mypress ave running +fix avpe all ave/time ${nevery} ${nrepeat} ${nfreq} v_mype ave running +fix avpe all ave/time 10 ${nrepeat} ${nfreq} v_mype ave running +fix avpe all ave/time 10 150 ${nfreq} v_mype ave running +fix avpe all ave/time 10 150 1500 v_mype ave running +fix avs all ave/time ${nevery} ${nrepeat} ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 ${nrepeat} ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 150 ${nfreq} v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avs all ave/time 10 150 1500 v_s1 v_s2 v_s3 v_s4 v_s5 v_s6 ave running +fix avssq all ave/time ${nevery} ${nrepeat} ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 ${nrepeat} ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 150 ${nfreq} v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running +fix avssq all ave/time 10 150 1500 v_s11 v_s22 v_s33 v_s44 v_s55 v_s66 v_s12 v_s13 v_s14 v_s15 v_s16 v_s23 v_s24 v_s25 v_s26 v_s34 v_s35 v_s36 v_s45 v_s46 v_s56 ave running + +# bar to GPa +variable pconv equal 1.0e5/1.0e9 +variable cunits index GPa +# metal unit constants from LAMMPS +# force->nktv2p = 1.6021765e6; +# force->boltz = 8.617343e-5; +variable boltz equal 8.617343e-5 +variable nktv2p equal 1.6021765e6 +variable vkt equal vol/(${boltz}*${temp})/${nktv2p} +variable vkt equal vol/(8.617343e-05*${temp})/${nktv2p} +variable vkt equal vol/(8.617343e-05*1477.0)/${nktv2p} +variable vkt equal vol/(8.617343e-05*1477.0)/1602176.5 +variable ffac equal ${pconv}*${vkt} +variable 
ffac equal 0.0001*${vkt} +variable ffac equal 0.0001*0.0249027696047235 + +variable F11 equal -(f_avssq[1]-f_avs[1]*f_avs[1])*${ffac} +variable F11 equal -(f_avssq[1]-f_avs[1]*f_avs[1])*2.49027696047235e-06 +variable F22 equal -(f_avssq[2]-f_avs[2]*f_avs[2])*${ffac} +variable F22 equal -(f_avssq[2]-f_avs[2]*f_avs[2])*2.49027696047235e-06 +variable F33 equal -(f_avssq[3]-f_avs[3]*f_avs[3])*${ffac} +variable F33 equal -(f_avssq[3]-f_avs[3]*f_avs[3])*2.49027696047235e-06 +variable F44 equal -(f_avssq[4]-f_avs[4]*f_avs[4])*${ffac} +variable F44 equal -(f_avssq[4]-f_avs[4]*f_avs[4])*2.49027696047235e-06 +variable F55 equal -(f_avssq[5]-f_avs[5]*f_avs[5])*${ffac} +variable F55 equal -(f_avssq[5]-f_avs[5]*f_avs[5])*2.49027696047235e-06 +variable F66 equal -(f_avssq[6]-f_avs[6]*f_avs[6])*${ffac} +variable F66 equal -(f_avssq[6]-f_avs[6]*f_avs[6])*2.49027696047235e-06 + +variable F12 equal -(f_avssq[7]-f_avs[1]*f_avs[2])*${ffac} +variable F12 equal -(f_avssq[7]-f_avs[1]*f_avs[2])*2.49027696047235e-06 +variable F13 equal -(f_avssq[8]-f_avs[1]*f_avs[3])*${ffac} +variable F13 equal -(f_avssq[8]-f_avs[1]*f_avs[3])*2.49027696047235e-06 +variable F14 equal -(f_avssq[9]-f_avs[1]*f_avs[4])*${ffac} +variable F14 equal -(f_avssq[9]-f_avs[1]*f_avs[4])*2.49027696047235e-06 +variable F15 equal -(f_avssq[10]-f_avs[1]*f_avs[5])*${ffac} +variable F15 equal -(f_avssq[10]-f_avs[1]*f_avs[5])*2.49027696047235e-06 +variable F16 equal -(f_avssq[11]-f_avs[1]*f_avs[6])*${ffac} +variable F16 equal -(f_avssq[11]-f_avs[1]*f_avs[6])*2.49027696047235e-06 + +variable F23 equal -(f_avssq[12]-f_avs[2]*f_avs[3])*${ffac} +variable F23 equal -(f_avssq[12]-f_avs[2]*f_avs[3])*2.49027696047235e-06 +variable F24 equal -(f_avssq[13]-f_avs[2]*f_avs[4])*${ffac} +variable F24 equal -(f_avssq[13]-f_avs[2]*f_avs[4])*2.49027696047235e-06 +variable F25 equal -(f_avssq[14]-f_avs[2]*f_avs[5])*${ffac} +variable F25 equal -(f_avssq[14]-f_avs[2]*f_avs[5])*2.49027696047235e-06 +variable F26 equal 
-(f_avssq[15]-f_avs[2]*f_avs[6])*${ffac} +variable F26 equal -(f_avssq[15]-f_avs[2]*f_avs[6])*2.49027696047235e-06 + +variable F34 equal -(f_avssq[16]-f_avs[3]*f_avs[4])*${ffac} +variable F34 equal -(f_avssq[16]-f_avs[3]*f_avs[4])*2.49027696047235e-06 +variable F35 equal -(f_avssq[17]-f_avs[3]*f_avs[5])*${ffac} +variable F35 equal -(f_avssq[17]-f_avs[3]*f_avs[5])*2.49027696047235e-06 +variable F36 equal -(f_avssq[18]-f_avs[3]*f_avs[6])*${ffac} +variable F36 equal -(f_avssq[18]-f_avs[3]*f_avs[6])*2.49027696047235e-06 + +variable F45 equal -(f_avssq[19]-f_avs[4]*f_avs[5])*${ffac} +variable F45 equal -(f_avssq[19]-f_avs[4]*f_avs[5])*2.49027696047235e-06 +variable F46 equal -(f_avssq[20]-f_avs[4]*f_avs[6])*${ffac} +variable F46 equal -(f_avssq[20]-f_avs[4]*f_avs[6])*2.49027696047235e-06 + +variable F56 equal -(f_avssq[21]-f_avs[5]*f_avs[6])*${ffac} +variable F56 equal -(f_avssq[21]-f_avs[5]*f_avs[6])*2.49027696047235e-06 + +# Born term + +compute virial all pressure NULL virial +compute born all born/matrix numdiff ${delta} virial +compute born all born/matrix numdiff 1.0e-6 virial +fix avborn all ave/time ${neveryborn} ${nrepeatborn} ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 ${nrepeatborn} ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 15 ${nfreq} c_born[*] ave running +fix avborn all ave/time 100 15 1500 c_born[*] ave running + +variable bfac equal ${pconv}*${nktv2p}/vol +variable bfac equal 0.0001*${nktv2p}/vol +variable bfac equal 0.0001*1602176.5/vol +variable B vector f_avborn*${bfac} +variable B vector f_avborn*0.0315499354029305 + +# Kinetic term + +variable kfac equal ${pconv}*${nktv2p}*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*${nktv2p}*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*${boltz}*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*8.617343e-05*${temp}/vol +variable kfac equal 0.0001*1602176.5*atoms*8.617343e-05*1477.0/vol +variable K11 equal 4.0*${kfac} +variable K11 equal 
4.0*1.00390440086865 +variable K22 equal 4.0*${kfac} +variable K22 equal 4.0*1.00390440086865 +variable K33 equal 4.0*${kfac} +variable K33 equal 4.0*1.00390440086865 +variable K44 equal 2.0*${kfac} +variable K44 equal 2.0*1.00390440086865 +variable K55 equal 2.0*${kfac} +variable K55 equal 2.0*1.00390440086865 +variable K66 equal 2.0*${kfac} +variable K66 equal 2.0*1.00390440086865 + +# Add F, K, and B together + +variable C11 equal v_F11+v_B[1]+v_K11 +variable C22 equal v_F22+v_B[2]+v_K22 +variable C33 equal v_F33+v_B[3]+v_K33 +variable C44 equal v_F44+v_B[4]+v_K44 +variable C55 equal v_F55+v_B[5]+v_K55 +variable C66 equal v_F66+v_B[6]+v_K66 + +variable C12 equal v_F12+v_B[7] +variable C13 equal v_F13+v_B[8] +variable C14 equal v_F14+v_B[9] +variable C15 equal v_F15+v_B[10] +variable C16 equal v_F16+v_B[11] + +variable C23 equal v_F23+v_B[12] +variable C24 equal v_F24+v_B[13] +variable C25 equal v_F25+v_B[14] +variable C26 equal v_F26+v_B[15] + +variable C34 equal v_F34+v_B[16] +variable C35 equal v_F35+v_B[17] +variable C36 equal v_F36+v_B[18] + +variable C45 equal v_F45+v_B[19] +variable C46 equal v_F46+v_B[20] + +variable C56 equal v_F56+v_B[21] + +thermo ${nthermo} +thermo 1500 +thermo_style custom step temp pe press density f_avt f_avp f_avpe v_F11 v_F22 v_F33 v_F44 v_F55 v_F66 v_F12 v_F13 v_F23 v_B[*8] v_B[12] + +thermo_modify norm no + +run ${nrun} +run 150000 +Per MPI rank memory allocation (min/avg/max) = 3.823 | 3.823 | 3.823 Mbytes + Step Temp PotEng Press Density f_avt f_avp f_avpe v_F11 v_F22 v_F33 v_F44 v_F55 v_F66 v_F12 v_F13 v_F23 v_B[1] v_B[2] v_B[3] v_B[4] v_B[5] v_B[6] v_B[7] v_B[8] v_B[12] + 0 1457.1447 -1034.963 -461.03637 2.2938491 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1500 1377.2185 -1031.0727 698.99111 2.2938491 1480.032 -253.34597 -4.13129 -81.699493 -67.724837 -76.008795 -27.777806 -18.555927 -34.297662 27.556609 41.116598 32.924333 206.06089 205.5974 228.01163 53.887567 53.778986 77.302113 52.29116 28.919479 28.926815 + 3000 
1559.0771 -1031.0306 688.89316 2.2938491 1486.685 -131.39493 -4.1291173 -66.026542 -62.135307 -80.570452 -23.383025 -18.993256 -34.275516 21.668627 35.310639 34.638433 205.53521 206.1525 227.37791 53.811716 53.499947 77.010746 52.066897 28.796647 28.903108 + 4500 1450.6562 -1035.7192 -775.44519 2.2938491 1482.7193 -233.77186 -4.1285256 -60.816713 -62.889945 -78.313494 -23.620753 -19.478003 -36.62141 19.126689 32.743756 37.193609 205.16595 205.78365 228.03086 53.785183 53.605451 76.815835 51.920355 28.91752 28.763924 + 6000 1429.1616 -1033.1421 -395.99893 2.2938491 1478.0329 -220.84971 -4.1295524 -57.725008 -63.913912 -74.812903 -23.682933 -19.636174 -37.78643 19.035084 28.671214 37.692238 205.30668 205.73998 228.40698 53.940813 53.710224 76.893205 51.9617 28.933372 28.864805 + 7500 1625.0485 -1034.2484 716.97173 2.2938491 1478.5575 -203.87007 -4.1299556 -56.620123 -62.601338 -73.280244 -23.053576 -19.188652 -36.918749 18.882854 29.213291 36.722531 205.34269 205.36867 228.73717 54.006509 53.822299 76.827791 51.841004 28.962993 28.870502 + 9000 1636.92 -1036.1362 530.82054 2.2938491 1478.4478 -230.97915 -4.1302174 -54.993684 -59.971482 -73.552996 -21.940548 -18.991313 -36.594577 17.205617 30.591427 35.11164 205.34137 205.2578 228.59894 53.984712 53.814168 76.803719 51.791973 28.954681 28.867238 + 10500 1427.1278 -1034.1413 -1752.4675 2.2938491 1477.6506 -288.20116 -4.130078 -55.378305 -61.367318 -75.361652 -21.589257 -19.272969 -36.117959 17.832293 30.516865 36.776472 205.34965 205.19475 228.36535 53.936086 53.829228 76.789656 51.764893 28.980166 28.856779 + 12000 1510.4967 -1035.8009 -177.47135 2.2938491 1480.078 -269.28102 -4.1300133 -54.096039 -59.257449 -73.775732 -20.956301 -20.022952 -37.252185 17.096292 30.411766 35.384562 205.2498 205.04719 228.59507 53.940448 53.818959 76.740754 51.728274 28.93212 28.854381 + 13500 1427.7339 -1025.8845 -72.436601 2.2938491 1479.7835 -273.2811 -4.129831 -55.923387 -60.102538 -76.872395 -21.377537 -19.523674 -36.568544 
16.353683 33.01884 35.712702 205.16505 205.08235 228.4507 53.891995 53.804389 76.725709 51.730258 28.933757 28.86084 + 15000 1437.4991 -1035.1685 234.79106 2.2938491 1481.2364 -280.99339 -4.1292467 -60.84954 -58.345234 -79.397422 -20.855607 -19.274417 -36.919768 16.337707 36.763907 34.666794 205.12692 205.15675 228.27861 53.861742 53.793476 76.753601 51.776247 28.953534 28.867127 + 16500 1490.8344 -1034.8473 -132.88856 2.2938491 1481.4885 -215.80098 -4.1294711 -61.375575 -60.186968 -80.36065 -21.155519 -19.225957 -37.049065 16.013369 37.251376 35.741905 205.07524 205.31709 228.24302 53.906674 53.782868 76.766166 51.745631 28.902518 28.874865 + 18000 1401.103 -1035.7748 -967.84759 2.2938491 1481.5159 -198.23465 -4.1297636 -61.505356 -59.68759 -78.791865 -21.056209 -19.303349 -37.877526 16.496151 36.550887 35.325026 205.04504 205.48598 228.25031 53.91141 53.799088 76.804899 51.765441 28.887116 28.867934 + 19500 1520.811 -1037.4779 582.84906 2.2938491 1481.6344 -180.8959 -4.1299749 -62.222279 -59.748572 -78.61442 -21.043456 -19.433744 -37.830376 16.870539 36.667811 35.203182 205.10755 205.41367 228.24346 53.911643 53.814208 76.819651 51.770514 28.893565 28.843849 + 21000 1353.0226 -1030.28 -459.20969 2.2938491 1481.5869 -143.21512 -4.1303265 -61.801041 -58.897618 -77.084369 -21.012599 -19.412385 -37.602081 16.994411 36.303668 34.182026 205.18778 205.47282 228.24318 53.933361 53.847484 76.85681 51.775006 28.880145 28.836197 + 22500 1464.6298 -1034.9565 -1505.6741 2.2938491 1482.4233 -146.78624 -4.129929 -60.858686 -59.379843 -76.426874 -20.856192 -20.082205 -38.543862 17.137525 35.607786 34.219823 205.21733 205.34876 228.21025 53.914709 53.838836 76.814341 51.769935 28.903853 28.861534 + 24000 1469.8442 -1034.8522 1134.2342 2.2938491 1482.8311 -129.69495 -4.1299486 -61.732695 -58.496182 -75.753096 -20.883792 -19.982818 -38.404247 17.151961 36.283623 33.155676 205.1949 205.42498 228.1588 53.911499 53.834362 76.820374 51.773352 28.880578 28.862361 + 25500 1421.202 
-1035.1943 -544.64288 2.2938491 1482.969 -116.55824 -4.1300629 -62.506047 -59.072514 -76.018479 -21.276066 -20.475577 -39.636235 17.824608 36.658397 33.061546 205.30114 205.4525 228.17266 53.916015 53.846086 76.843092 51.785778 28.897812 28.86967 + 27000 1508.4276 -1034.3712 -428.29718 2.2938491 1482.8508 -133.20814 -4.1302083 -62.740447 -59.501563 -75.150165 -21.387919 -20.161645 -39.337735 18.463462 36.426348 32.60051 205.34273 205.43339 228.1082 53.899198 53.843145 76.844824 51.791938 28.877426 28.850818 + 28500 1488.3646 -1028.3613 325.7432 2.2938491 1483.5611 -121.0463 -4.1301119 -62.763943 -58.361955 -75.254042 -21.056747 -19.957823 -39.145093 17.972866 36.799132 32.200008 205.43432 205.40037 228.05936 53.871449 53.834184 76.845167 51.799007 28.884544 28.839888 + 30000 1561.8065 -1032.9878 1244.3818 2.2938491 1483.3455 -109.20092 -4.1302015 -62.443014 -57.875927 -74.733792 -20.989446 -20.142114 -39.087873 18.381413 36.171123 31.691966 205.43412 205.43817 228.02918 53.875832 53.835271 76.841267 51.788572 28.875612 28.837756 + 31500 1463.4101 -1036.4103 -11.073903 2.2938491 1483.1967 -94.16251 -4.1304385 -62.114482 -57.704767 -75.000832 -21.049788 -20.030795 -38.868985 18.605625 36.037344 31.554331 205.49856 205.41989 228.01719 53.877292 53.852726 76.855243 51.787487 28.874898 28.826509 + 33000 1411.985 -1032.1173 1830.19 2.2938491 1482.9394 -70.101325 -4.1307323 -62.313718 -57.061168 -75.160161 -20.97237 -20.155416 -39.360721 18.353645 36.648817 31.142105 205.53771 205.40929 228.16268 53.895466 53.867222 76.857852 51.773634 28.865288 28.829395 + 34500 1553.1043 -1031.0342 -314.1235 2.2938491 1483.1344 -63.915175 -4.1306399 -62.083876 -57.213508 -74.747643 -20.992103 -20.084996 -39.443245 18.42679 36.372535 31.257409 205.48647 205.39625 228.20314 53.896438 53.864911 76.84907 51.774476 28.874249 28.833382 + 36000 1443.5125 -1031.5924 -293.44477 2.2938491 1482.9903 -76.085043 -4.1306217 -61.436545 -57.064471 -74.201235 -20.73725 -20.091996 -39.565071 18.131211 
36.097369 31.303284 205.51442 205.34692 228.24686 53.894377 53.876236 76.852069 51.783155 28.896057 28.843134 + 37500 1500.0594 -1035.8638 -305.88976 2.2938491 1482.9337 -60.398769 -4.1307196 -61.599862 -57.183681 -74.112445 -21.059107 -19.954584 -39.229894 18.652818 36.024189 30.846223 205.53693 205.28466 228.31383 53.896617 53.887248 76.849993 51.765623 28.894662 28.827983 + 39000 1527.387 -1033.0827 -144.98455 2.2938491 1482.9324 -48.780549 -4.1308182 -61.959291 -57.70159 -74.802772 -21.257812 -19.966317 -39.174714 18.853572 36.233253 31.331815 205.57494 205.34378 228.24594 53.89838 53.901682 76.872795 51.787339 28.890781 28.823491 + 40500 1521.8892 -1036.2067 -1191.6542 2.2938491 1483.1849 -46.561343 -4.130792 -61.48783 -58.280721 -74.579771 -21.205674 -19.908377 -39.135916 19.1441 35.461358 31.851869 205.55358 205.3454 228.29166 53.894803 53.895328 76.856467 51.77427 28.890101 28.827227 + 42000 1575.3074 -1032.892 24.871517 2.2938491 1483.5899 -35.205422 -4.1308715 -61.763925 -58.459893 -75.110396 -21.283227 -19.8587 -39.136717 19.421571 35.461833 32.021378 205.55551 205.37823 228.31376 53.902393 53.890376 76.856867 51.769722 28.876016 28.835273 + 43500 1442.3133 -1030.5 -110.89598 2.2938491 1483.578 -42.766937 -4.1308634 -62.155508 -58.568437 -75.392547 -21.268467 -19.862187 -39.050178 19.107956 35.614025 32.391032 205.58742 205.39509 228.28783 53.885739 53.893815 76.864628 51.791638 28.882739 28.835831 + 45000 1563.7265 -1036.6481 1242.3727 2.2938491 1483.5419 -27.062011 -4.1309916 -61.804397 -58.370633 -75.098898 -21.088219 -19.762083 -38.81548 18.856506 35.531571 32.26363 205.54961 205.43766 228.28976 53.892759 53.89385 76.87209 51.786715 28.869987 28.834792 + 46500 1543.7827 -1036.18 718.06802 2.2938491 1483.7202 -15.728893 -4.1310599 -61.494597 -58.530017 -75.899461 -20.953283 -19.905669 -38.940002 18.570067 35.689533 32.729709 205.54216 205.43841 228.42755 53.910137 53.911477 76.878136 51.790397 28.878697 28.837285 + 48000 1454.2003 -1035.921 -1502.3665 
2.2938491 1483.6297 -5.4737272 -4.1311653 -61.327796 -58.115298 -75.60321 -20.824714 -19.911734 -39.124342 18.324455 35.830409 32.478724 205.52979 205.43093 228.4327 53.913252 53.911916 76.873657 51.785659 28.874088 28.84058 + 49500 1547.0056 -1029.8134 711.58142 2.2938491 1484.0082 4.5859449 -4.1311921 -61.4204 -58.378256 -76.745703 -20.768047 -20.052185 -39.358421 17.819582 36.308872 33.257921 205.5781 205.44351 228.42392 53.913721 53.913507 76.885209 51.793332 28.875102 28.833919 + 51000 1474.5431 -1031.5365 -1676.9482 2.2938491 1484.277 -5.0431899 -4.1310372 -61.169624 -58.47522 -76.768116 -20.770904 -20.106748 -39.460134 17.791272 36.064374 33.415742 205.55025 205.38004 228.41264 53.905519 53.903112 76.868706 51.795581 28.882105 28.839194 + 52500 1473.2337 -1029.3561 1487.5888 2.2938491 1484.2312 -14.791008 -4.1310284 -61.189842 -58.5801 -76.518299 -20.644778 -20.083013 -39.437592 17.673352 35.715369 33.516265 205.59881 205.38055 228.38954 53.903244 53.908338 76.875754 51.806787 28.878442 28.83534 + 54000 1598.3894 -1029.6989 -360.13474 2.2938491 1484.1073 6.0147148 -4.1311948 -61.039162 -58.101681 -76.818374 -20.568601 -20.028172 -39.274561 17.373153 35.743614 33.449976 205.60276 205.39362 228.44375 53.917297 53.925347 76.887309 51.806876 28.875226 28.838322 + 55500 1506.0315 -1029.8082 -1823.14 2.2938491 1483.7885 -6.4366551 -4.1311101 -61.051524 -58.536548 -76.987263 -20.652242 -19.967162 -39.013136 17.439903 35.659032 33.694967 205.63363 205.39182 228.38759 53.899738 53.92311 76.891074 51.815094 28.873159 28.830192 + 57000 1471.0434 -1027.4975 -829.37248 2.2938491 1483.638 -13.364308 -4.1311328 -60.897619 -58.600086 -77.227704 -20.601042 -19.920394 -38.858357 17.247081 35.572425 34.028403 205.65016 205.39622 228.35354 53.893825 53.922518 76.897799 51.823232 28.87207 28.832056 + 58500 1454.2374 -1033.6592 -368.49139 2.2938491 1483.8636 -15.881224 -4.131014 -60.933572 -59.080422 -77.848665 -20.615288 -20.116863 -38.942599 17.267379 35.731211 34.377916 
205.62082 205.40732 228.31715 53.885574 53.914947 76.890283 51.823194 28.866164 28.82584 + 60000 1516.5606 -1030.6528 682.95649 2.2938491 1483.8661 -6.1136667 -4.1310922 -61.011427 -59.213893 -78.205608 -20.653491 -20.054426 -38.763768 17.284592 35.964087 34.46483 205.65525 205.3874 228.32038 53.887059 53.927387 76.896296 51.822694 28.871855 28.817061 + 61500 1386.6712 -1036.3645 -202.73069 2.2938491 1483.7796 -6.4276585 -4.1311162 -61.613302 -59.106528 -79.237611 -20.598764 -20.187012 -39.027235 16.990762 36.911939 34.638875 205.66544 205.38547 228.33353 53.889082 53.927648 76.893505 51.812745 28.865667 28.811892 + 63000 1386.7875 -1035.2008 -1208.0332 2.2938491 1483.4882 -1.3865929 -4.131215 -61.766395 -59.256453 -79.382667 -20.682686 -20.054648 -38.840758 17.019373 36.944445 34.870512 205.68778 205.38376 228.34951 53.891273 53.934369 76.908314 51.817532 28.862824 28.807843 + 64500 1422.6441 -1033.4235 -483.14268 2.2938491 1483.5906 4.5172791 -4.1312713 -61.393921 -59.244322 -79.256897 -20.585122 -19.990471 -38.787213 16.901615 36.793139 34.948119 205.68147 205.41817 228.30077 53.895162 53.930212 76.908943 51.813065 28.851568 28.802409 + 66000 1341.2816 -1032.3855 -1115.0086 2.2938491 1483.6819 8.1547963 -4.1312278 -62.092474 -59.161233 -79.4017 -20.709716 -19.910366 -38.632267 16.955168 37.311893 34.799799 205.67488 205.38993 228.31322 53.901522 53.931491 76.893438 51.804126 28.849156 28.805376 + 67500 1455.9625 -1030.8774 -2373.9463 2.2938491 1483.3255 -2.6095648 -4.1312295 -61.602945 -58.910016 -79.03196 -20.604318 -19.937804 -38.533221 16.806335 37.078928 34.562386 205.68948 205.37226 228.31852 53.895193 53.932411 76.88724 51.804981 28.856349 28.801314 + 69000 1474.5356 -1034.1776 -117.82028 2.2938491 1483.3369 -8.8709089 -4.1312239 -62.055457 -59.255675 -78.961764 -20.76248 -19.740331 -38.25714 17.265361 37.085535 34.453851 205.68384 205.32093 228.32968 53.888526 53.935935 76.881391 51.800009 28.85232 28.791164 + 70500 1508.2191 -1031.2471 1387.2836 
2.2938491 1483.401 -7.5434001 -4.1311991 -61.758427 -58.988429 -78.645665 -20.630709 -19.719859 -38.158793 17.05361 36.937384 34.376035 205.68379 205.32503 228.3267 53.89062 53.932979 76.878628 51.795261 28.851835 28.794143 + 72000 1484.6337 -1034.5597 833.0193 2.2938491 1483.5207 1.6576779 -4.1312543 -61.652453 -58.867312 -78.790266 -20.522515 -19.771908 -38.159373 17.005452 36.955631 34.340792 205.67383 205.3496 228.34333 53.899674 53.939128 76.882996 51.789252 28.853758 28.796497 + 73500 1389.9499 -1032.5053 -1547.103 2.2938491 1483.2473 1.565493 -4.1312829 -61.365384 -58.95085 -78.585667 -20.48361 -19.793685 -38.332678 16.960129 36.696652 34.388494 205.658 205.35781 228.34985 53.902698 53.940427 76.877285 51.786681 28.852484 28.79421 + 75000 1422.3447 -1033.4832 -1326.073 2.2938491 1483.3581 5.6856281 -4.1313016 -61.24321 -58.750553 -78.571188 -20.429307 -19.828171 -38.411254 16.738977 36.813756 34.358807 205.64091 205.35485 228.36065 53.901589 53.942258 76.87718 51.784577 28.84966 28.790671 + 76500 1477.1041 -1038.1124 2144.0903 2.2938491 1483.2201 11.486051 -4.1313529 -61.613758 -59.220387 -78.562241 -20.590631 -19.776041 -38.354715 17.075689 36.894682 34.347175 205.62076 205.3623 228.39677 53.909193 53.947676 76.879795 51.777596 28.850598 28.790547 + 78000 1338.3179 -1034.0469 -975.28201 2.2938491 1483.1449 15.732153 -4.1313772 -61.709231 -59.471592 -78.656934 -20.577967 -19.724373 -38.169231 17.244946 36.909182 34.450748 205.6504 205.35735 228.39669 53.908271 53.948291 76.884697 51.781774 28.845797 28.784384 + 79500 1478.5749 -1029.4464 -707.2242 2.2938491 1483.2765 19.829895 -4.1313835 -61.877585 -59.459038 -78.641661 -20.619977 -19.678749 -38.0763 17.326517 37.017159 34.38163 205.6292 205.37367 228.41006 53.912738 53.945072 76.879018 51.778065 28.842007 28.78823 + 81000 1442.7352 -1033.7638 -119.76276 2.2938491 1483.3768 20.554803 -4.1313715 -61.630275 -59.097061 -78.444845 -20.499381 -19.702666 -38.115454 17.145255 36.991513 34.224871 205.64816 205.38408 
228.37662 53.910226 53.943047 76.885208 51.783028 28.843609 28.785539 + 82500 1424.8152 -1037.0622 -1345.0322 2.2938491 1483.5246 10.01228 -4.1312992 -62.058514 -59.044049 -78.801665 -20.469535 -19.602185 -38.001561 17.0928 37.418428 34.207716 205.66764 205.38336 228.34055 53.902355 53.940526 76.883041 51.785161 28.847639 28.786202 + 84000 1444.3115 -1030.9903 -1271.3964 2.2938491 1483.608 15.161489 -4.1313489 -62.189905 -59.238137 -78.765966 -20.533801 -19.569689 -37.87808 17.203868 37.445729 34.250124 205.65451 205.3853 228.34853 53.906486 53.941415 76.884405 51.780859 28.845124 28.785276 + 85500 1628.8664 -1034.2322 2018.847 2.2938491 1483.9723 20.093133 -4.131343 -62.133802 -59.237589 -78.515134 -20.559747 -19.550081 -37.823847 17.334665 37.382633 34.05754 205.64808 205.39786 228.33329 53.904695 53.939033 76.888639 51.779129 28.844428 28.783484 + 87000 1459.662 -1030.5313 -845.51705 2.2938491 1483.7073 15.353595 -4.1313983 -62.177214 -59.119455 -78.564025 -20.589015 -19.60795 -37.864733 17.449886 37.471739 33.832822 205.65662 205.39485 228.33341 53.904291 53.940512 76.887048 51.776218 28.847727 28.783092 + 88500 1396.3108 -1036.2034 -817.71186 2.2938491 1483.6348 11.271836 -4.1313466 -61.883855 -59.210937 -78.547198 -20.54519 -19.648088 -37.976469 17.462415 37.26084 33.980415 205.65104 205.39321 228.33022 53.902441 53.938089 76.883864 51.778203 28.850462 28.783467 + 90000 1602.1414 -1033.8549 2503.0167 2.2938491 1483.6836 13.997396 -4.1314183 -61.834752 -59.399028 -78.179701 -20.638304 -19.600863 -37.981641 17.76687 37.011597 33.88927 205.62073 205.41917 228.33213 53.91095 53.938975 76.889415 51.781147 28.84714 28.785177 + 91500 1458.1565 -1032.7187 -2416.3228 2.2938491 1483.9156 12.199533 -4.1313328 -61.830918 -59.389339 -78.087596 -20.663575 -19.635968 -37.9328 17.778487 36.817429 33.982639 205.59916 205.39607 228.32914 53.905076 53.935346 76.884186 51.776423 28.848043 28.785936 + 93000 1454.8394 -1030.027 -493.84012 2.2938491 1484.0092 13.832704 -4.1313562 
-61.933104 -59.463525 -78.079358 -20.736375 -19.64228 -37.930735 17.924716 36.752048 33.986908 205.59355 205.40558 228.32994 53.904232 53.93739 76.887129 51.775542 28.846351 28.787224 + 94500 1508.4334 -1033.9901 -221.88898 2.2938491 1484.091 12.794682 -4.1313135 -62.144828 -59.379968 -78.166238 -20.731099 -19.733309 -38.029093 17.903058 36.93376 33.951736 205.59565 205.42066 228.31535 53.903048 53.929841 76.883967 51.780593 28.843646 28.789042 + 96000 1435.4018 -1032.0513 -837.77365 2.2938491 1484.0833 7.9431977 -4.1312854 -62.161271 -59.087059 -78.048014 -20.763715 -19.778166 -38.110551 17.869528 37.025504 33.735022 205.59557 205.40066 228.29682 53.898724 53.928029 76.879363 51.784096 28.846399 28.787302 + 97500 1434.7948 -1031.7595 -263.87888 2.2938491 1484.184 5.0697285 -4.1312718 -62.434779 -59.236851 -77.973589 -20.824298 -19.760137 -38.058757 18.001233 37.03096 33.747317 205.59268 205.40708 228.26499 53.894896 53.921607 76.881015 51.792889 28.838119 28.785664 + 99000 1450.5436 -1031.5 611.32137 2.2938491 1484.1818 11.164467 -4.1313269 -62.14934 -59.225941 -77.916029 -20.836608 -19.812387 -38.1072 17.819714 36.975313 33.85795 205.59477 205.40146 228.27121 53.895862 53.925296 76.883276 51.789768 28.834324 28.780383 + 100500 1522.7198 -1035.316 673.02028 2.2938491 1483.924 14.770979 -4.1313998 -61.952199 -59.176774 -78.058093 -20.773756 -19.849351 -38.133095 17.758165 36.970328 33.895787 205.60275 205.41803 228.27488 53.900547 53.92974 76.889801 51.791893 28.833483 28.780826 + 102000 1396.8171 -1027.9134 -207.15322 2.2938491 1483.8866 13.845115 -4.131394 -62.448198 -59.199854 -78.110692 -20.817828 -19.807528 -38.078445 17.780341 37.344783 33.836081 205.61973 205.39758 228.28266 53.90079 53.931859 76.887913 51.790749 28.836629 28.781947 + 103500 1472.1788 -1029.4181 762.63376 2.2938491 1483.8947 15.505274 -4.1313682 -62.131372 -59.09162 -77.878266 -20.802946 -19.801366 -38.042013 17.75865 37.083648 33.816336 205.6173 205.40653 228.2834 53.898814 53.930947 
76.887029 51.788617 28.833865 28.789805 + 105000 1503.1232 -1037.9155 32.911002 2.2938491 1484.0115 18.312899 -4.1313492 -61.850281 -59.173646 -77.780905 -20.766543 -19.80456 -38.056659 17.618946 36.913935 33.956345 205.61292 205.41536 228.27204 53.896797 53.929029 76.887643 51.788849 28.831977 28.791262 + 106500 1487.1802 -1031.172 700.88245 2.2938491 1483.9059 13.173434 -4.1313257 -61.853836 -59.408848 -77.761604 -20.816462 -19.791005 -38.002197 17.744268 36.708999 34.180898 205.62698 205.40609 228.23047 53.890921 53.92518 76.886216 51.792653 28.83152 28.792113 + 108000 1504.3333 -1030.3505 1461.7892 2.2938491 1483.8805 20.533387 -4.1313742 -61.926836 -59.459333 -77.749556 -20.814326 -19.759901 -37.907569 17.773778 36.749722 34.171547 205.66173 205.41504 228.22234 53.889577 53.927467 76.898542 51.798943 28.83036 28.787497 + 109500 1592.8622 -1035.2131 419.02794 2.2938491 1484.0778 30.766774 -4.1313649 -61.734002 -59.658763 -77.82269 -20.806811 -19.773625 -37.973919 17.619308 36.706643 34.358658 205.65769 205.41704 228.2493 53.890731 53.928783 76.897454 51.796273 28.82919 28.785801 + 111000 1507.722 -1035.2534 592.2669 2.2938491 1484.0233 33.156495 -4.1314069 -61.783184 -59.501036 -78.074175 -20.797779 -19.682014 -37.847979 17.553061 36.891064 34.358699 205.6627 205.41817 228.26113 53.889483 53.930636 76.895778 51.789974 28.827996 28.780954 + 112500 1432.9551 -1032.7707 -15.239958 2.2938491 1483.9742 35.59381 -4.1314184 -61.507131 -59.290684 -77.787088 -20.727631 -19.690954 -37.783536 17.407154 36.698941 34.219947 205.6678 205.44108 228.26664 53.893044 53.929057 76.903588 51.795682 28.82942 28.781711 + 114000 1436.8797 -1028.7627 -1161.3927 2.2938491 1484.1263 31.903439 -4.1313469 -61.418746 -59.172101 -77.685859 -20.695744 -19.703578 -37.795384 17.385911 36.674648 34.145758 205.65596 205.41122 228.26491 53.887714 53.925701 76.894254 51.791608 28.830777 28.781108 + 115500 1595.1258 -1035.5386 1919.7361 2.2938491 1484.0997 30.519524 -4.131328 -61.409173 -59.456508 
-77.797955 -20.754065 -19.71263 -37.83881 17.497541 36.597483 34.364294 205.65441 205.41253 228.28272 53.889654 53.927582 76.894017 51.794633 28.831442 28.780926 + 117000 1418.4843 -1032.1956 -907.28242 2.2938491 1484.0073 31.480517 -4.1313696 -61.157677 -59.421763 -77.943122 -20.684576 -19.686591 -37.812187 17.286082 36.545836 34.547678 205.66721 205.42548 228.26256 53.889662 53.927779 76.89908 51.798055 28.826754 28.779095 + 118500 1484.1141 -1034.1351 517.01397 2.2938491 1484.1118 31.377301 -4.131331 -61.054098 -59.33186 -77.830895 -20.620684 -19.689951 -37.793727 17.222734 36.53154 34.463233 205.65443 205.41996 228.26492 53.885658 53.923735 76.897975 51.798925 28.823386 28.778545 + 120000 1497.8799 -1035.8765 -549.48577 2.2938491 1484.1809 33.035417 -4.131321 -61.09059 -59.21728 -77.824664 -20.588208 -19.688806 -37.732861 17.215723 36.602423 34.421116 205.64696 205.43618 228.26398 53.883131 53.919513 76.897051 51.796062 28.819848 28.777479 + 121500 1402.5922 -1028.9055 562.48449 2.2938491 1484.1515 35.922603 -4.1313608 -60.93877 -59.114446 -77.738114 -20.551424 -19.696765 -37.751895 17.170055 36.481798 34.423407 205.66799 205.41946 228.26347 53.880372 53.920587 76.900879 51.798167 28.818443 28.770767 + 123000 1524.1528 -1035.1059 -424.65119 2.2938491 1484.295 40.029736 -4.1313466 -60.781641 -59.01533 -77.375963 -20.553224 -19.706938 -37.764217 17.246809 36.266166 34.297591 205.66436 205.43259 228.26198 53.883778 53.919894 76.904506 51.798237 28.817051 28.773724 + 124500 1564.311 -1035.1047 -349.8949 2.2938491 1484.384 47.679966 -4.1313772 -60.877314 -59.063051 -77.335194 -20.620027 -19.672739 -37.72847 17.270228 36.205966 34.360926 205.6777 205.41446 228.28078 53.885663 53.927774 76.907098 51.796176 28.819655 28.773355 + 126000 1568.7131 -1030.5456 66.143968 2.2938491 1484.4583 46.595891 -4.1313739 -61.098756 -59.17178 -77.359369 -20.649197 -19.623911 -37.62936 17.390302 36.3497 34.301017 205.69628 205.39158 228.29603 53.881964 53.931859 76.908068 51.795444 
28.823842 28.773377 + 127500 1522.902 -1034.6797 557.83987 2.2938491 1484.4873 46.109186 -4.1313816 -61.218121 -59.241213 -77.497159 -20.683461 -19.646391 -37.633775 17.453889 36.414329 34.34605 205.69448 205.38822 228.29853 53.882447 53.933807 76.908316 51.794212 28.82683 28.775227 + 129000 1417.0886 -1032.5298 -1680.2016 2.2938491 1484.4653 40.946037 -4.1313774 -61.048884 -59.12431 -77.477282 -20.701818 -19.657678 -37.687188 17.397717 36.326694 34.325431 205.6932 205.39869 228.29846 53.880205 53.933304 76.912307 51.798328 28.830379 28.77857 + 130500 1401.7951 -1036.6411 26.695286 2.2938491 1484.4844 44.131888 -4.1314239 -61.117343 -59.237642 -77.593874 -20.781173 -19.73396 -37.832429 17.443658 36.387562 34.381635 205.69835 205.39723 228.32466 53.882413 53.936663 76.913325 51.79449 28.830579 28.778167 + 132000 1531.9262 -1035.8104 -454.66105 2.2938491 1484.3758 46.047235 -4.1314014 -61.07594 -59.225306 -77.729113 -20.768642 -19.733333 -37.777506 17.420967 36.47223 34.385127 205.68983 205.38043 228.33612 53.885006 53.938619 76.911181 51.789558 28.829624 28.777212 + 133500 1612.7407 -1031.8896 2838.8952 2.2938491 1484.4517 53.558624 -4.1314349 -60.948011 -59.226556 -77.561421 -20.78053 -19.732381 -37.839562 17.519266 36.367588 34.233895 205.69367 205.39136 228.3517 53.889832 53.941096 76.915648 51.789313 28.827286 28.776749 + 135000 1521.2087 -1033.4591 454.13729 2.2938491 1484.6363 55.80391 -4.1314325 -60.904929 -59.084186 -77.415336 -20.801037 -19.789548 -37.90064 17.531914 36.341868 34.115463 205.70545 205.39036 228.34484 53.888458 53.942885 76.917508 51.793598 28.830605 28.776034 + 136500 1531.5313 -1029.192 -717.1576 2.2938491 1484.6311 53.239475 -4.1314096 -61.027395 -59.012111 -77.371106 -20.818787 -19.9047 -38.057191 17.612667 36.375737 34.04232 205.68794 205.3873 228.356 53.891374 53.93841 76.911384 51.794044 28.831328 28.779846 + 138000 1530.7438 -1028.2335 1689.2135 2.2938491 1484.6075 52.143612 -4.1314161 -61.12225 -59.232618 -77.513771 -20.879734 
-19.909162 -38.039018 17.765502 36.368631 34.107766 205.69465 205.39912 228.35878 53.894777 53.938897 76.913448 51.79675 28.831845 28.783101 + 139500 1637.3089 -1033.3344 1279.2929 2.2938491 1484.5451 52.631612 -4.1314254 -60.987574 -59.198693 -77.321359 -20.840503 -19.924083 -38.073917 17.78274 36.235023 34.064281 205.69359 205.40275 228.35583 53.893671 53.938344 76.909193 51.792251 28.829041 28.782395 + 141000 1489.0608 -1032.3058 1630.2026 2.2938491 1484.5653 49.12813 -4.1313885 -61.025436 -59.343134 -77.276572 -20.893744 -19.87512 -37.9684 17.904117 36.179799 34.025526 205.68531 205.42163 228.34735 53.891768 53.934366 76.909747 51.799411 28.82674 28.785944 + 142500 1578.7401 -1035.29 1154.49 2.2938491 1484.5937 49.103153 -4.1313871 -60.863339 -59.237185 -77.046737 -20.811081 -19.873031 -37.946797 17.806836 36.080391 33.980261 205.69437 205.4207 228.33312 53.891932 53.935828 76.911113 51.803913 28.828613 28.785842 + 144000 1450.4093 -1035.2433 -177.69606 2.2938491 1484.587 48.225811 -4.1313584 -60.925647 -59.30677 -76.905222 -20.857836 -19.841015 -37.899693 17.933226 36.013941 33.954159 205.7068 205.42335 228.31997 53.892806 53.933528 76.910976 51.803519 28.829119 28.788464 + 145500 1553.7494 -1031.7228 1768.5289 2.2938491 1484.5987 48.613816 -4.1313218 -60.76498 -59.357706 -76.820116 -20.79664 -19.812954 -37.873856 17.817969 35.901431 34.099361 205.69876 205.42559 228.33035 53.891956 53.934012 76.912003 51.805667 28.830704 28.78828 + 147000 1427.5502 -1035.2127 -1772.6396 2.2938491 1484.5609 50.275011 -4.1313451 -60.622887 -59.435918 -76.850232 -20.781814 -19.846275 -37.890749 17.724827 35.868632 34.165617 205.69421 205.42648 228.33897 53.892297 53.934264 76.913069 51.80338 28.828495 28.786603 + 148500 1608.7361 -1030.9261 451.16203 2.2938491 1484.4401 50.162687 -4.131379 -60.560399 -59.476008 -76.763265 -20.796966 -19.804133 -37.880651 17.785517 35.76907 34.202308 205.67787 205.43374 228.34493 53.895658 53.935739 76.913237 51.801627 28.828117 28.787591 + 
150000 1486.3037 -1036.443 -105.94161 2.2938491 1484.4624 52.260709 -4.1314072 -60.74209 -59.520192 -76.696782 -20.839851 -19.824707 -37.891713 17.919993 35.842215 34.099312 205.68103 205.43091 228.35418 53.898265 53.938553 76.916873 51.802187 28.82698 28.786077 +Loop time of 269.384 on 1 procs for 150000 steps with 250 atoms + +Performance: 36.852 ns/day, 0.651 hours/ns, 556.825 timesteps/s, 139.206 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 218.46 | 218.46 | 218.46 | 0.0 | 81.10 +Neigh | 5.5642 | 5.5642 | 5.5642 | 0.0 | 2.07 +Comm | 1.5595 | 1.5595 | 1.5595 | 0.0 | 0.58 +Output | 0.017053 | 0.017053 | 0.017053 | 0.0 | 0.01 +Modify | 43.092 | 43.092 | 43.092 | 0.0 | 16.00 +Other | | 0.6867 | | | 0.25 + +Nlocal: 250 ave 250 max 250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 775 ave 775 max 775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 6680 ave 6680 max 6680 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 6680 +Ave neighs/atom = 26.72 +Neighbor list builds = 2628 +Dangerous builds = 0 + +# Output final values + +include final_output.in +# Average moduli for cubic crystals + +variable C11cubic equal (${C11}+${C22}+${C33})/3.0 +variable C11cubic equal (148.954555702137+${C22}+${C33})/3.0 +variable C11cubic equal (148.954555702137+149.926340161663+${C33})/3.0 +variable C11cubic equal (148.954555702137+149.926340161663+155.673015048317)/3.0 +variable C12cubic equal (${C12}+${C13}+${C23})/3.0 +variable C12cubic equal (69.7221801323199+${C13}+${C23})/3.0 +variable C12cubic equal (69.7221801323199+64.6691952711806+${C23})/3.0 +variable C12cubic equal (69.7221801323199+64.6691952711806+62.8853886015143)/3.0 +variable C44cubic equal (${C44}+${C55}+${C66})/3.0 +variable C44cubic equal 
(35.0662230991614+${C55}+${C66})/3.0 +variable C44cubic equal (35.0662230991614+36.1216541734018+${C66})/3.0 +variable C44cubic equal (35.0662230991614+36.1216541734018+41.0329688586967)/3.0 + +variable bulkmodulus equal (${C11cubic}+2*${C12cubic})/3.0 +variable bulkmodulus equal (151.517970304039+2*${C12cubic})/3.0 +variable bulkmodulus equal (151.517970304039+2*65.7589213350049)/3.0 +variable shearmodulus1 equal ${C44cubic} +variable shearmodulus1 equal 37.40694871042 +variable shearmodulus2 equal (${C11cubic}-${C12cubic})/2.0 +variable shearmodulus2 equal (151.517970304039-${C12cubic})/2.0 +variable shearmodulus2 equal (151.517970304039-65.7589213350049)/2.0 +variable poissonratio equal 1.0/(1.0+${C11cubic}/${C12cubic}) +variable poissonratio equal 1.0/(1.0+151.517970304039/${C12cubic}) +variable poissonratio equal 1.0/(1.0+151.517970304039/65.7589213350049) + +# For Stillinger-Weber silicon, the analytical results +# are known to be (E. R. Cowley, 1988): +# C11 = 151.4 GPa +# C12 = 76.4 GPa +# C44 = 56.4 GPa + +#print "=========================================" +#print "Components of the Elastic Constant Tensor" +#print "=========================================" + +print "Elastic Constant C11 = ${C11} ${cunits}" +Elastic Constant C11 = 148.954555702137 GPa +print "Elastic Constant C22 = ${C22} ${cunits}" +Elastic Constant C22 = 149.926340161663 GPa +print "Elastic Constant C33 = ${C33} ${cunits}" +Elastic Constant C33 = 155.673015048317 GPa + +print "Elastic Constant C12 = ${C12} ${cunits}" +Elastic Constant C12 = 69.7221801323199 GPa +print "Elastic Constant C13 = ${C13} ${cunits}" +Elastic Constant C13 = 64.6691952711806 GPa +print "Elastic Constant C23 = ${C23} ${cunits}" +Elastic Constant C23 = 62.8853886015143 GPa + +print "Elastic Constant C44 = ${C44} ${cunits}" +Elastic Constant C44 = 35.0662230991614 GPa +print "Elastic Constant C55 = ${C55} ${cunits}" +Elastic Constant C55 = 36.1216541734018 GPa +print "Elastic Constant C66 = ${C66} ${cunits}" 
+Elastic Constant C66 = 41.0329688586967 GPa + +print "Elastic Constant C14 = ${C14} ${cunits}" +Elastic Constant C14 = 6.14047742579717 GPa +print "Elastic Constant C15 = ${C15} ${cunits}" +Elastic Constant C15 = -0.123040801028632 GPa +print "Elastic Constant C16 = ${C16} ${cunits}" +Elastic Constant C16 = -0.303471517228473 GPa + +print "Elastic Constant C24 = ${C24} ${cunits}" +Elastic Constant C24 = -6.78251264997486 GPa +print "Elastic Constant C25 = ${C25} ${cunits}" +Elastic Constant C25 = 0.0388246084763399 GPa +print "Elastic Constant C26 = ${C26} ${cunits}" +Elastic Constant C26 = 0.580537991508875 GPa + +print "Elastic Constant C34 = ${C34} ${cunits}" +Elastic Constant C34 = 0.876358118422116 GPa +print "Elastic Constant C35 = ${C35} ${cunits}" +Elastic Constant C35 = 0.508259928395758 GPa +print "Elastic Constant C36 = ${C36} ${cunits}" +Elastic Constant C36 = 0.35211074288891 GPa + +print "Elastic Constant C45 = ${C45} ${cunits}" +Elastic Constant C45 = -0.102620837341657 GPa +print "Elastic Constant C46 = ${C46} ${cunits}" +Elastic Constant C46 = -0.349199224927754 GPa +print "Elastic Constant C56 = ${C56} ${cunits}" +Elastic Constant C56 = 7.55182057636239 GPa + +print "=========================================" +========================================= +print "Average properties for a cubic crystal" +Average properties for a cubic crystal +print "=========================================" +========================================= + +print "Bulk Modulus = ${bulkmodulus} ${cunits}" +Bulk Modulus = 94.3452709913496 GPa +print "Shear Modulus 1 = ${shearmodulus1} ${cunits}" +Shear Modulus 1 = 37.40694871042 GPa +print "Shear Modulus 2 = ${shearmodulus2} ${cunits}" +Shear Modulus 2 = 42.8795244845171 GPa +print "Poisson Ratio = ${poissonratio}" +Poisson Ratio = 0.302650322539814 + +# summarize sampling protocol + +variable tmp equal atoms +print "Number of atoms = ${tmp}" +Number of atoms = 250 +print "Stress sampling interval = ${nevery}" +Stress 
sampling interval = 10 +variable tmp equal ${nrun}/${nevery} +variable tmp equal 150000/${nevery} +variable tmp equal 150000/10 +print "Stress sample count = ${tmp}" +Stress sample count = 15000 +print "Born sampling interval = ${neveryborn}" +Born sampling interval = 100 +variable tmp equal ${nrun}/${neveryborn} +variable tmp equal 150000/${neveryborn} +variable tmp equal 150000/100 +print "Born sample count = ${tmp}" +Born sample count = 1500 +Total wall time: 0:04:52 From 90fc2f63c0ed294d31cd4c055f24f4be1b17f587 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 16 Sep 2025 23:28:50 -0400 Subject: [PATCH 051/604] fix memory leak --- src/output.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/output.cpp b/src/output.cpp index 61de89366e2..f32852d98be 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -758,6 +758,9 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i indent.resize(--json_level*tab, ' '); fprintf(fp, "%s}\n", indent.c_str()); } + #if !defined(MPI_STUBS) + MPI_Type_free(&ParticleStructType); + #endif } /* ---------------------------------------------------------------------- From 6e7e07dd908887b419bdcf91d806f5f239daf7b3 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Wed, 24 Sep 2025 20:35:55 -0700 Subject: [PATCH 052/604] Add mask loop for atoms --- src/REPLICA/fix_pimd_langevin.cpp | 306 +++++++++++++++++++----------- 1 file changed, 199 insertions(+), 107 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 453ac637a08..d5e39f49b2e 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -514,10 +514,15 @@ void FixPIMDLangevin::init() void FixPIMDLangevin::setup(int vflag) { int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; imageint *image = atom->image; if (mapflag) { - for (int i = 0; i < nlocal; i++) domain->unmap(x[i], image[i]); + for (int i = 0; i < nlocal; i++) { + if (mask[i] & 
groupbit) { + domain->unmap(x[i], image[i]); + } + } } if (method == NMPIMD) { @@ -550,7 +555,11 @@ void FixPIMDLangevin::setup(int vflag) nmpimd_transform(bufbeads, x, M_xp2x[universe->iworld]); } if (mapflag) { - for (int i = 0; i < nlocal; i++) domain->unmap_inv(x[i], image[i]); + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap_inv(x[i], image[i]); + } + } } post_force(vflag); @@ -565,10 +574,15 @@ void FixPIMDLangevin::setup(int vflag) void FixPIMDLangevin::initial_integrate(int /*vflag*/) { int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; imageint *image = atom->image; if (mapflag) { - for (int i = 0; i < nlocal; i++) domain->unmap(x[i], image[i]); + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap(x[i], image[i]); + } + } } if (integrator == OBABO) { if (tstat_flag) { @@ -663,7 +677,11 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) } if (mapflag) { - for (int i = 0; i < nlocal; i++) { domain->unmap_inv(x[i], image[i]); } + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap_inv(x[i], image[i]); + } + } } } @@ -702,6 +720,7 @@ void FixPIMDLangevin::prepare_coordinates() void FixPIMDLangevin::post_force(int /*flag*/) { int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; double **f = atom->f; imageint *image = atom->image; @@ -710,17 +729,25 @@ void FixPIMDLangevin::post_force(int /*flag*/) if (atom->nmax > maxunwrap) reallocate_x_unwrap(); if (atom->nmax > maxxc) reallocate_xc(); for (int i = 0; i < nlocal; i++) { - x_unwrap[i][0] = x[i][0]; - x_unwrap[i][1] = x[i][1]; - x_unwrap[i][2] = x[i][2]; + if (mask[i] & groupbit) { + x_unwrap[i][0] = x[i][0]; + x_unwrap[i][1] = x[i][1]; + x_unwrap[i][2] = x[i][2]; + } } if (mapflag) { - for (int i = 0; i < nlocal; i++) { domain->unmap(x_unwrap[i], image[i]); } + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap(x_unwrap[i], image[i]); + } + } } 
for (int i = 0; i < nlocal; i++) { - xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; - xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; - xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; + if (mask[i] & groupbit) { + xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; + xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; + xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; + } } compute_vir(); @@ -730,14 +757,22 @@ void FixPIMDLangevin::post_force(int /*flag*/) if (method == PIMD) { if (mapflag) { - for (int i = 0; i < nlocal; i++) { domain->unmap(x[i], image[i]); } + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap(x[i], image[i]); + } + } } prepare_coordinates(); spring_force(); compute_spring_energy(); compute_t_prim(); if (mapflag) { - for (int i = 0; i < nlocal; i++) { domain->unmap_inv(x[i], image[i]); } + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + domain->unmap_inv(x[i], image[i]); + } + } } } compute_pote(); @@ -768,12 +803,15 @@ void FixPIMDLangevin::end_of_step() void FixPIMDLangevin::collect_xc() { int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; tagint *tag = atom->tag; if (ireplica == 0) { if (cmode == SINGLE_PROC) { for (int i = 0; i < nlocal; i++) { - xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; + if (mask[i] & groupbit) { + xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; + } } } else if (cmode == MULTI_PROC) { for (int i = 0; i < ntotal; i++) { @@ -783,9 +821,11 @@ void FixPIMDLangevin::collect_xc() const double sqrtnp = sqrt((double) np); for (int i = 0; i < nlocal; i++) { - xcall[3 * (tag[i] - 1) + 0] = x[i][0] / sqrtnp; - xcall[3 * (tag[i] - 1) + 1] = x[i][1] / sqrtnp; - xcall[3 * (tag[i] - 1) + 2] = x[i][2] / sqrtnp; + if (mask[i] & groupbit) { + xcall[3 * (tag[i] - 1) + 0] = x[i][0] / sqrtnp; + xcall[3 * (tag[i] - 1) + 1] = x[i][1] / sqrtnp; + xcall[3 * (tag[i] - 1) + 2] = x[i][2] / sqrtnp; + } } if (cmode == MULTI_PROC) { @@ -802,16 +842,19 @@ void FixPIMDLangevin::b_step() // used for both 
NMPIMD and PIMD // For NMPIMD, force only includes the contribution of external potential. // For PIMD, force includes the contributions of external potential and spring force. - int n = atom->nlocal; + int nlocal = atom->nlocal; + int *mask = atom->mask; int *type = atom->type; double **v = atom->v; double **f = atom->f; - for (int i = 0; i < n; i++) { - double dtfm = dtf / mass[type[i]]; - v[i][0] += dtfm * f[i][0]; - v[i][1] += dtfm * f[i][1]; - v[i][2] += dtfm * f[i][2]; + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + double dtfm = dtf / mass[type[i]]; + v[i][0] += dtfm * f[i][0]; + v[i][1] += dtfm * f[i][1]; + v[i][2] += dtfm * f[i][2]; + } } } @@ -822,6 +865,7 @@ void FixPIMDLangevin::qc_step() // used for NMPIMD // evolve the centroid mode int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; double **v = atom->v; double oldlo, oldhi; @@ -829,9 +873,11 @@ void FixPIMDLangevin::qc_step() if (!pstat_flag) { if (universe->iworld == 0) { for (int i = 0; i < nlocal; i++) { - x[i][0] += dtv * v[i][0]; - x[i][1] += dtv * v[i][1]; - x[i][2] += dtv * v[i][2]; + if (mask[i] & groupbit) { + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + } } } } else { @@ -847,12 +893,14 @@ void FixPIMDLangevin::qc_step() } if (barostat == BZP) { for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) { - if (p_flag[j]) { - x[i][j] = expq[j] * x[i][j] + (expq[j] - expp[j]) / 2. / vw[j] * v[i][j]; - v[i][j] = expp[j] * v[i][j]; - } else { - x[i][j] += dtv * v[i][j]; + if (mask[i] & groupbit) { + for (int j = 0; j < 3; j++) { + if (p_flag[j]) { + x[i][j] = expq[j] * x[i][j] + (expq[j] - expp[j]) / 2. 
/ vw[j] * v[i][j]; + v[i][j] = expp[j] * v[i][j]; + } else { + x[i][j] += dtv * v[i][j]; + } } } } @@ -887,31 +935,34 @@ void FixPIMDLangevin::a_step() { // used for NMPIMD // use analytical solution of harmonic oscillator to evolve the non-centroid modes - int n = atom->nlocal; + int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; double **v = atom->v; double x0, x1, x2, v0, v1, v2; // three components of x[i] and v[i] if (universe->iworld != 0) { - for (int i = 0; i < n; i++) { - x0 = x[i][0]; - x1 = x[i][1]; - x2 = x[i][2]; - v0 = v[i][0]; - v1 = v[i][1]; - v2 = v[i][2]; - x[i][0] = Lan_c[universe->iworld] * x0 + - 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v0; - x[i][1] = Lan_c[universe->iworld] * x1 + - 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v1; - x[i][2] = Lan_c[universe->iworld] * x2 + - 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v2; - v[i][0] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x0 + - Lan_c[universe->iworld] * v0; - v[i][1] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x1 + - Lan_c[universe->iworld] * v1; - v[i][2] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x2 + - Lan_c[universe->iworld] * v2; + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + x0 = x[i][0]; + x1 = x[i][1]; + x2 = x[i][2]; + v0 = v[i][0]; + v1 = v[i][1]; + v2 = v[i][2]; + x[i][0] = Lan_c[universe->iworld] * x0 + + 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v0; + x[i][1] = Lan_c[universe->iworld] * x1 + + 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v1; + x[i][2] = Lan_c[universe->iworld] * x2 + + 1.0 / _omega_k[universe->iworld] * Lan_s[universe->iworld] * v2; + v[i][0] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x0 + + Lan_c[universe->iworld] * v0; + v[i][1] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x1 + + Lan_c[universe->iworld] * v1; + v[i][2] = -1.0 * 
_omega_k[universe->iworld] * Lan_s[universe->iworld] * x2 + + Lan_c[universe->iworld] * v2; + } } } } @@ -923,14 +974,17 @@ void FixPIMDLangevin::q_step() // used for PIMD // evolve all beads int nlocal = atom->nlocal; + int *mask = atom->mask; double **x = atom->x; double **v = atom->v; if (!pstat_flag) { for (int i = 0; i < nlocal; i++) { - x[i][0] += dtv * v[i][0]; - x[i][1] += dtv * v[i][1]; - x[i][2] += dtv * v[i][2]; + if (mask[i] & groupbit) { + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + } } } } @@ -957,6 +1011,7 @@ void FixPIMDLangevin::baro_init() void FixPIMDLangevin::press_v_step() { int nlocal = atom->nlocal; + int *mask = atom->mask; double **f = atom->f; double **v = atom->v; int *type = atom->type; @@ -968,8 +1023,10 @@ void FixPIMDLangevin::press_v_step() if (universe->iworld == 0) { double dvw_proc = 0.0, dvw = 0.0; for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) { - dvw_proc += dtv2 * f[i][j] * v[i][j] / W + dtv3 * f[i][j] * f[i][j] / mass[type[i]] / W; + if (mask[i] & groupbit) { + for (int j = 0; j < 3; j++) { + dvw_proc += dtv2 * f[i][j] * v[i][j] / W + dtv3 * f[i][j] * f[i][j] / mass[type[i]] / W; + } } } MPI_Allreduce(&dvw_proc, &dvw, 1, MPI_DOUBLE, MPI_SUM, world); @@ -990,8 +1047,10 @@ void FixPIMDLangevin::press_v_step() if (universe->iworld == 0) { double dvw_proc = 0.0, dvw = 0.0; for (int i = 0; i < nlocal; i++) { - dvw_proc += - dtv2 * f[i][ii] * v[i][ii] / W + dtv3 * f[i][ii] * f[i][ii] / mass[type[i]] / W; + if (mask[i] & groupbit) { + dvw_proc += + dtv2 * f[i][ii] * v[i][ii] / W + dtv3 * f[i][ii] * f[i][ii] / mass[type[i]] / W; + } } MPI_Allreduce(&dvw_proc, &dvw, 1, MPI_DOUBLE, MPI_SUM, world); vw[ii] += dvw; @@ -1110,26 +1169,31 @@ void FixPIMDLangevin::langevin_init() void FixPIMDLangevin::o_step() { int nlocal = atom->nlocal; + int *mask = atom->mask; int *type = atom->type; double beta_np = 1.0 / force->boltz / Lan_temp * inverse_np * force->mvv2e; if (thermostat == 
PILE_L) { if (method == NMPIMD) { for (int i = 0; i < nlocal; i++) { - atom->v[i][0] = c1_k[universe->iworld] * atom->v[i][0] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + - c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + if (mask[i] & groupbit) { + atom->v[i][0] = c1_k[universe->iworld] * atom->v[i][0] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + } } } else if (method == PIMD) { for (int i = 0; i < nlocal; i++) { - atom->v[i][0] = - c1 * atom->v[i][0] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][1] = - c1 * atom->v[i][1] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - atom->v[i][2] = - c1 * atom->v[i][2] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + if (mask[i] & groupbit) { + atom->v[i][0] = + c1 * atom->v[i][0] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][1] = + c1 * atom->v[i][1] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + atom->v[i][2] = + c1 * atom->v[i][2] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); + } } } } @@ -1206,15 +1270,19 @@ void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vecto for (int d = 0; d < 3; d++) { des[i][d] = bufsorted[tagtmp - 1][d]; } } } else if (cmode == MULTI_PROC) { - int n = atom->nlocal; + int nlocal = atom->nlocal; int m = 0; + int *mask = atom->mask; - for 
(int i = 0; i < n; i++) - for (int d = 0; d < 3; d++) { - des[i][d] = 0.0; - for (int j = 0; j < np; j++) { des[i][d] += (src[j][m] * vector[j]); } - m++; + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + for (int d = 0; d < 3; d++) { + des[i][d] = 0.0; + for (int j = 0; j < np; j++) { des[i][d] += (src[j][m] * vector[j]); } + m++; + } } + } } } @@ -1335,25 +1403,30 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) MPI_Status statuses[2]; if (atom->nmax > maxlocal) reallocate(); int nlocal = atom->nlocal; + int *mask = atom->mask; tagint *tag = atom->tag; int i, m; // copy local values for (i = 0; i < nlocal; i++) { - bufbeads[ireplica][3 * i + 0] = ptr[i][0]; - bufbeads[ireplica][3 * i + 1] = ptr[i][1]; - bufbeads[ireplica][3 * i + 2] = ptr[i][2]; + if (mask[i] & groupbit) { + bufbeads[ireplica][3 * i + 0] = ptr[i][0]; + bufbeads[ireplica][3 * i + 1] = ptr[i][1]; + bufbeads[ireplica][3 * i + 2] = ptr[i][2]; + } } // communicate values from the other beads if (cmode == SINGLE_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - tagint tagtmp = atom->tag[i]; - bufsorted[tagtmp - 1][0] = ptr[i][0]; - bufsorted[tagtmp - 1][1] = ptr[i][1]; - bufsorted[tagtmp - 1][2] = ptr[i][2]; - m++; + if (mask[i] & groupbit) { + tagint tagtmp = atom->tag[i]; + bufsorted[tagtmp - 1][0] = ptr[i][0]; + bufsorted[tagtmp - 1][1] = ptr[i][1]; + bufsorted[tagtmp - 1][2] = ptr[i][2]; + m++; + } } MPI_Allgather(&m, 1, MPI_INT, counts, 1, MPI_INT, universe->uworld); for (i = 0; i < nreplica; i++) counts[i] *= 3; @@ -1364,11 +1437,13 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) } else if (cmode == MULTI_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - tagsend[m] = tag[i]; - bufsend[m][0] = ptr[i][0]; - bufsend[m][1] = ptr[i][1]; - bufsend[m][2] = ptr[i][2]; - m++; + if (mask[i] & groupbit) { + tagsend[m] = tag[i]; + bufsend[m][0] = ptr[i][0]; + bufsend[m][1] = ptr[i][1]; + bufsend[m][2] = ptr[i][2]; + m++; + } } MPI_Gather(&m, 1, MPI_INT, counts, 1, MPI_INT, 0, 
world); displacements[0] = 0; @@ -1448,10 +1523,13 @@ void FixPIMDLangevin::remove_com_motion() void FixPIMDLangevin::compute_xf_vir() { int nlocal = atom->nlocal; + int *mask = atom->mask; double xf = 0.0; vir_ = 0.0; for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) { xf += x_unwrap[i][j] * atom->f[i][j]; } + if (mask[i] & groupbit) { + for (int j = 0; j < 3; j++) { xf += x_unwrap[i][j] * atom->f[i][j]; } + } } MPI_Allreduce(&xf, &vir_, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); } @@ -1461,21 +1539,26 @@ void FixPIMDLangevin::compute_xf_vir() void FixPIMDLangevin::compute_cvir() { int nlocal = atom->nlocal; + int *mask = atom->mask; double xcf = 0.0; centroid_vir = 0.0; for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) { xcf += (x_unwrap[i][j] - xc[i][j]) * atom->f[i][j]; } + if (mask[i] & groupbit) { + for (int j = 0; j < 3; j++) { xcf += (x_unwrap[i][j] - xc[i][j]) * atom->f[i][j]; } + } } MPI_Allreduce(&xcf, &centroid_vir, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); if (pstyle == ANISO) { for (int i = 0; i < 6; i++) c_vir_tensor[i] = 0.0; for (int i = 0; i < nlocal; i++) { - c_vir_tensor[0] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][0]; - c_vir_tensor[1] += (x_unwrap[i][1] - xc[i][1]) * atom->f[i][1]; - c_vir_tensor[2] += (x_unwrap[i][2] - xc[i][2]) * atom->f[i][2]; - c_vir_tensor[3] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][1]; - c_vir_tensor[4] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][2]; - c_vir_tensor[5] += (x_unwrap[i][1] - xc[i][1]) * atom->f[i][2]; + if (mask[i] & groupbit) { + c_vir_tensor[0] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][0]; + c_vir_tensor[1] += (x_unwrap[i][1] - xc[i][1]) * atom->f[i][1]; + c_vir_tensor[2] += (x_unwrap[i][2] - xc[i][2]) * atom->f[i][2]; + c_vir_tensor[3] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][1]; + c_vir_tensor[4] += (x_unwrap[i][0] - xc[i][0]) * atom->f[i][2]; + c_vir_tensor[5] += (x_unwrap[i][1] - xc[i][1]) * atom->f[i][2]; + } } MPI_Allreduce(MPI_IN_PLACE, &c_vir_tensor, 6, 
MPI_DOUBLE, MPI_SUM, universe->uworld); } @@ -1504,17 +1587,20 @@ void FixPIMDLangevin::compute_vir() void FixPIMDLangevin::compute_stress_tensor() { int nlocal = atom->nlocal; + int *mask = atom->mask; int *type = atom->type; if (universe->iworld == 0) { double inv_volume = 1.0 / (domain->xprd * domain->yprd * domain->zprd); for (int i = 0; i < 6; i++) ke_tensor[i] = 0.0; for (int i = 0; i < nlocal; i++) { - ke_tensor[0] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][0] * force->mvv2e; - ke_tensor[1] += 0.5 * mass[type[i]] * atom->v[i][1] * atom->v[i][1] * force->mvv2e; - ke_tensor[2] += 0.5 * mass[type[i]] * atom->v[i][2] * atom->v[i][2] * force->mvv2e; - ke_tensor[3] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][1] * force->mvv2e; - ke_tensor[4] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][2] * force->mvv2e; - ke_tensor[5] += 0.5 * mass[type[i]] * atom->v[i][1] * atom->v[i][2] * force->mvv2e; + if (mask[i] & groupbit) { + ke_tensor[0] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][0] * force->mvv2e; + ke_tensor[1] += 0.5 * mass[type[i]] * atom->v[i][1] * atom->v[i][1] * force->mvv2e; + ke_tensor[2] += 0.5 * mass[type[i]] * atom->v[i][2] * atom->v[i][2] * force->mvv2e; + ke_tensor[3] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][1] * force->mvv2e; + ke_tensor[4] += 0.5 * mass[type[i]] * atom->v[i][0] * atom->v[i][2] * force->mvv2e; + ke_tensor[5] += 0.5 * mass[type[i]] * atom->v[i][1] * atom->v[i][2] * force->mvv2e; + } } MPI_Allreduce(MPI_IN_PLACE, &ke_tensor, 6, MPI_DOUBLE, MPI_SUM, world); for (int i = 0; i < 6; i++) { @@ -1532,9 +1618,12 @@ void FixPIMDLangevin::compute_totke() double kine = 0.0; totke = ke_bead = 0.0; int nlocal = atom->nlocal; + int *mask = atom->mask; int *type = atom->type; for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) { kine += 0.5 * mass[type[i]] * atom->v[i][j] * atom->v[i][j]; } + if (mask[i] & groupbit) { + for (int j = 0; j < 3; j++) { kine += 0.5 * mass[type[i]] * atom->v[i][j] * 
atom->v[i][j]; } + } } kine *= force->mvv2e; MPI_Allreduce(&kine, &ke_bead, 1, MPI_DOUBLE, MPI_SUM, world); @@ -1554,10 +1643,13 @@ void FixPIMDLangevin::compute_spring_energy() double *_mass = atom->mass; int *type = atom->type; int nlocal = atom->nlocal; + int *mask = atom->mask; for (int i = 0; i < nlocal; i++) { - spring_energy += 0.5 * _mass[type[i]] * fbond * lam[universe->iworld] * - (x[i][0] * x[i][0] + x[i][1] * x[i][1] + x[i][2] * x[i][2]); + if (mask[i] & groupbit) { + spring_energy += 0.5 * _mass[type[i]] * fbond * lam[universe->iworld] * + (x[i][0] * x[i][0] + x[i][1] * x[i][1] + x[i][2] * x[i][2]); + } } MPI_Allreduce(&spring_energy, &se_bead, 1, MPI_DOUBLE, MPI_SUM, world); MPI_Allreduce(&se_bead, &total_spring_energy, 1, MPI_DOUBLE, MPI_SUM, universe->uworld); From 920a97f87f39e799d61fae0d4d2dbf62cdce4a81 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Thu, 25 Sep 2025 10:25:32 -0700 Subject: [PATCH 053/604] Add mask loop for atoms and change natoms to group count --- src/REPLICA/fix_pimd_langevin.cpp | 46 +++++++++++++++++++------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index d5e39f49b2e..347e85d81db 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -815,7 +815,9 @@ void FixPIMDLangevin::collect_xc() } } else if (cmode == MULTI_PROC) { for (int i = 0; i < ntotal; i++) { - xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; + if (mask[i] & groupbit) { + xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; + } } } @@ -995,10 +997,10 @@ void FixPIMDLangevin::baro_init() { vw[0] = vw[1] = vw[2] = vw[3] = vw[4] = vw[5] = 0.0; if (pstyle == ISO) { - W = 3 * (atom->natoms) * tau_p * tau_p * np * kt; + W = 3 * (group->count(igroup)) * tau_p * tau_p * np * kt; } // consistent with the definition in i-Pi else if (pstyle == ANISO) { - W = atom->natoms * tau_p * tau_p * np * kt; + W = group->count(igroup) 
* tau_p * tau_p * np * kt; } Vcoeff = 1.0; std::string out = fmt::format("\nInitializing PIMD {:s} barostat...\n", Barostats[barostat]); @@ -1035,7 +1037,7 @@ void FixPIMDLangevin::press_v_step() MPI_Barrier(universe->uworld); MPI_Bcast(&vw[0], 1, MPI_DOUBLE, 0, universe->uworld); } else if (barostat == MTTK) { - double mtk_term1 = 2.0 / atom->natoms * totke / 3.0; + double mtk_term1 = 2.0 / group->count(igroup) * totke / 3.0; vw[0] += 0.5 * dtv * (volume * np * (p_md - p_hydro) + mtk_term1) / W; } } else if (pstyle == ANISO) { @@ -1257,17 +1259,23 @@ void FixPIMDLangevin::nmpimd_init() void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vector) { if (cmode == SINGLE_PROC) { + int *mask = atom->mask; + for (int i = 0; i < ntotal; i++) { - for (int d = 0; d < 3; d++) { - bufsorted[i][d] = 0.0; - for (int j = 0; j < nreplica; j++) { - bufsorted[i][d] += src[j * ntotal + i][d] * vector[j]; + if (mask[i] & groupbit) { + for (int d = 0; d < 3; d++) { + bufsorted[i][d] = 0.0; + for (int j = 0; j < nreplica; j++) { + bufsorted[i][d] += src[j * ntotal + i][d] * vector[j]; + } } } } for (int i = 0; i < ntotal; i++) { - tagint tagtmp = atom->tag[i]; - for (int d = 0; d < 3; d++) { des[i][d] = bufsorted[tagtmp - 1][d]; } + if (mask[i] & groupbit) { + tagint tagtmp = atom->tag[i]; + for (int d = 0; d < 3; d++) { des[i][d] = bufsorted[tagtmp - 1][d]; } + } } } else if (cmode == MULTI_PROC) { int nlocal = atom->nlocal; @@ -1467,11 +1475,13 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) MPI_Bcast(tagrecvall, ntotal, MPI_LMP_TAGINT, 0, world); MPI_Bcast(bufrecvall[0], 3 * ntotal, MPI_DOUBLE, 0, world); for (i = 0; i < ntotal; i++) { - m = atom->map(tagrecvall[i]); - if (m < 0 || m >= nlocal) continue; - bufbeads[modeindex[iplan]][3 * m + 0] = bufrecvall[i][0]; - bufbeads[modeindex[iplan]][3 * m + 1] = bufrecvall[i][1]; - bufbeads[modeindex[iplan]][3 * m + 2] = bufrecvall[i][2]; + if (mask[i] & groupbit) { + m = atom->map(tagrecvall[i]); + if (m < 
0 || m >= nlocal) continue; + bufbeads[modeindex[iplan]][3 * m + 0] = bufrecvall[i][0]; + bufbeads[modeindex[iplan]][3 * m + 1] = bufrecvall[i][1]; + bufbeads[modeindex[iplan]][3 * m + 2] = bufrecvall[i][2]; + } } } } @@ -1690,7 +1700,7 @@ void FixPIMDLangevin::compute_tote() void FixPIMDLangevin::compute_t_prim() { - t_prim = 1.5 * atom->natoms * np * force->boltz * temp - total_spring_energy * inverse_np; + t_prim = 1.5 * group->count(igroup) * np * force->boltz * temp - total_spring_energy * inverse_np; } /* ---------------------------------------------------------------------- */ @@ -1698,7 +1708,7 @@ void FixPIMDLangevin::compute_t_prim() void FixPIMDLangevin::compute_t_vir() { t_vir = -0.5 * inverse_np * vir_; - t_cv = 1.5 * atom->natoms * force->boltz * temp - 0.5 * inverse_np * centroid_vir; + t_cv = 1.5 * group->count(igroup) * force->boltz * temp - 0.5 * inverse_np * centroid_vir; } /* ---------------------------------------------------------------------- */ @@ -1706,7 +1716,7 @@ void FixPIMDLangevin::compute_t_vir() void FixPIMDLangevin::compute_p_prim() { double inv_volume = 1.0 / (domain->xprd * domain->yprd * domain->zprd); - p_prim = atom->natoms * np * force->boltz * temp * inv_volume - + p_prim = group->count(igroup) * np * force->boltz * temp * inv_volume - 1.0 / 1.5 * inv_volume * total_spring_energy; p_prim *= force->nktv2p; } From dab31a8e5a62b7348bb9d03adddf3f642c064056 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 26 Sep 2025 15:11:22 -0600 Subject: [PATCH 054/604] From averaging thremal conductivity to thermal diffusivity in pair rheo --- src/RHEO/pair_rheo.cpp | 24 +++++++++++++++++++----- src/RHEO/pair_rheo.h | 1 + 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/RHEO/pair_rheo.cpp b/src/RHEO/pair_rheo.cpp index d5cf4f1c91c..e4c663a7080 100644 --- a/src/RHEO/pair_rheo.cpp +++ b/src/RHEO/pair_rheo.cpp @@ -27,6 +27,7 @@ #include "error.h" #include "fix_rheo.h" #include "fix_rheo_pressure.h" +#include 
"fix_rheo_thermal.h" #include "force.h" #include "info.h" #include "math_extra.h" @@ -80,8 +81,8 @@ void PairRHEO::compute(int eflag, int vflag) int pair_force_flag, pair_rho_flag, pair_avisc_flag; int fluidi, fluidj; double xtmp, ytmp, ztmp, wp, Ti, Tj, dT, cs_ave; - double rhoi, rhoj, rho0i, rho0j, voli, volj, Pi, Pj, etai, etaj, kappai, kappaj, csqi, csqj; - double eta_ave, kappa_ave, dT_prefactor; + double rhoi, rhoj, rho0i, rho0j, voli, volj, Pi, Pj, etai, etaj, kappai, kappaj, csqi, csqj, alphai, alphaj, cpi, cpj; + double eta_ave, alpha_ave, dT_prefactor; double mu, q, fp_prefactor, drho_damp, fmag, psi_ij, Fij; double *dWij, *dWji; double dx[3], du[3], dv[3], fv[3], dfp[3], fsolid[3], ft[3], vi[3], vj[3]; @@ -158,6 +159,7 @@ void PairRHEO::compute(int eflag, int vflag) if (thermal_flag) { kappai = conductivity[i]; Ti = temperature[i]; + cpi = fix_thermal->calc_cv(itype); } for (jj = 0; jj < jnum; jj++) { @@ -261,13 +263,16 @@ void PairRHEO::compute(int eflag, int vflag) // Thermal Evolution if (thermal_flag) { + cpj = fix_thermal->calc_cv(jtype); + alphai = kappai / (rho0i * cpi); + alphaj = kappaj / (rho0j * cpj); if (harmonic_means_flag) { - kappa_ave = 2.0 * kappai * kappaj / (kappai + kappaj); + alpha_ave = 2.0 * alphai * alphaj / (alphai + alphaj); } else { - kappa_ave = 0.5 * (kappai + kappaj); + alpha_ave = 0.5 * (alphai + alphaj); } dT_prefactor = - 2.0 * kappa_ave * (Ti - Tj) * rinv * rinv * voli * volj * 2.0 / (rhoi + rhoj); + 2.0 * alpha_ave * (Ti - Tj) * rinv * rinv * voli * volj; dT = dot3(dx, dWij); heatflow[i] += dT * dT_prefactor; @@ -494,6 +499,15 @@ void PairRHEO::setup() variable_csq = fix_pressure->variable_csq; + // Only if thermal flag + if (thermal_flag) { + fixes = modify->get_fix_by_style("^rheo/thermal$"); + // FixRHEO should perform these checks + if (fixes.size() == 0 || fixes.size() > 1) + error->all(FLERR, "Must have one and only one instance of fix rheo/thermal defined"); + fix_thermal = dynamic_cast<FixRHEOThermal *>(fixes[0]); + } + if (cutk 
!= fix_rheo->cut) error->all(FLERR, "Pair rheo cutoff {} does not agree with fix rheo cutoff {}", cutk, fix_rheo->cut); diff --git a/src/RHEO/pair_rheo.h b/src/RHEO/pair_rheo.h index f1fcd10bf88..c996f4fab70 100644 --- a/src/RHEO/pair_rheo.h +++ b/src/RHEO/pair_rheo.h @@ -56,6 +56,7 @@ class PairRHEO : public Pair { class ComputeRHEOInterface *compute_interface; class FixRHEO *fix_rheo; class FixRHEOPressure *fix_pressure; + class FixRHEOThermal *fix_thermal; }; } // namespace LAMMPS_NS From 12537497df9496d284dc70366e503e98393ff556 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Sat, 27 Sep 2025 12:50:05 -0700 Subject: [PATCH 055/604] Remove some masks --- src/REPLICA/fix_pimd_langevin.cpp | 104 +++++++++++++++--------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 347e85d81db..37ac48fd10d 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -519,9 +519,9 @@ void FixPIMDLangevin::setup(int vflag) imageint *image = atom->image; if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap(x[i], image[i]); - } + // } } } @@ -556,9 +556,9 @@ void FixPIMDLangevin::setup(int vflag) } if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap_inv(x[i], image[i]); - } + // } } } @@ -579,9 +579,9 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) imageint *image = atom->image; if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap(x[i], image[i]); - } + // } } } if (integrator == OBABO) { @@ -678,9 +678,9 @@ void FixPIMDLangevin::initial_integrate(int /*vflag*/) if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap_inv(x[i], image[i]); - } + // } } } } @@ -729,25 +729,25 @@ 
void FixPIMDLangevin::post_force(int /*flag*/) if (atom->nmax > maxunwrap) reallocate_x_unwrap(); if (atom->nmax > maxxc) reallocate_xc(); for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { x_unwrap[i][0] = x[i][0]; x_unwrap[i][1] = x[i][1]; x_unwrap[i][2] = x[i][2]; - } + // } } if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap(x_unwrap[i], image[i]); - } + // } } } for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; - } + // } } compute_vir(); @@ -758,9 +758,9 @@ void FixPIMDLangevin::post_force(int /*flag*/) if (method == PIMD) { if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap(x[i], image[i]); - } + // } } } prepare_coordinates(); @@ -769,9 +769,9 @@ void FixPIMDLangevin::post_force(int /*flag*/) compute_t_prim(); if (mapflag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { domain->unmap_inv(x[i], image[i]); - } + // } } } } @@ -809,25 +809,25 @@ void FixPIMDLangevin::collect_xc() if (ireplica == 0) { if (cmode == SINGLE_PROC) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; - } + // } } } else if (cmode == MULTI_PROC) { for (int i = 0; i < ntotal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; - } + // } } } const double sqrtnp = sqrt((double) np); for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { xcall[3 * (tag[i] - 1) + 0] = x[i][0] / sqrtnp; xcall[3 * (tag[i] - 1) + 1] = x[i][1] / sqrtnp; xcall[3 * (tag[i] - 1) + 2] = x[i][2] / sqrtnp; 
- } + // } } if (cmode == MULTI_PROC) { @@ -851,12 +851,12 @@ void FixPIMDLangevin::b_step() double **f = atom->f; for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { double dtfm = dtf / mass[type[i]]; v[i][0] += dtfm * f[i][0]; v[i][1] += dtfm * f[i][1]; v[i][2] += dtfm * f[i][2]; - } + // } } } @@ -875,11 +875,11 @@ void FixPIMDLangevin::qc_step() if (!pstat_flag) { if (universe->iworld == 0) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { x[i][0] += dtv * v[i][0]; x[i][1] += dtv * v[i][1]; x[i][2] += dtv * v[i][2]; - } + // } } } } else { @@ -895,7 +895,7 @@ void FixPIMDLangevin::qc_step() } if (barostat == BZP) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { for (int j = 0; j < 3; j++) { if (p_flag[j]) { x[i][j] = expq[j] * x[i][j] + (expq[j] - expp[j]) / 2. / vw[j] * v[i][j]; @@ -904,7 +904,7 @@ void FixPIMDLangevin::qc_step() x[i][j] += dtv * v[i][j]; } } - } + // } } oldlo = domain->boxlo[0]; oldhi = domain->boxhi[0]; @@ -945,7 +945,7 @@ void FixPIMDLangevin::a_step() if (universe->iworld != 0) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { x0 = x[i][0]; x1 = x[i][1]; x2 = x[i][2]; @@ -964,7 +964,7 @@ void FixPIMDLangevin::a_step() Lan_c[universe->iworld] * v1; v[i][2] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x2 + Lan_c[universe->iworld] * v2; - } + // } } } } @@ -982,11 +982,11 @@ void FixPIMDLangevin::q_step() if (!pstat_flag) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { x[i][0] += dtv * v[i][0]; x[i][1] += dtv * v[i][1]; x[i][2] += dtv * v[i][2]; - } + // } } } } @@ -1177,25 +1177,25 @@ void FixPIMDLangevin::o_step() if (thermostat == PILE_L) { if (method == NMPIMD) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { atom->v[i][0] = c1_k[universe->iworld] 
* atom->v[i][0] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - } + // } } } else if (method == PIMD) { for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { atom->v[i][0] = c1 * atom->v[i][0] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][1] = c1 * atom->v[i][1] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][2] = c1 * atom->v[i][2] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - } + // } } } } @@ -1262,20 +1262,20 @@ void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vecto int *mask = atom->mask; for (int i = 0; i < ntotal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { for (int d = 0; d < 3; d++) { bufsorted[i][d] = 0.0; for (int j = 0; j < nreplica; j++) { bufsorted[i][d] += src[j * ntotal + i][d] * vector[j]; } } - } + // } } for (int i = 0; i < ntotal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { tagint tagtmp = atom->tag[i]; for (int d = 0; d < 3; d++) { des[i][d] = bufsorted[tagtmp - 1][d]; } - } + // } } } else if (cmode == MULTI_PROC) { int nlocal = atom->nlocal; @@ -1283,13 +1283,13 @@ void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vecto int *mask = atom->mask; for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { for (int d = 0; d < 3; d++) { des[i][d] = 0.0; for (int j = 0; j < np; j++) { des[i][d] += (src[j][m] * vector[j]); } m++; } - } + // } } } } @@ -1417,24 +1417,24 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) // copy local values for (i = 0; i < nlocal; i++) { - if (mask[i] & 
groupbit) { + // if (mask[i] & groupbit) { bufbeads[ireplica][3 * i + 0] = ptr[i][0]; bufbeads[ireplica][3 * i + 1] = ptr[i][1]; bufbeads[ireplica][3 * i + 2] = ptr[i][2]; - } + // } } // communicate values from the other beads if (cmode == SINGLE_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { tagint tagtmp = atom->tag[i]; bufsorted[tagtmp - 1][0] = ptr[i][0]; bufsorted[tagtmp - 1][1] = ptr[i][1]; bufsorted[tagtmp - 1][2] = ptr[i][2]; m++; - } + // } } MPI_Allgather(&m, 1, MPI_INT, counts, 1, MPI_INT, universe->uworld); for (i = 0; i < nreplica; i++) counts[i] *= 3; @@ -1445,13 +1445,13 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) } else if (cmode == MULTI_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { tagsend[m] = tag[i]; bufsend[m][0] = ptr[i][0]; bufsend[m][1] = ptr[i][1]; bufsend[m][2] = ptr[i][2]; m++; - } + // } } MPI_Gather(&m, 1, MPI_INT, counts, 1, MPI_INT, 0, world); displacements[0] = 0; @@ -1475,13 +1475,13 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) MPI_Bcast(tagrecvall, ntotal, MPI_LMP_TAGINT, 0, world); MPI_Bcast(bufrecvall[0], 3 * ntotal, MPI_DOUBLE, 0, world); for (i = 0; i < ntotal; i++) { - if (mask[i] & groupbit) { + // if (mask[i] & groupbit) { m = atom->map(tagrecvall[i]); if (m < 0 || m >= nlocal) continue; bufbeads[modeindex[iplan]][3 * m + 0] = bufrecvall[i][0]; bufbeads[modeindex[iplan]][3 * m + 1] = bufrecvall[i][1]; bufbeads[modeindex[iplan]][3 * m + 2] = bufrecvall[i][2]; - } + // } } } } From 5550686accfbe222976cd71247062de00f46ff69 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Sat, 27 Sep 2025 22:01:34 -0600 Subject: [PATCH 056/604] Correcting fix name --- src/RHEO/pair_rheo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RHEO/pair_rheo.cpp b/src/RHEO/pair_rheo.cpp index e4c663a7080..0135c83882e 100644 --- a/src/RHEO/pair_rheo.cpp +++ b/src/RHEO/pair_rheo.cpp @@ -505,7 
+505,7 @@ void PairRHEO::setup() // FixRHEO should perform these checks if (fixes.size() == 0 || fixes.size() > 1) error->all(FLERR, "Must have one and only one instance of fix rheo/thermal defined"); - fix_pressure = dynamic_cast(fixes[0]); + fix_thermal = dynamic_cast(fixes[0]); } if (cutk != fix_rheo->cut) From 827ed68f24d721f335d83e6047c2999ab3dbd0c8 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Sat, 27 Sep 2025 23:02:30 -0600 Subject: [PATCH 057/604] Toning down rheo ice example --- examples/rheo/ice-cubes/in.rheo.ice.cubes | 4 +- .../rheo/ice-cubes/log.17Apr2024.ice.g++.4 | 379 ----------------- .../rheo/ice-cubes/log.22Jul2025.ice.g++.1 | 387 ++++++++++++++++++ 3 files changed, 389 insertions(+), 381 deletions(-) delete mode 100644 examples/rheo/ice-cubes/log.17Apr2024.ice.g++.4 create mode 100644 examples/rheo/ice-cubes/log.22Jul2025.ice.g++.1 diff --git a/examples/rheo/ice-cubes/in.rheo.ice.cubes b/examples/rheo/ice-cubes/in.rheo.ice.cubes index 91e02c780d2..da473b11057 100644 --- a/examples/rheo/ice-cubes/in.rheo.ice.cubes +++ b/examples/rheo/ice-cubes/in.rheo.ice.cubes @@ -62,8 +62,8 @@ fix 4 all rheo/thermal conductivity * constant ${kappa} & latent/heat * constant ${L} & react 1.5 1 fix 5 all wall/region wall harmonic 1.0 1.0 1.0 -fix 6 all gravity 5e-4 vector 0 -1 0 -fix 7 all deposit 8 0 1000 37241459 mol my_mol region drop near 2.0 vy -0.02 -0.02 +fix 6 all gravity 2e-4 vector 0 -1 0 +fix 7 all deposit 8 0 1500 37241459 mol my_mol region drop near 2.0 vy -0.02 -0.02 fix 8 all enforce2d compute rho all rheo/property/atom rho diff --git a/examples/rheo/ice-cubes/log.17Apr2024.ice.g++.4 b/examples/rheo/ice-cubes/log.17Apr2024.ice.g++.4 deleted file mode 100644 index 98fc2e75407..00000000000 --- a/examples/rheo/ice-cubes/log.17Apr2024.ice.g++.4 +++ /dev/null @@ -1,379 +0,0 @@ -LAMMPS (17 Apr 2024 - Development - patch_5May2020-18508-g3c0eaf6870-modified) -# ------ 2D Ice Cube Pour ------ # - -dimension 2 -units lj -atom_style hybrid rheo/thermal bond 
-boundary m m p -comm_modify vel yes -newton off -special_bonds lj 0.0 1.0 1.0 coul 1.0 1.0 1.0 - -region box block -25 25 0 100 -0.01 0.01 units box -create_box 1 box bond/types 1 extra/bond/per/atom 15 extra/special/per/atom 50 -Created orthogonal box = (-25 0 -0.01) to (25 100 0.01) - 2 by 2 by 1 MPI processor grid - -region fluid block $(xlo+1) $(xhi-1) $(ylo+1) $(ylo+30) EDGE EDGE units box -region fluid block -24 $(xhi-1) $(ylo+1) $(ylo+30) EDGE EDGE units box -region fluid block -24 24 $(ylo+1) $(ylo+30) EDGE EDGE units box -region fluid block -24 24 1 $(ylo+30) EDGE EDGE units box -region fluid block -24 24 1 30 EDGE EDGE units box -lattice sq 1.0 -Lattice spacing in x,y,z = 1 1 1 -create_atoms 1 region fluid -Created 1470 atoms - using lattice units in orthogonal box = (-25 0 -0.01) to (25 100 0.01) - create_atoms CPU = 0.001 seconds - -set group all sph/e 8.0 -Setting atom values ... - 1470 settings made for sph/e - -# ------ Model parameters ------# - -variable cut equal 3.0 -variable n equal 1.0 -variable rho0 equal 1.0 -variable cs equal 1.0 -variable mp equal ${rho0}/${n} -variable mp equal 1/${n} -variable mp equal 1/1 -variable zeta equal 0.05 -variable kappa equal 0.01*${rho0}/${mp} -variable kappa equal 0.01*1/${mp} -variable kappa equal 0.01*1/1 -variable dt_max equal 0.1*${cut}/${cs}/3 -variable dt_max equal 0.1*3/${cs}/3 -variable dt_max equal 0.1*3/1/3 -variable eta equal 0.05 -variable Cv equal 1.0 -variable L equal 1.0 -variable Tf equal 1.0 - -mass * ${mp} -mass * 1 -timestep 0.1 - -pair_style hybrid/overlay rheo ${cut} artificial/visc ${zeta} rheo/solid -pair_style hybrid/overlay rheo 3 artificial/visc ${zeta} rheo/solid -pair_style hybrid/overlay rheo 3 artificial/visc 0.05 rheo/solid -pair_coeff * * rheo -pair_coeff * * rheo/solid 1.0 1.0 1.0 - -bond_style bpm/spring -bond_coeff 1 1.0 1.0 1.0 - -# ------ Pour particles ------# - -molecule my_mol "square.mol" -Read molecule template my_mol: -#Made with create_mol.py - 1 molecules - 0 
fragments - 100 atoms with max type 1 - 342 bonds with max type 1 - 0 angles with max type 0 - 0 dihedrals with max type 0 - 0 impropers with max type 0 - -# Wall region extends far enough in z to avoid contact -region wall block EDGE EDGE EDGE EDGE -5 5 side in open 4 units box -region drop block -16 16 70 90 EDGE EDGE side in units box - -fix 1 all rheo ${cut} quintic 0 thermal shift surface/detection coordination 22 8 -fix 1 all rheo 3 quintic 0 thermal shift surface/detection coordination 22 8 -fix 2 all rheo/viscosity * constant ${eta} -fix 2 all rheo/viscosity * constant 0.05 -fix 3 all rheo/pressure * linear -fix 4 all rheo/thermal conductivity * constant ${kappa} specific/heat * constant ${Cv} Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 -fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant ${Cv} Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 -fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 -fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant 1 latent/heat * constant ${L} react 1.5 1 -fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant 1 latent/heat * constant 1 react 1.5 1 -fix 5 all wall/region wall harmonic 1.0 1.0 1.0 -fix 6 all gravity 5e-4 vector 0 -1 0 -fix 7 all deposit 8 0 1000 37241459 mol my_mol region drop near 2.0 vy -0.02 -0.02 -WARNING: Molecule attributes do not match system attributes (../molecule.cpp:1881) -fix 8 all enforce2d - -compute rho all rheo/property/atom rho -compute phase all rheo/property/atom phase -compute temp all rheo/property/atom temperature -compute eng all rheo/property/atom energy -compute nbond all nbond/atom - -# ------ Output & Run ------ # - -thermo 200 -thermo_style custom step time ke press atoms - -dump 1 all custom 200 atomDump id type x y vx vy fx fy c_phase c_temp 
c_eng c_nbond c_rho - -run 30000 - -CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE - -Your simulation uses code contributions which should be cited: - -- BPM bond style: doi:10.1039/D3SM01373A - -@Article{Clemmer2024, - author = {Clemmer, Joel T. and Monti, Joseph M. and Lechman, Jeremy B.}, - title = {A soft departure from jamming: the compaction of deformable - granular matter under high pressures}, - journal = {Soft Matter}, - year = 2024, - volume = 20, - number = 8, - pages = {1702--1718} -} - -- @article{PalermoInPrep, - journal = {in prep}, - title = {RHEO: A Hybrid Mesh-Free Model Framework for Dynamic Multi-Phase Flows}, - year = {2024}, - author = {Eric T. Palermo and Ki T. Wolf and Joel T. Clemmer and Thomas C. O'Connor}, -} - -- @article{ApplMathModel.130.310, - title = {A hybrid smoothed-particle hydrodynamics model of oxide skins on molten aluminum}, - journal = {Applied Mathematical Modelling}, - volume = {130}, - pages = {310-326}, - year = {2024}, - issn = {0307-904X}, - doi = {https://doi.org/10.1016/j.apm.2024.02.027}, - author = {Joel T. Clemmer and Flint Pierce and Thomas C. O'Connor and Thomas D. Nevins and Elizabeth M.C. Jones and Jeremy B. Lechman and John Tencer}, -} - -CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE - -Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule -Neighbor list info ... 
- update: every = 1 steps, delay = 0 steps, check = yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 3.3 - ghost atom cutoff = 3.3 - binsize = 1.65, bins = 31 61 1 - 7 neighbor lists, perpetual/occasional/extra = 6 1 0 - (1) pair rheo, perpetual, half/full from (3) - attributes: half, newton off - pair build: halffull/newtoff - stencil: none - bin: none - (2) pair rheo/solid, perpetual, trim from (4) - attributes: half, newton off, cut 1.3 - pair build: trim - stencil: none - bin: none - (3) compute RHEO/KERNEL, perpetual - attributes: full, newton off - pair build: full/bin - stencil: full/bin/2d - bin: standard - (4) compute RHEO/GRAD, perpetual, copy from (1) - attributes: half, newton off - pair build: copy - stencil: none - bin: none - (5) compute RHEO/VSHIFT, perpetual, copy from (1) - attributes: half, newton off - pair build: copy - stencil: none - bin: none - (6) compute RHEO/SURFACE, perpetual, copy from (1) - attributes: half, newton off - pair build: copy - stencil: none - bin: none - (7) fix rheo/thermal, occasional, trim from (4) - attributes: half, newton off, cut 3 - pair build: trim - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 15.53 | 15.61 | 15.69 Mbytes - Step Time KinEng Press Atoms - 0 0 0 0 1470 - 200 20 5.6002982e-05 3.4434234e-05 1570 - 400 40 8.2173099e-05 8.6171768e-05 1570 - 600 60 8.019018e-05 0.00010750355 1570 - 800 80 0.00013866953 0.00010265608 1570 - 1000 100 0.00018965028 8.1985605e-05 1570 - 1200 120 0.00022033242 7.4736443e-05 1670 - 1400 140 0.00030767062 0.00011264333 1670 - 1600 160 0.00040770127 0.00018779992 1670 - 1800 180 0.00047476332 0.00023153009 1670 - 2000 200 0.00059116774 0.00027200445 1670 - 2200 220 0.0007151733 0.0002919963 1770 - 2400 240 0.00083392135 0.00029757889 1770 - 2600 260 0.00099653466 0.00036547269 1770 - 2800 280 0.0011964069 0.00045983458 1770 - 3000 300 0.0013716953 0.00055013647 1770 - 3200 320 0.0015174096 0.00064203572 1870 - 3400 
340 0.0016539743 0.00086671622 1870 - 3600 360 0.0015887858 0.00066353749 1870 - 3800 380 0.0016451684 0.00070551483 1870 - 4000 400 0.0017330971 0.00080722283 1870 - 4200 420 0.001812193 0.00073573903 1970 - 4400 440 0.001755871 0.0010621909 1970 - 4600 460 0.0016190772 0.00072913706 1970 - 4800 480 0.0015741931 0.00073524088 1970 - 5000 500 0.0016488815 0.00088684275 1970 - 5200 520 0.0017213288 0.00077042378 2070 - 5400 540 0.0018509598 0.0010219434 2070 - 5600 560 0.0020251064 0.00083182483 2070 - 5800 580 0.0022473255 0.00095076144 2070 - 6000 600 0.0024843519 0.0011247014 2070 - 6200 620 0.0022282321 0.0018105932 2170 - 6400 640 0.0020289063 0.0014158497 2170 - 6600 660 0.002145241 0.0011359383 2170 - 6800 680 0.0024313937 0.0016475504 2170 - 7000 700 0.0021000599 0.0020983745 2170 - 7200 720 0.0019137235 0.0010439152 2270 - 7400 740 0.0018801367 0.00095436448 2270 - 7600 760 0.0017979449 0.0011184039 2270 - 7800 780 0.0018005205 0.0009243205 2270 - 8000 800 0.0017827073 0.0013671228 2270 - 8200 820 0.0018387108 0.0015426012 2270 - 8400 840 0.0016000788 0.0016751514 2270 - 8600 860 0.0013954964 0.0016884335 2270 - 8800 880 0.0013283728 0.0012399398 2270 - 9000 900 0.001389385 0.0012968496 2270 - 9200 920 0.0012295438 0.0012995821 2270 - 9400 940 0.0010522655 0.00082245528 2270 - 9600 960 0.00097085496 0.00053833131 2270 - 9800 980 0.0009398987 0.00063467387 2270 - 10000 1000 0.00092710392 0.00059494446 2270 - 10200 1020 0.00095545471 0.00074560644 2270 - 10400 1040 0.0009645841 0.00085429807 2270 - 10600 1060 0.00064037148 0.0017222246 2270 - 10800 1080 0.00046790978 0.00088204234 2270 - 11000 1100 0.00030106229 0.00074950209 2270 - 11200 1120 0.00027746016 0.00052831745 2270 - 11400 1140 0.0002533348 0.0006272715 2270 - 11600 1160 0.00021825085 0.00029691552 2270 - 11800 1180 0.0001451308 0.00015037478 2270 - 12000 1200 0.0001314823 0.00017227174 2270 - 12200 1220 0.00013693632 0.00017791384 2270 - 12400 1240 0.00014987347 0.0002286677 2270 - 12600 1260 
0.00015092598 0.0003698436 2270 - 12800 1280 0.0001291653 0.00047229532 2270 - 13000 1300 0.00011949988 0.00049560375 2270 - 13200 1320 0.00011694665 0.00057542084 2270 - 13400 1340 9.6164519e-05 0.00062714755 2270 - 13600 1360 8.4517591e-05 0.00044156913 2270 - 13800 1380 0.00019140516 0.0003264745 2270 - 14000 1400 0.00013868599 0.00037753497 2270 - 14200 1420 9.3701636e-05 0.00031517848 2270 - 14400 1440 6.7389077e-05 0.0002946861 2270 - 14600 1460 5.3640086e-05 0.00026650711 2270 - 14800 1480 4.2699992e-05 0.00023789279 2270 - 15000 1500 5.3012016e-05 0.00019933234 2270 - 15200 1520 5.8834197e-05 0.00022407007 2270 - 15400 1540 5.0899982e-05 0.00029695531 2270 - 15600 1560 3.0476742e-05 0.00039119066 2270 - 15800 1580 1.6633264e-05 0.00033770401 2270 - 16000 1600 1.098906e-05 0.00036684894 2270 - 16200 1620 1.464848e-05 0.00036449759 2270 - 16400 1640 1.9598429e-05 0.00021056689 2270 - 16600 1660 1.2644955e-05 0.00020781781 2270 - 16800 1680 8.8428553e-06 0.000165 2270 - 17000 1700 8.8971439e-06 0.00012266475 2270 - 17200 1720 1.7032781e-05 0.00019873443 2270 - 17400 1740 1.9448563e-05 0.00025661663 2270 - 17600 1760 1.3714713e-05 0.000324022 2270 - 17800 1780 9.1326468e-06 0.00031392513 2270 - 18000 1800 9.2464802e-06 0.00029729527 2270 - 18200 1820 1.5553042e-05 0.00027488475 2270 - 18400 1840 1.4132933e-05 0.00019565459 2270 - 18600 1860 9.4734832e-06 0.00016716988 2270 - 18800 1880 5.5115145e-06 0.00013728033 2270 - 19000 1900 8.268812e-06 0.00015119605 2270 - 19200 1920 1.2470136e-05 0.00020222131 2270 - 19400 1940 9.9387775e-06 0.00024503373 2270 - 19600 1960 5.4241999e-06 0.00026921858 2270 - 19800 1980 2.7987348e-06 0.00026201267 2270 - 20000 2000 6.272538e-06 0.00025626323 2270 - 20200 2020 8.0157781e-06 0.000220139 2270 - 20400 2040 6.1652093e-06 0.00017089058 2270 - 20600 2060 2.9967592e-06 0.00014582864 2270 - 20800 2080 3.016678e-06 0.000148629 2270 - 21000 2100 7.287645e-06 0.00016486102 2270 - 21200 2120 8.6905277e-06 0.00020276916 2270 - 21400 
2140 6.8453018e-06 0.00023156153 2270 - 21600 2160 3.3853799e-06 0.0002432462 2270 - 21800 2180 4.1241209e-06 0.00022829024 2270 - 22000 2200 7.0802396e-06 0.00020784823 2270 - 22200 2220 7.3361691e-06 0.00018114134 2270 - 22400 2240 5.0764593e-06 0.00014351106 2270 - 22600 2260 2.7487537e-06 0.00012919872 2270 - 22800 2280 4.620167e-06 0.00013746218 2270 - 23000 2300 6.9819357e-06 0.00015985102 2270 - 23200 2320 6.8923916e-06 0.00018713045 2270 - 23400 2340 4.1795088e-06 0.00019846682 2270 - 23600 2360 2.2871028e-06 0.00021068421 2270 - 23800 2380 3.862046e-06 0.00019553306 2270 - 24000 2400 5.2448555e-06 0.00017398041 2270 - 24200 2420 4.7565441e-06 0.00015008142 2270 - 24400 2440 2.2952135e-06 0.00012747106 2270 - 24600 2460 2.1575617e-06 0.00012516996 2270 - 24800 2480 4.1777868e-06 0.0001331902 2270 - 25000 2500 5.5679133e-06 0.00015504562 2270 - 25200 2520 4.5758741e-06 0.00017146032 2270 - 25400 2540 2.3403277e-06 0.00017611666 2270 - 25600 2560 2.7029302e-06 0.00016850788 2270 - 25800 2580 4.3601102e-06 0.00015884642 2270 - 26000 2600 5.2244249e-06 0.00013793898 2270 - 26200 2620 3.4577672e-06 0.00012395875 2270 - 26400 2640 2.361577e-06 0.00011600057 2270 - 26600 2660 2.8515644e-06 0.00011277063 2270 - 26800 2680 4.0851213e-06 0.0001290832 2270 - 27000 2700 4.2579644e-06 0.0001476495 2270 - 27200 2720 2.6593858e-06 0.00015977745 2270 - 27400 2740 1.990115e-06 0.00015612787 2270 - 27600 2760 2.6756835e-06 0.00014913772 2270 - 27800 2780 3.9032806e-06 0.00014014763 2270 - 28000 2800 3.2729446e-06 0.00012216846 2270 - 28200 2820 1.9357278e-06 0.00011078621 2270 - 28400 2840 1.7094832e-06 0.00010910509 2270 - 28600 2860 2.8731406e-06 0.00011179644 2270 - 28800 2880 3.7062354e-06 0.00012254091 2270 - 29000 2900 2.7844262e-06 0.00013060331 2270 - 29200 2920 1.7680655e-06 0.00013797514 2270 - 29400 2940 1.706873e-06 0.0001350685 2270 - 29600 2960 2.8764562e-06 0.00012428508 2270 - 29800 2980 3.1502029e-06 0.00011456718 2270 - 30000 3000 2.1833409e-06 
0.00010317469 2270 -Loop time of 165.611 on 4 procs for 30000 steps with 2270 atoms - -Performance: 1565111.240 tau/day, 181.147 timesteps/s, 411.204 katom-step/s -99.7% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.63183 | 21.226 | 42.266 | 444.6 | 12.82 -Bond | 0.095073 | 0.17799 | 0.27877 | 17.0 | 0.11 -Neigh | 2.0745 | 2.0781 | 2.0822 | 0.2 | 1.25 -Comm | 0.32024 | 0.38703 | 0.45564 | 8.1 | 0.23 -Output | 0.60459 | 0.76798 | 0.93724 | 18.6 | 0.46 -Modify | 119.85 | 140.76 | 161.36 | 172.2 | 85.00 -Other | | 0.2124 | | | 0.13 - -Nlocal: 567.5 ave 1139 max 0 min -Histogram: 2 0 0 0 0 0 0 0 0 2 -Nghost: 75.5 ave 152 max 0 min -Histogram: 2 0 0 0 0 0 0 0 0 2 -Neighs: 9238.25 ave 18490 max 0 min -Histogram: 2 0 0 0 0 0 0 0 0 2 -FullNghs: 17945 ave 35917 max 0 min -Histogram: 2 0 0 0 0 0 0 0 0 2 - -Total # of neighbors = 71780 -Ave neighs/atom = 31.621145 -Ave special neighs/atom = 0.22026432 -Neighbor list builds = 2071 -Dangerous builds = 0 - -Total wall time: 0:02:45 diff --git a/examples/rheo/ice-cubes/log.22Jul2025.ice.g++.1 b/examples/rheo/ice-cubes/log.22Jul2025.ice.g++.1 new file mode 100644 index 00000000000..fc84ee81d1a --- /dev/null +++ b/examples/rheo/ice-cubes/log.22Jul2025.ice.g++.1 @@ -0,0 +1,387 @@ +LAMMPS (22 Jul 2025 - Development - patch_2Apr2025-1775-g5550686acc-modified) +# ------ 2D Ice Cube Pour ------ # + +dimension 2 +units lj +atom_style hybrid rheo/thermal bond +boundary m m p +comm_modify vel yes +newton off +special_bonds lj 0.0 1.0 1.0 coul 1.0 1.0 1.0 + +region box block -25 25 0 100 -0.01 0.01 units box +create_box 1 box bond/types 1 extra/bond/per/atom 15 extra/special/per/atom 50 +Created orthogonal box = (-25 0 -0.01) to (25 100 0.01) + 1 by 1 by 1 MPI processor grid + +region fluid block $(xlo+1) $(xhi-1) $(ylo+1) $(ylo+30) EDGE EDGE units box +region fluid block -24 
$(xhi-1) $(ylo+1) $(ylo+30) EDGE EDGE units box +region fluid block -24 24 $(ylo+1) $(ylo+30) EDGE EDGE units box +region fluid block -24 24 1 $(ylo+30) EDGE EDGE units box +region fluid block -24 24 1 30 EDGE EDGE units box +lattice sq 1.0 +Lattice spacing in x,y,z = 1 1 1 +create_atoms 1 region fluid +Created 1470 atoms + using lattice units in orthogonal box = (-25 0 -0.01) to (25 100 0.01) + create_atoms CPU = 0.001 seconds + +set group all sph/e 8.0 +Setting atom values ... + 1470 settings made for sph/e + +# ------ Model parameters ------# + +variable cut equal 3.0 +variable n equal 1.0 +variable rho0 equal 1.0 +variable cs equal 1.0 +variable mp equal ${rho0}/${n} +variable mp equal 1/${n} +variable mp equal 1/1 +variable zeta equal 0.05 +variable kappa equal 0.01*${rho0}/${mp} +variable kappa equal 0.01*1/${mp} +variable kappa equal 0.01*1/1 +variable dt_max equal 0.1*${cut}/${cs}/3 +variable dt_max equal 0.1*3/${cs}/3 +variable dt_max equal 0.1*3/1/3 +variable eta equal 0.05 +variable Cv equal 1.0 +variable L equal 1.0 +variable Tf equal 1.0 + +mass * ${mp} +mass * 1 +timestep 0.1 + +pair_style hybrid/overlay rheo ${cut} artificial/visc ${zeta} rheo/solid +pair_style hybrid/overlay rheo 3 artificial/visc ${zeta} rheo/solid +pair_style hybrid/overlay rheo 3 artificial/visc 0.05 rheo/solid +pair_coeff * * rheo +pair_coeff * * rheo/solid 1.0 1.0 1.0 + +bond_style bpm/spring +bond_coeff 1 1.0 1.0 1.0 + +# ------ Pour particles ------# + +molecule my_mol "square.mol" +Read molecule template my_mol: +#Made with create_mol.py + 1 molecules + 0 fragments + 0 bodies + 100 atoms with max type 1 + 342 bonds with max type 1 + 0 angles with max type 0 + 0 dihedrals with max type 0 + 0 impropers with max type 0 + +# Wall region extends far enough in z to avoid contact +region wall block EDGE EDGE EDGE EDGE -5 5 side in open 4 units box +region drop block -16 16 70 90 EDGE EDGE side in units box + +fix 1 all rheo ${cut} quintic 0 thermal shift surface/detection 
coordination 22 8 +fix 1 all rheo 3 quintic 0 thermal shift surface/detection coordination 22 8 +fix 2 all rheo/viscosity * constant ${eta} +fix 2 all rheo/viscosity * constant 0.05 +fix 3 all rheo/pressure * linear +fix 4 all rheo/thermal conductivity * constant ${kappa} specific/heat * constant ${Cv} Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 +fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant ${Cv} Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 +fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant ${Tf} latent/heat * constant ${L} react 1.5 1 +fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant 1 latent/heat * constant ${L} react 1.5 1 +fix 4 all rheo/thermal conductivity * constant 0.01 specific/heat * constant 1 Tfreeze * constant 1 latent/heat * constant 1 react 1.5 1 +fix 5 all wall/region wall harmonic 1.0 1.0 1.0 +fix 6 all gravity 2e-4 vector 0 -1 0 +fix 7 all deposit 8 0 1500 37241459 mol my_mol region drop near 2.0 vy -0.02 -0.02 +WARNING: Molecule attributes do not match system attributes +For more information see https://docs.lammps.org/err0026 (../molecule.cpp:3634) +fix 8 all enforce2d + +compute rho all rheo/property/atom rho +compute phase all rheo/property/atom phase +compute temp all rheo/property/atom temperature +compute eng all rheo/property/atom energy +compute nbond all nbond/atom + +# ------ Output & Run ------ # + +thermo 200 +thermo_style custom step time ke press atoms + +#dump 1 all custom 200 atomDump id type x y vx vy fx fy c_phase c_temp c_eng c_nbond c_rho + +run 30000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- BPM bond style: doi:10.1039/D3SM01373A + +@Article{Clemmer2024, + author = {Clemmer, Joel T. and Monti, Joseph M. 
and Lechman, Jeremy B.}, + title = {A soft departure from jamming: the compaction of deformable + granular matter under high pressures}, + journal = {Soft Matter}, + year = 2024, + volume = 20, + number = 8, + pages = {1702--1718} +} + +- RHEO package: doi:10.1063/5.0228823 + +@article{Palermo2024, + journal = {Physics of Fluids}, + title = {Reproducing hydrodynamics and elastic objects: A hybrid mesh-free model framework for dynamic multi-phase flows}, + volume = {36}, + number = {11}, + pages = {113337}, + year = {2024}, + issn = {1070-6631}, + doi = {https://doi.org/10.1063/5.0228823}, + author = {Palermo, Eric T. and Wolf, Ki T. and Clemmer, Joel T. and O'Connor, Thomas C.}, +} + +- @article{ApplMathModel.130.310, + title = {A hybrid smoothed-particle hydrodynamics model of oxide skins on molten aluminum}, + journal = {Applied Mathematical Modelling}, + volume = {130}, + pages = {310-326}, + year = {2024}, + issn = {0307-904X}, + doi = {https://doi.org/10.1016/j.apm.2024.02.027}, + author = {Joel T. Clemmer and Flint Pierce and Thomas C. O'Connor and Thomas D. Nevins and Elizabeth M.C. Jones and Jeremy B. Lechman and John Tencer}, +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.3 + ghost atom cutoff = 3.3 + binsize = 1.65, bins = 31 61 1 + 7 neighbor lists, perpetual/occasional/extra = 6 1 0 + (1) pair rheo, perpetual, half/full from (3) + attributes: half, newton off, cut 3.3 + pair build: halffull/newtoff + stencil: none + bin: none + (2) pair rheo/solid, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none + (3) compute RHEO/KERNEL, perpetual + attributes: full, newton off + pair build: full/bin + stencil: full/bin/2d + bin: standard + (4) compute RHEO/GRAD, perpetual, copy from (2) + attributes: half, newton off + pair build: copy + stencil: none + bin: none + (5) compute RHEO/VSHIFT, perpetual, copy from (2) + attributes: half, newton off + pair build: copy + stencil: none + bin: none + (6) compute RHEO/SURFACE, perpetual, copy from (2) + attributes: half, newton off + pair build: copy + stencil: none + bin: none + (7) fix rheo/thermal, occasional, trim from (1) + attributes: half, newton off, cut 3 + pair build: trim + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.42 | 15.42 | 15.42 Mbytes + Step Time KinEng Press Atoms + 0 0 0 0 1470 + 200 20 2.2717157e-05 7.1371773e-06 1570 + 400 40 3.0038335e-05 9.4317394e-06 1570 + 600 60 3.2730133e-05 1.0277282e-05 1570 + 800 80 4.4929756e-05 1.4107942e-05 1570 + 1000 100 5.6520075e-05 1.7747304e-05 1570 + 1200 120 6.2089335e-05 1.9496051e-05 1570 + 1400 140 7.6066721e-05 2.3884951e-05 1570 + 1600 160 0.00010093707 3.3713692e-05 1670 + 1800 180 0.0001151137 3.8447856e-05 1670 + 2000 200 0.00013647734 4.5583445e-05 1670 + 2200 220 0.00016270772 5.4344377e-05 1670 + 2400 240 0.00018314924 6.1171846e-05 1670 + 2600 260 0.00020900069 6.980623e-05 1670 + 2800 280 0.00024130727 8.059663e-05 1670 + 3000 300 0.00026871452 8.9750651e-05 1670 + 3200 320 0.00029835327 0.00010561628 1770 + 
3400 340 0.00034025448 0.00012045014 1770 + 3600 360 0.00037925068 0.00013425474 1770 + 3800 380 0.00042007564 0.00014870678 1770 + 4000 400 0.00046961376 0.00016624327 1770 + 4200 420 0.00051826313 0.00018346515 1770 + 4400 440 0.00056653402 0.00020055304 1770 + 4600 460 0.00060322075 0.00022560527 1870 + 4800 480 0.00066281609 0.00024794721 1870 + 5000 500 0.00068094883 0.00033267233 1870 + 5200 520 0.00065586513 0.00027934185 1870 + 5400 540 0.00063818623 0.00026141392 1870 + 5600 560 0.00064611574 0.00028840664 1870 + 5800 580 0.00069215566 0.00025566612 1870 + 6000 600 0.0007270144 0.00027861317 1870 + 6200 620 0.00072765687 0.00027966711 1970 + 6400 640 0.00075380311 0.0003591263 1970 + 6600 660 0.000685186 0.00036453413 1970 + 6800 680 0.00063771669 0.00029214835 1970 + 7000 700 0.00060054572 0.00028000609 1970 + 7200 720 0.00063040642 0.00028080238 1970 + 7400 740 0.00066564874 0.00029070803 1970 + 7600 760 0.00065703831 0.00026324096 2070 + 7800 780 0.00068443783 0.00031952344 2070 + 8000 800 0.0007303798 0.00030715365 2070 + 8200 820 0.0007847074 0.00032546318 2070 + 8400 840 0.00084391012 0.00037184367 2070 + 8600 860 0.00089544059 0.00039536474 2070 + 8800 880 0.00095251417 0.00042896539 2070 + 9000 900 0.0010157761 0.00045962325 2070 + 9200 920 0.0009240859 0.00091523569 2170 + 9400 940 0.00081511848 0.00051286007 2170 + 9600 960 0.00077894163 0.00032724205 2170 + 9800 980 0.00085705736 0.00049571068 2170 + 10000 1000 0.00091811593 0.00042233906 2170 + 10200 1020 0.00094538472 0.00039229791 2170 + 10400 1040 0.00077707627 0.00093563064 2170 + 10600 1060 0.00075090675 0.00046388293 2270 + 10800 1080 0.00075422987 0.00030514098 2270 + 11000 1100 0.00074353651 0.00039832511 2270 + 11200 1120 0.00076846368 0.00033461997 2270 + 11400 1140 0.00081108881 0.00045787999 2270 + 11600 1160 0.00083649799 0.00046538584 2270 + 11800 1180 0.00076615167 0.00061077896 2270 + 12000 1200 0.00068696049 0.00054597438 2270 + 12200 1220 0.00067706861 0.00054333845 2270 + 
12400 1240 0.0006846894 0.00059220006 2270 + 12600 1260 0.00070834168 0.00044970538 2270 + 12800 1280 0.00072784007 0.00043671935 2270 + 13000 1300 0.00065372714 0.00066868971 2270 + 13200 1320 0.00059489165 0.0005988157 2270 + 13400 1340 0.00057433489 0.0003306144 2270 + 13600 1360 0.00057759953 0.00034951381 2270 + 13800 1380 0.0005906022 0.00048382515 2270 + 14000 1400 0.00061787161 0.00037760087 2270 + 14200 1420 0.00063257988 0.00042098942 2270 + 14400 1440 0.00048420042 0.00082251415 2270 + 14600 1460 0.0004459047 0.00061784676 2270 + 14800 1480 0.00040116642 0.00025126644 2270 + 15000 1500 0.00038845827 0.0002831356 2270 + 15200 1520 0.00039417082 0.00033821639 2270 + 15400 1540 0.0004190173 0.00029389934 2270 + 15600 1560 0.00041919856 0.00023954742 2270 + 15800 1580 0.00041479363 0.00034317388 2270 + 16000 1600 0.00027384222 0.00071403737 2270 + 16200 1620 0.00030548118 0.00024689256 2270 + 16400 1640 0.00024063032 0.00014685866 2270 + 16600 1660 0.0001937864 0.00023746944 2270 + 16800 1680 0.00016567197 0.00032818028 2270 + 17000 1700 0.00014436419 0.00025219169 2270 + 17200 1720 0.00013330488 8.0811687e-05 2270 + 17400 1740 0.00010110926 0.00021817751 2270 + 17600 1760 8.2497426e-05 8.5314727e-05 2270 + 17800 1780 6.9980309e-05 4.7443364e-05 2270 + 18000 1800 6.4907726e-05 3.2253337e-05 2270 + 18200 1820 6.1098706e-05 4.9637151e-05 2270 + 18400 1840 5.1225998e-05 0.00011588292 2270 + 18600 1860 4.3552797e-05 0.00012693633 2270 + 18800 1880 3.8179148e-05 0.00015278102 2270 + 19000 1900 3.6853452e-05 0.00011903345 2270 + 19200 1920 3.54081e-05 5.8354185e-05 2270 + 19400 1940 3.2260717e-05 6.39919e-05 2270 + 19600 1960 3.0731657e-05 6.0779859e-05 2270 + 19800 1980 3.1997217e-05 5.1577413e-05 2270 + 20000 2000 3.3019845e-05 8.703213e-05 2270 + 20200 2020 3.2421224e-05 0.00010912897 2270 + 20400 2040 2.9958827e-05 0.00014292515 2270 + 20600 2060 2.8345916e-05 0.00014089394 2270 + 20800 2080 2.2171259e-05 0.00022329189 2270 + 21000 2100 2.1075537e-05 
0.00016691884 2270 + 21200 2120 1.8707445e-05 9.5068937e-05 2270 + 21400 2140 1.5989193e-05 5.6977e-05 2270 + 21600 2160 1.4420844e-05 5.5543202e-05 2270 + 21800 2180 1.4074683e-05 6.8614563e-05 2270 + 22000 2200 1.4801588e-05 5.8302293e-05 2270 + 22200 2220 1.5187664e-05 8.4548848e-05 2270 + 22400 2240 1.3940673e-05 0.00012506229 2270 + 22600 2260 1.2297727e-05 0.00011740479 2270 + 22800 2280 1.1189305e-05 9.9551164e-05 2270 + 23000 2300 1.1084669e-05 9.3835514e-05 2270 + 23200 2320 1.0603835e-05 8.9901861e-05 2270 + 23400 2340 9.6180326e-06 6.897431e-05 2270 + 23600 2360 8.6142163e-06 4.8678429e-05 2270 + 23800 2380 8.2807246e-06 4.9214993e-05 2270 + 24000 2400 8.3799347e-06 6.5386426e-05 2270 + 24200 2420 8.1785229e-06 6.7936583e-05 2270 + 24400 2440 7.4973908e-06 7.3551179e-05 2270 + 24600 2460 6.6901277e-06 7.869978e-05 2270 + 24800 2480 6.304684e-06 7.7712111e-05 2270 + 25000 2500 6.2811359e-06 6.8995329e-05 2270 + 25200 2520 6.1733497e-06 5.7511644e-05 2270 + 25400 2540 5.8547493e-06 4.8450322e-05 2270 + 25600 2560 5.5807792e-06 4.3263769e-05 2270 + 25800 2580 5.7376809e-06 4.1298742e-05 2270 + 26000 2600 6.1235437e-06 4.6792289e-05 2270 + 26200 2620 6.3133448e-06 5.2539425e-05 2270 + 26400 2640 6.1302052e-06 5.6223248e-05 2270 + 26600 2660 5.8963204e-06 5.8294112e-05 2270 + 26800 2680 5.9840827e-06 5.5178641e-05 2270 + 27000 2700 6.233101e-06 5.0392993e-05 2270 + 27200 2720 6.4030159e-06 4.5425101e-05 2270 + 27400 2740 6.3645947e-06 4.0285284e-05 2270 + 27600 2760 6.3812928e-06 3.5424811e-05 2270 + 27800 2780 6.6705163e-06 3.6948809e-05 2270 + 28000 2800 7.0132913e-06 4.019863e-05 2270 + 28200 2820 7.18651e-06 3.9967954e-05 2270 + 28400 2840 7.0886176e-06 4.3071992e-05 2270 + 28600 2860 6.9307901e-06 4.5782387e-05 2270 + 28800 2880 6.8877726e-06 4.3850085e-05 2270 + 29000 2900 6.9126636e-06 4.0251523e-05 2270 + 29200 2920 6.8264196e-06 3.6233076e-05 2270 + 29400 2940 6.5665314e-06 3.5656627e-05 2270 + 29600 2960 6.3200336e-06 3.4500037e-05 2270 + 29800 2980 
6.1811978e-06 3.3514753e-05 2270 + 30000 3000 6.0713164e-06 3.5282348e-05 2270 +Loop time of 235.536 on 1 procs for 30000 steps with 2270 atoms + +Performance: 1100470.356 tau/day, 127.369 timesteps/s, 289.128 katom-step/s +99.7% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 61.303 | 61.303 | 61.303 | 0.0 | 26.03 +Bond | 0.67807 | 0.67807 | 0.67807 | 0.0 | 0.29 +Neigh | 2.8917 | 2.8917 | 2.8917 | 0.0 | 1.23 +Comm | 0.025338 | 0.025338 | 0.025338 | 0.0 | 0.01 +Output | 0.0045594 | 0.0045594 | 0.0045594 | 0.0 | 0.00 +Modify | 170.38 | 170.38 | 170.38 | 0.0 | 72.34 +Other | | 0.2562 | | | 0.11 + +Nlocal: 2270 ave 2270 max 2270 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 35274 ave 35274 max 35274 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 70548 ave 70548 max 70548 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 70548 +Ave neighs/atom = 31.078414 +Ave special neighs/atom = 1.4982379 +Neighbor list builds = 1772 +Dangerous builds = 0 +Total wall time: 0:03:55 From 1711b8ba0aabb5785f11b22ff1992f8ee12cf136 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Thu, 2 Oct 2025 15:35:18 -0600 Subject: [PATCH 058/604] Incorrect arg index in error message --- src/RHEO/fix_rheo_pressure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RHEO/fix_rheo_pressure.cpp b/src/RHEO/fix_rheo_pressure.cpp index e02c0bc7f7c..2262a1e4546 100644 --- a/src/RHEO/fix_rheo_pressure.cpp +++ b/src/RHEO/fix_rheo_pressure.cpp @@ -120,7 +120,7 @@ FixRHEOPressure::FixRHEOPressure(LAMMPS *lmp, int narg, char **arg) : pbackground[i] = pbackground_one; background_flag = 1; } else { - error->all(FLERR, "Illegal fix command, {}", arg[iarg]); + error->all(FLERR, "Illegal fix command, {}", arg[iarg + 1]); } iarg += 2; } From 
e34b3adafc9182a4816b2f26518557e3f02ff4ee Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 3 Oct 2025 12:18:43 -0600 Subject: [PATCH 059/604] Unnecessary period --- doc/src/fix_rheo_pressure.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_rheo_pressure.rst b/doc/src/fix_rheo_pressure.rst index 7d5d08ddd1d..81f91ea93a8 100644 --- a/doc/src/fix_rheo_pressure.rst +++ b/doc/src/fix_rheo_pressure.rst @@ -78,7 +78,7 @@ Style *tait/general* generalizes this equation of state .. math:: - P = \frac{c^2 \rho_0}{\gamma} \biggl[\left(\frac{\rho}{\rho_0}\right)^{\gamma} - 1\biggr]. + P = \frac{c^2 \rho_0}{\gamma} \biggl[\left(\frac{\rho}{\rho_0}\right)^{\gamma} - 1\biggr] where :math:`\gamma` is an exponent. From 9ebc52beb6cbf4e0289c9ac8652c9f1ee10e4be1 Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Mon, 6 Oct 2025 13:43:34 +0200 Subject: [PATCH 060/604] doc add DOI --- doc/src/fix_wall.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_wall.rst b/doc/src/fix_wall.rst index 3ed5233a4ad..e94f2de56b5 100644 --- a/doc/src/fix_wall.rst +++ b/doc/src/fix_wall.rst @@ -192,7 +192,8 @@ spring potential: E = \epsilon \quad (r - r_c)^2 \qquad r < r_c For style *wall/harmonic/reversed*, the energy E is given by an attractive-only harmonic -spring potential of the same form as *wall/harmonic*. +spring potential of the same form as *wall/harmonic*, +as employed for the CMC determination in Barraud et al :ref:`(Barraud) `. For style *wall/morse*, the energy E is given by a Morse potential: @@ -565,3 +566,7 @@ The option defaults units = lattice, fld = no, and pbc = no. **(Magda)** Magda, Tirrell, Davis, J Chem Phys, 83, 1888-1901 (1985); erratum in JCP 84, 2901 (1986). + +.. _Barraud: + +**(Barraud)** Barraud, Dalmazzone, Moureta, De Bruin, Creton, Pasquier, Lachet and Nieto-Draghi, Langmuir, 41 (11), 7272-7282 (2025). 
From 3c90ad61e06cb34504a194d7f4308fd28271fc86 Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Mon, 6 Oct 2025 13:49:13 +0200 Subject: [PATCH 061/604] Trailing whitespace --- doc/src/fix_wall.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_wall.rst b/doc/src/fix_wall.rst index e94f2de56b5..8f9538ec458 100644 --- a/doc/src/fix_wall.rst +++ b/doc/src/fix_wall.rst @@ -192,7 +192,7 @@ spring potential: E = \epsilon \quad (r - r_c)^2 \qquad r < r_c For style *wall/harmonic/reversed*, the energy E is given by an attractive-only harmonic -spring potential of the same form as *wall/harmonic*, +spring potential of the same form as *wall/harmonic*, as employed for the CMC determination in Barraud et al :ref:`(Barraud) `. For style *wall/morse*, the energy E is given by a Morse potential: From 1a20c4faeb93c760a7f3d7153739d1698c3c8929 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 14 Oct 2025 20:48:29 -0600 Subject: [PATCH 062/604] Fixing subscript --- doc/src/bond_bpm_spring_plastic.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/bond_bpm_spring_plastic.rst b/doc/src/bond_bpm_spring_plastic.rst index 7077ef07c42..e287247a6bc 100644 --- a/doc/src/bond_bpm_spring_plastic.rst +++ b/doc/src/bond_bpm_spring_plastic.rst @@ -141,7 +141,7 @@ will cause their reference states to be redefined. The potential energy and the single() function of this bond style returns zero. The single() function also calculates two extra bond quantities, the initial distance :math:`r_0` and the current equilibrium -length :math:`r_eq`. These extra quantities can be accessed by the +length :math:`r_{eq}`. These extra quantities can be accessed by the :doc:`compute bond/local ` command as *b1* and *b2*, respectively. 
From 0722f3beee920300859f80afe2ea16eed22bf23a Mon Sep 17 00:00:00 2001 From: jtclemm Date: Tue, 14 Oct 2025 20:51:43 -0600 Subject: [PATCH 063/604] Adding missing units of length --- doc/src/fix_rheo_thermal.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_rheo_thermal.rst b/doc/src/fix_rheo_thermal.rst index bc0e8dbfd06..d472df5d137 100644 --- a/doc/src/fix_rheo_thermal.rst +++ b/doc/src/fix_rheo_thermal.rst @@ -20,7 +20,7 @@ Syntax *conductivity* args = types style args types = lists of types (see below) style = *constant* - *constant* arg = conductivity (power/temperature) + *constant* arg = conductivity (power/(length*temperature)) *specific/heat* args = types style args types = lists of types (see below) style = *constant* From 88fbbbb33d72f2b1e95f55c066f8da0c3f9fde4e Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 15 Oct 2025 21:41:46 -0600 Subject: [PATCH 064/604] copy paste typo in error message --- src/RHEO/fix_rheo_thermal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RHEO/fix_rheo_thermal.cpp b/src/RHEO/fix_rheo_thermal.cpp index 648370694ec..a855c966165 100644 --- a/src/RHEO/fix_rheo_thermal.cpp +++ b/src/RHEO/fix_rheo_thermal.cpp @@ -249,7 +249,7 @@ int FixRHEOThermal::setmask() void FixRHEOThermal::init() { auto fixes = modify->get_fix_by_style("^rheo$"); - if (fixes.size() == 0) error->all(FLERR, "Need to define fix rheo to use fix rheo/viscosity"); + if (fixes.size() == 0) error->all(FLERR, "Need to define fix rheo to use fix rheo/thermal"); fix_rheo = dynamic_cast(fixes[0]); cut_kernel = fix_rheo->cut; From c406d23e4632c8a4a40ab42969863db490e0fa56 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 17 Oct 2025 14:07:43 -0600 Subject: [PATCH 065/604] Forgotten nevery arg in doc page --- doc/src/fix_nonaffine_displacement.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_nonaffine_displacement.rst b/doc/src/fix_nonaffine_displacement.rst index 
5fdd3ae772e..aaf8987e660 100644 --- a/doc/src/fix_nonaffine_displacement.rst +++ b/doc/src/fix_nonaffine_displacement.rst @@ -8,7 +8,7 @@ Syntax .. parsed-literal:: - fix ID group nonaffine/displacement style args reference/style nstep keyword values + fix ID group nonaffine/displacement nevery style args reference/style nstep keyword values * ID, group are documented in :doc:`fix ` command * nonaffine/displacement = style name of this fix command From e46343bc247c9bd5e490791f55ed4e3928fca42e Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 17 Oct 2025 16:08:15 -0600 Subject: [PATCH 066/604] Clarifying array_atom size and ensuring it's defined --- src/EXTRA-FIX/fix_nonaffine_displacement.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp index 31a7b6b0313..615002997a2 100644 --- a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp +++ b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp @@ -273,6 +273,11 @@ void FixNonaffineDisplacement::post_force(int /*vflag*/) } else { if ((update->ntimestep % nevery) == 0) calculate_D2Min(); } + } else { + // Otherwise, ensure peratom variables are zeroed + for (int i = 0; i < atom->nlocal; i++) + for (int a = 0; a < size_peratom_cols; a++) + array_atom[i][a] = 0.0; } if (reference_style == FIXED) @@ -773,7 +778,7 @@ void FixNonaffineDisplacement::grow_arrays(int nmax_new) { nmax = nmax_new; memory->destroy(array_atom); - memory->create(array_atom, nmax, 3, "fix_nonaffine_displacement:array_atom"); + memory->create(array_atom, nmax, size_peratom_cols, "fix_nonaffine_displacement:array_atom"); if (nad_style == D2MIN) { memory->destroy(X); memory->destroy(Y); From 24d917e363766333f86ae9087f0c3c319b0f9169 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Sat, 18 Oct 2025 12:57:00 -0600 Subject: [PATCH 067/604] add a NOTE about MC-only for fix atom/swap --- doc/src/fix_atom_swap.rst | 38 +++++++++++++++++++++++++++----------- 
1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/doc/src/fix_atom_swap.rst b/doc/src/fix_atom_swap.rst index 8a5ef0c99aa..ad9643652a2 100644 --- a/doc/src/fix_atom_swap.rst +++ b/doc/src/fix_atom_swap.rst @@ -49,17 +49,33 @@ atoms of the other given atom types. The specified scaling temperature *T* is used in the Metropolis criterion dictating swap probabilities. Perform *X* swaps of atoms of one type with atoms of another type -according to a Monte Carlo probability. Swap candidates must be in the -fix group, must be in the region (if specified), and must be of one of -the listed types. Swaps are attempted between candidates that are chosen -randomly with equal probability among the candidate atoms. Swaps are not -attempted between atoms of the same type since nothing would happen. - -All atoms in the simulation domain can be moved using regular time -integration displacements (e.g., via :doc:`fix nvt `), resulting -in a hybrid MC+MD simulation. A smaller-than-usual timestep size may be -needed when running such a hybrid simulation, especially if the swapped -atoms are not well equilibrated. +according to a Monte Carlo probability. Swap candidates must be in +the fix group, must be in the region (if specified), and must be of +one of the listed types. Swaps are attempted between candidates that +are chosen randomly with equal probability among the candidate +atoms. Swaps are not attempted between atoms of the same type since +nothing would happen. + +All atoms in the simulation domain can also be moved using regular +time integration displacements (e.g., via :doc:`fix nvt `), +resulting in a hybrid MC+MD simulation, where $X$ MC swap attempts are +made once every $N$ MD steps. A smaller-than-usual timestep size may +be needed when running such a hybrid simulation, especially if the +swapped atoms are not well equilibrated. + +.. 
note:: + + To run an MC-only simulation (no MD), you should define no + time-integration fix, set *N* = 1, set *X* to the total number of + MC swaps *M* to attempt, and run the simulation for a single + timestep. This will invoke energy evaluations only for the MC + operations, and none for MD. The initial and final potential + energy of the system will be output for the single step. If you + instead define no time-integration fix, set *N* = 1, *X* = 1, and + run for *M* steps (to attempt *M* total swaps), you will get the + same result, but there will be 3x as many times as many energy + evaluations due to the way LAMMPS treats the MD portion of the + timestep. The *types* keyword is required. At least two atom types must be specified. If not using *semi-grand*, exactly two atom types are From cd937e36e42858fb6d75bcc847cbba16bcd291f5 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Sat, 18 Oct 2025 13:06:09 -0600 Subject: [PATCH 068/604] tweaked the note --- doc/src/fix_atom_swap.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/fix_atom_swap.rst b/doc/src/fix_atom_swap.rst index ad9643652a2..7a591955b2e 100644 --- a/doc/src/fix_atom_swap.rst +++ b/doc/src/fix_atom_swap.rst @@ -73,9 +73,9 @@ swapped atoms are not well equilibrated. energy of the system will be output for the single step. If you instead define no time-integration fix, set *N* = 1, *X* = 1, and run for *M* steps (to attempt *M* total swaps), you will get the - same result, but there will be 3x as many times as many energy - evaluations due to the way LAMMPS treats the MD portion of the - timestep. + same result, but there will be 3x more energy evaluations due to + the requirements of (what LAMMPS presumes is a) hybrid MC+MD + simulation. The *types* keyword is required. At least two atom types must be specified. 
If not using *semi-grand*, exactly two atom types are From 0391ef3d20f5acb74c43a0624eb2c71e6e1970b4 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 20 Oct 2025 07:52:30 -0500 Subject: [PATCH 069/604] first attempt to resolve the bug: special kernel needs work for minimum image checks (using larger boxes than pair cutoff for now), mixed precision are giving wrong results --- lib/gpu/lal_born_coul_long_cs.cu | 55 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index 0ed395fa0ea..1a39fe7e65d 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -40,9 +40,19 @@ _texture( q_tex,int2); #define B4 (acctyp)-5.80844129e-3 #define B5 (acctyp)1.14652755e-1 +#if defined _DOUBLE_DOUBLE #define EPSILON (acctyp)(1.0e-20) #define EPS_EWALD (acctyp)(1.0e-6) #define EPS_EWALD_SQR (acctyp)(1.0e-12) +#elif defined _SINGLE_DOUBLE +#define EPSILON (acctyp)(1.0e-8) +#define EPS_EWALD (acctyp)(1.0e-5) +#define EPS_EWALD_SQR (acctyp)(1.0e-8) +#else +#define EPSILON (numtyp)(1.0e-7) +#define EPS_EWALD (numtyp)(1.0e-4) +#define EPS_EWALD_SQR (numtyp)(1.0e-7) +#endif __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict coeff1, @@ -100,7 +110,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, numtyp factor_lj, factor_coul; factor_lj = sp_lj[sbmask(j)]; - factor_coul = sp_lj[sbmask(j)+4]; + factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4]; j &= NEIGHMASK; numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; @@ -123,14 +133,17 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, numtyp r = ucl_sqrt(rsq); fetch(prefactor,j,q_tex); prefactor *= qqrd2e * qtmp; - if (factor_coul<(numtyp)1.0) { + if (factor_coul>(acctyp)0) { + // When bonded parts are being calculated, a minimal distance (EPS_EWALD) + // has to be added to the prefactor and erfc in order to make 
the + // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor /= (r+EPS_EWALD); - forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul)); + prefactor *= ucl_recip(r+EPS_EWALD); + forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent r2inv = ucl_recip(rsq + EPS_EWALD_SQR); @@ -140,7 +153,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor /= r; + prefactor *= ucl_recip(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } } else forcecoul = (numtyp)0.0; @@ -161,9 +174,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, if (EVFLAG && eflag) { if (rsq < cut_coulsq) { - numtyp e = prefactor*_erfc; - if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor; - e_coul += e; + e_coul += prefactor*(_erfc-factor_coul); } if (rsq < cutsq_sigma[mtype].y) { numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv @@ -243,10 +254,9 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, for ( ; nbor(acctyp)0) { + // When bonded parts are being calculated, a minimal distance (EPS_EWALD) + // has to be added to the prefactor and erfc in order to make the + // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + 
u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor /= (r+EPS_EWALD); - forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul)); + prefactor *= ucl_recip(r+EPS_EWALD); + forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent r2inv = ucl_recip(rsq + EPS_EWALD_SQR); - } else { + } + + else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor /= r; + prefactor *= ucl_recip(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } + } else forcecoul = (numtyp)0.0; if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq @@ -306,9 +323,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, if (EVFLAG && eflag) { if (rsq < cut_coulsq) { - numtyp e = prefactor*_erfc; - if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor; - e_coul += e; + e_coul += prefactor*(_erfc-factor_coul); } if (rsq < cutsq_sigma[mtype].y) { numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv From 3ada52d3ad69e5243f04e7f7f22bdce7499e439c Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 22 Sep 2025 10:45:19 -0700 Subject: [PATCH 070/604] eliminate bottleneck in pair_mliap_kokkos --- src/KOKKOS/pair_mliap_kokkos.cpp | 81 ++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/src/KOKKOS/pair_mliap_kokkos.cpp b/src/KOKKOS/pair_mliap_kokkos.cpp index cb84008b2b0..f25434dab3c 100644 --- a/src/KOKKOS/pair_mliap_kokkos.cpp +++ b/src/KOKKOS/pair_mliap_kokkos.cpp @@ -425,12 +425,20 @@ int PairMLIAPKokkos::pack_forward_comm_kokkos( auto val=fill.view(); int nf=vec_len; auto to=copy_to; - Kokkos::parallel_for(nv, KOKKOS_LAMBDA (int 
i) { - int gstart=idx(i)*nf; - int start=i*nf; - for (int j=0;j(to[gstart++]); - }); + Kokkos::parallel_for( + Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { + const int i = team_member.league_rank(); + const int gstart=idx(i)*nf; + const int start=i*nf; + Kokkos::parallel_for( + Kokkos::TeamVectorRange(team_member, nf), + [=] (const int j) { + val(start+j) = static_cast(to[gstart+j]); + } + ); + } + ); return nv*nf; } @@ -494,14 +502,21 @@ void PairMLIAPKokkos::unpack_forward_comm_kokkos( int nv, int first_up, DAT::tdual_double_1d &fill, CommType *copy_to) { auto val=fill.view(); int nf=vec_len; - - Kokkos::parallel_for(nv, KOKKOS_LAMBDA (int i) { - int gstart=(first_up+i)*nf; - int start=i*nf; - for (int j=0;j(val(start+j)); + auto to=copy_to; + Kokkos::parallel_for( + Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { + const int i = team_member.league_rank(); + const int gstart=(first_up+i)*nf; + const int start=i*nf; + Kokkos::parallel_for( + Kokkos::TeamVectorRange(team_member, nf), + [=] (const int j) { + to[gstart+j] = static_cast(val(start+j)); + } + ); } - }); + ); } /* ---------------------------------------------------------------------- */ @@ -557,13 +572,21 @@ int PairMLIAPKokkos::pack_reverse_comm_kokkos(int nv, int first_up, { int nf=vec_len; auto val=fill.view(); - Kokkos::parallel_for(nv, KOKKOS_LAMBDA (int i) { - int gstart=(first_up+i)*nf; - int start=i*nf; - for (int j=0;j(copy_to[gstart++]); + auto to=copy_to; + Kokkos::parallel_for( + Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { + const int i = team_member.league_rank(); + const int gstart=(first_up+i)*nf; + const int start=i*nf; + Kokkos::parallel_for( + Kokkos::TeamVectorRange(team_member, nf), + [=] (const int j) { + val(start+j) = static_cast(to[gstart+j]); + } + ); } - }); + 
); return nv*nf; } /* ---------------------------------------------------------------------- */ @@ -625,12 +648,20 @@ void PairMLIAPKokkos::unpack_reverse_comm_kokkos(int nv, DAT::tdual_ auto val=fill.view(); auto idx=idx_v.view(); auto to=copy_to; - Kokkos::parallel_for(nv, KOKKOS_LAMBDA (int i) { - int gstart=idx(i)*nf; - int start=i*nf; - for (int j=0;j(val(start++)); - }); + Kokkos::parallel_for( + Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), + KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { + const int i = team_member.league_rank(); + const int gstart=idx(i)*nf; + const int start=i*nf; + Kokkos::parallel_for( + Kokkos::TeamVectorRange(team_member, nf), + [=] (const int j) { + to[gstart+j] += static_cast(val(start+j)); + } + ); + } + ); } /* ---------------------------------------------------------------------- */ From 3bbba9b3c66d55b63277fff6a091750c499e13b9 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Wed, 22 Oct 2025 12:47:04 -0700 Subject: [PATCH 071/604] Uncomment mask atoms in equation of motion integration steps --- src/REPLICA/fix_pimd_langevin.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 37ac48fd10d..c120c1d9d9f 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -851,12 +851,12 @@ void FixPIMDLangevin::b_step() double **f = atom->f; for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { double dtfm = dtf / mass[type[i]]; v[i][0] += dtfm * f[i][0]; v[i][1] += dtfm * f[i][1]; v[i][2] += dtfm * f[i][2]; - // } + } } } @@ -875,11 +875,11 @@ void FixPIMDLangevin::qc_step() if (!pstat_flag) { if (universe->iworld == 0) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { x[i][0] += dtv * v[i][0]; x[i][1] += dtv * v[i][1]; x[i][2] += dtv * v[i][2]; - // } + } } } } else { @@ -895,7 
+895,7 @@ void FixPIMDLangevin::qc_step() } if (barostat == BZP) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { for (int j = 0; j < 3; j++) { if (p_flag[j]) { x[i][j] = expq[j] * x[i][j] + (expq[j] - expp[j]) / 2. / vw[j] * v[i][j]; @@ -904,7 +904,7 @@ void FixPIMDLangevin::qc_step() x[i][j] += dtv * v[i][j]; } } - // } + } } oldlo = domain->boxlo[0]; oldhi = domain->boxhi[0]; @@ -945,7 +945,7 @@ void FixPIMDLangevin::a_step() if (universe->iworld != 0) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { x0 = x[i][0]; x1 = x[i][1]; x2 = x[i][2]; @@ -964,7 +964,7 @@ void FixPIMDLangevin::a_step() Lan_c[universe->iworld] * v1; v[i][2] = -1.0 * _omega_k[universe->iworld] * Lan_s[universe->iworld] * x2 + Lan_c[universe->iworld] * v2; - // } + } } } } @@ -982,11 +982,11 @@ void FixPIMDLangevin::q_step() if (!pstat_flag) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { x[i][0] += dtv * v[i][0]; x[i][1] += dtv * v[i][1]; x[i][2] += dtv * v[i][2]; - // } + } } } } @@ -1177,25 +1177,25 @@ void FixPIMDLangevin::o_step() if (thermostat == PILE_L) { if (method == NMPIMD) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { atom->v[i][0] = c1_k[universe->iworld] * atom->v[i][0] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][1] = c1_k[universe->iworld] * atom->v[i][1] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][2] = c1_k[universe->iworld] * atom->v[i][2] + c2_k[universe->iworld] * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - // } + } } } else if (method == PIMD) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { + if (mask[i] & groupbit) { atom->v[i][0] = c1 * atom->v[i][0] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][1] = c1 * 
atom->v[i][1] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); atom->v[i][2] = c1 * atom->v[i][2] + c2 * sqrt(1.0 / mass[type[i]] / beta_np) * random->gaussian(); - // } + } } } } From dec0919f2b64a603ea181129c47425008d6b6110 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Wed, 22 Oct 2025 13:39:48 -0700 Subject: [PATCH 072/604] Clean up comments for pull request --- src/REPLICA/fix_pimd_langevin.cpp | 160 ++++++++++-------------------- 1 file changed, 50 insertions(+), 110 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index c120c1d9d9f..e0f6e90e1e5 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -514,15 +514,10 @@ void FixPIMDLangevin::init() void FixPIMDLangevin::setup(int vflag) { int nlocal = atom->nlocal; - int *mask = atom->mask; double **x = atom->x; imageint *image = atom->image; if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) domain->unmap(x[i], image[i]); } if (method == NMPIMD) { @@ -555,11 +550,7 @@ void FixPIMDLangevin::setup(int vflag) nmpimd_transform(bufbeads, x, M_xp2x[universe->iworld]); } if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap_inv(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) domain->unmap_inv(x[i], image[i]); } post_force(vflag); @@ -574,15 +565,10 @@ void FixPIMDLangevin::setup(int vflag) void FixPIMDLangevin::initial_integrate(int /*vflag*/) { int nlocal = atom->nlocal; - int *mask = atom->mask; double **x = atom->x; imageint *image = atom->image; if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) domain->unmap(x[i], image[i]); } if (integrator == OBABO) { if (tstat_flag) { @@ -677,11 +663,7 @@ void FixPIMDLangevin::initial_integrate(int 
/*vflag*/) } if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap_inv(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) { domain->unmap_inv(x[i], image[i]); } } } @@ -720,7 +702,6 @@ void FixPIMDLangevin::prepare_coordinates() void FixPIMDLangevin::post_force(int /*flag*/) { int nlocal = atom->nlocal; - int *mask = atom->mask; double **x = atom->x; double **f = atom->f; imageint *image = atom->image; @@ -729,25 +710,17 @@ void FixPIMDLangevin::post_force(int /*flag*/) if (atom->nmax > maxunwrap) reallocate_x_unwrap(); if (atom->nmax > maxxc) reallocate_xc(); for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - x_unwrap[i][0] = x[i][0]; - x_unwrap[i][1] = x[i][1]; - x_unwrap[i][2] = x[i][2]; - // } + x_unwrap[i][0] = x[i][0]; + x_unwrap[i][1] = x[i][1]; + x_unwrap[i][2] = x[i][2]; } if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap(x_unwrap[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) { domain->unmap(x_unwrap[i], image[i]); } } for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; - xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; - xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; - // } + xc[i][0] = xcall[3 * (tag[i] - 1) + 0]; + xc[i][1] = xcall[3 * (tag[i] - 1) + 1]; + xc[i][2] = xcall[3 * (tag[i] - 1) + 2]; } compute_vir(); @@ -757,22 +730,14 @@ void FixPIMDLangevin::post_force(int /*flag*/) if (method == PIMD) { if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) { domain->unmap(x[i], image[i]); } } prepare_coordinates(); spring_force(); compute_spring_energy(); compute_t_prim(); if (mapflag) { - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - domain->unmap_inv(x[i], image[i]); - // } - } + for (int i = 0; i < nlocal; i++) { domain->unmap_inv(x[i], image[i]); } 
} } compute_pote(); @@ -803,31 +768,24 @@ void FixPIMDLangevin::end_of_step() void FixPIMDLangevin::collect_xc() { int nlocal = atom->nlocal; - int *mask = atom->mask; double **x = atom->x; tagint *tag = atom->tag; if (ireplica == 0) { if (cmode == SINGLE_PROC) { for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; - // } + xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; } } else if (cmode == MULTI_PROC) { for (int i = 0; i < ntotal; i++) { - // if (mask[i] & groupbit) { - xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; - // } + xcall[3 * i + 0] = xcall[3 * i + 1] = xcall[3 * i + 2] = 0.0; } } const double sqrtnp = sqrt((double) np); for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - xcall[3 * (tag[i] - 1) + 0] = x[i][0] / sqrtnp; - xcall[3 * (tag[i] - 1) + 1] = x[i][1] / sqrtnp; - xcall[3 * (tag[i] - 1) + 2] = x[i][2] / sqrtnp; - // } + xcall[3 * (tag[i] - 1) + 0] = x[i][0] / sqrtnp; + xcall[3 * (tag[i] - 1) + 1] = x[i][1] / sqrtnp; + xcall[3 * (tag[i] - 1) + 2] = x[i][2] / sqrtnp; } if (cmode == MULTI_PROC) { @@ -1259,37 +1217,28 @@ void FixPIMDLangevin::nmpimd_init() void FixPIMDLangevin::nmpimd_transform(double **src, double **des, double *vector) { if (cmode == SINGLE_PROC) { - int *mask = atom->mask; - for (int i = 0; i < ntotal; i++) { - // if (mask[i] & groupbit) { - for (int d = 0; d < 3; d++) { - bufsorted[i][d] = 0.0; - for (int j = 0; j < nreplica; j++) { - bufsorted[i][d] += src[j * ntotal + i][d] * vector[j]; - } + for (int d = 0; d < 3; d++) { + bufsorted[i][d] = 0.0; + for (int j = 0; j < nreplica; j++) { + bufsorted[i][d] += src[j * ntotal + i][d] * vector[j]; } - // } + } } for (int i = 0; i < ntotal; i++) { - // if (mask[i] & groupbit) { - tagint tagtmp = atom->tag[i]; - for (int d = 0; d < 3; d++) { des[i][d] = bufsorted[tagtmp - 1][d]; } - // } + tagint tagtmp = atom->tag[i]; + for (int d = 0; d < 3; d++) { des[i][d] = 
bufsorted[tagtmp - 1][d]; } } } else if (cmode == MULTI_PROC) { - int nlocal = atom->nlocal; + int n = atom->nlocal; int m = 0; - int *mask = atom->mask; - for (int i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - for (int d = 0; d < 3; d++) { - des[i][d] = 0.0; - for (int j = 0; j < np; j++) { des[i][d] += (src[j][m] * vector[j]); } - m++; - } - // } + for (int i = 0; i < n; i++) { + for (int d = 0; d < 3; d++) { + des[i][d] = 0.0; + for (int j = 0; j < np; j++) { des[i][d] += (src[j][m] * vector[j]); } + m++; + } } } } @@ -1411,30 +1360,25 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) MPI_Status statuses[2]; if (atom->nmax > maxlocal) reallocate(); int nlocal = atom->nlocal; - int *mask = atom->mask; tagint *tag = atom->tag; int i, m; // copy local values for (i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - bufbeads[ireplica][3 * i + 0] = ptr[i][0]; - bufbeads[ireplica][3 * i + 1] = ptr[i][1]; - bufbeads[ireplica][3 * i + 2] = ptr[i][2]; - // } + bufbeads[ireplica][3 * i + 0] = ptr[i][0]; + bufbeads[ireplica][3 * i + 1] = ptr[i][1]; + bufbeads[ireplica][3 * i + 2] = ptr[i][2]; } // communicate values from the other beads if (cmode == SINGLE_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - tagint tagtmp = atom->tag[i]; - bufsorted[tagtmp - 1][0] = ptr[i][0]; - bufsorted[tagtmp - 1][1] = ptr[i][1]; - bufsorted[tagtmp - 1][2] = ptr[i][2]; - m++; - // } + tagint tagtmp = atom->tag[i]; + bufsorted[tagtmp - 1][0] = ptr[i][0]; + bufsorted[tagtmp - 1][1] = ptr[i][1]; + bufsorted[tagtmp - 1][2] = ptr[i][2]; + m++; } MPI_Allgather(&m, 1, MPI_INT, counts, 1, MPI_INT, universe->uworld); for (i = 0; i < nreplica; i++) counts[i] *= 3; @@ -1445,13 +1389,11 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) } else if (cmode == MULTI_PROC) { m = 0; for (i = 0; i < nlocal; i++) { - // if (mask[i] & groupbit) { - tagsend[m] = tag[i]; - bufsend[m][0] = ptr[i][0]; - bufsend[m][1] = ptr[i][1]; - bufsend[m][2] = 
ptr[i][2]; - m++; - // } + tagsend[m] = tag[i]; + bufsend[m][0] = ptr[i][0]; + bufsend[m][1] = ptr[i][1]; + bufsend[m][2] = ptr[i][2]; + m++; } MPI_Gather(&m, 1, MPI_INT, counts, 1, MPI_INT, 0, world); displacements[0] = 0; @@ -1475,13 +1417,11 @@ void FixPIMDLangevin::inter_replica_comm(double **ptr) MPI_Bcast(tagrecvall, ntotal, MPI_LMP_TAGINT, 0, world); MPI_Bcast(bufrecvall[0], 3 * ntotal, MPI_DOUBLE, 0, world); for (i = 0; i < ntotal; i++) { - // if (mask[i] & groupbit) { - m = atom->map(tagrecvall[i]); - if (m < 0 || m >= nlocal) continue; - bufbeads[modeindex[iplan]][3 * m + 0] = bufrecvall[i][0]; - bufbeads[modeindex[iplan]][3 * m + 1] = bufrecvall[i][1]; - bufbeads[modeindex[iplan]][3 * m + 2] = bufrecvall[i][2]; - // } + m = atom->map(tagrecvall[i]); + if (m < 0 || m >= nlocal) continue; + bufbeads[modeindex[iplan]][3 * m + 0] = bufrecvall[i][0]; + bufbeads[modeindex[iplan]][3 * m + 1] = bufrecvall[i][1]; + bufbeads[modeindex[iplan]][3 * m + 2] = bufrecvall[i][2]; } } } From ac98c1f9da69fb147d20dbdf923b8235b10281ab Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Fri, 24 Oct 2025 00:31:11 -0400 Subject: [PATCH 073/604] json key updates --- src/REAXFF/fix_reaxff_species.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index f8656d21260..ae0b79aa0cd 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -245,10 +245,11 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // header for 'delete' keyword JSON output fprintf(fdel, "{\n"); fprintf(fdel, " \"application\": \"LAMMPS\",\n"); - fprintf(fdel, " \"format\": \"output\",\n"); - fprintf(fdel, " \"subformat\": \"fix reaxff/species: delete keyword\",\n"); + fprintf(fdel, " \"format\": \"dump\",\n"); + fprintf(fdel, " \"style\": \"molecules\",\n"); + fprintf(fdel, " \"title\": \"fix reaxff/species: delete keyword\",\n"); fprintf(fdel, " \"revision\": 
1,\n"); - fprintf(fdel, " \"run_output\": [\n"); + fprintf(fdel, " \"timesteps\": [\n"); fflush(fdel); } } From f5542776cd4f39bff71237b8b011ba14fdafbbdf Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 24 Oct 2025 16:51:27 -0600 Subject: [PATCH 074/604] WIP --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 640 +++++++++++++++++++++++++- src/KOKKOS/atom_vec_hybrid_kokkos.h | 41 ++ src/KOKKOS/atom_vec_kokkos.cpp | 101 ++++ src/KOKKOS/atom_vec_kokkos.h | 11 + 4 files changed, 771 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 9bd5a9b1650..be6e2fe9291 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -15,6 +15,8 @@ #include "atom_vec_hybrid_kokkos.h" #include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain.h" #include "error.h" using namespace LAMMPS_NS; @@ -78,37 +80,631 @@ int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_1 return 0; } -int AtomVecHybridKokkos::pack_border_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_double_2d_lr /*buf*/, - int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/) +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_kkfloat_1d _q; + const typename AT::t_tagint_1d _molecule; + double _dx,_dy,_dz; + + AtomVecHybridKokkos_PackBorder( + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const typename AT::t_kkfloat_1d_3_lr &x, + const typename AT::t_tagint_1d &tag, + const typename AT::t_int_1d &type, + const typename 
AT::t_int_1d &mask, + const typename AT::t_kkfloat_1d &q, + const typename AT::t_tagint_1d &molecule, + const double &dx, const double &dy, const double &dz): + _buf(buf),_list(list), + _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _q(j); + _buf(i,7) = d_ubuf(_molecule(j)).d; + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _q(j); + _buf(i,7) = d_ubuf(_molecule(j)).d; + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; + double dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (space==Host) { + AtomVecHybridKokkos_PackBorder f( + buf.view_host(), k_sendlist.view_host(), + h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecHybridKokkos_PackBorder f( + buf.view_device(), k_sendlist.view_device(), + d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + + } else { + dx = dy = dz = 0; + if (space==Host) { + AtomVecHybridKokkos_PackBorder f( + buf.view_host(), k_sendlist.view_host(), + h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); + 
Kokkos::parallel_for(n,f); + } else { + AtomVecHybridKokkos_PackBorder f( + buf.view_device(), k_sendlist.view_device(), + d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } + return n*size_border; } -void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nfirst*/, - const DAT::tdual_double_2d_lr &/*buf*/, - ExecutionSpace /*space*/) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnpackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + const typename AT::t_double_2d_lr_const _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d _molecule; + int _first; + + + AtomVecHybridKokkos_UnpackBorder( + const typename AT::t_double_2d_lr_const &buf, + typename AT::t_kkfloat_1d_3_lr &x, + typename AT::t_tagint_1d &tag, + typename AT::t_int_1d &type, + typename AT::t_int_1d &mask, + typename AT::t_kkfloat_1d &q, + typename AT::t_tagint_1d &molecule, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), + _first(first) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _q(i+_first) = _buf(i,6); + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space) { + 
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + + while (first+n >= nmax) grow(0); + + if (space==Host) { + struct AtomVecHybridKokkos_UnpackBorder + f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_UnpackBorder + f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first); + Kokkos::parallel_for(n,f); + } + + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); } -int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_double_2d_lr &/*buf*/, - DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_int_1d /*k_copylist*/, - ExecutionSpace /*space*/) +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_kkfloat_1d_randomread _q; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_int_1d_randomread _num_angle; + typename AT::t_int_2d_randomread _angle_type; + typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d_randomread _num_dihedral; + typename AT::t_int_2d_randomread _dihedral_type; + typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d_randomread _num_improper; + typename AT::t_int_2d_randomread 
_improper_type; + typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_3 _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_kkfloat_1d _qw; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d _nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d _num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + typename AT::t_int_1d _num_anglew; + typename AT::t_int_2d _angle_typew; + typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; + typename AT::t_int_1d _num_dihedralw; + typename AT::t_int_2d _dihedral_typew; + typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, + _dihedral_atom3w,_dihedral_atom4w; + typename AT::t_int_1d _num_improperw; + typename AT::t_int_2d _improper_typew; + typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, + _improper_atom3w,_improper_atom4w; + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _size_exchange; + + AtomVecHybridKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d sendlist, + DAT::tdual_int_1d copylist): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), 
+ _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + _num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _qw(atom->k_q.view()), + _moleculew(atom->k_molecule.view()), + _nspecialw(atom->k_nspecial.view()), + _specialw(atom->k_special.view()), + _num_bondw(atom->k_num_bond.view()), + _bond_typew(atom->k_bond_type.view()), + _bond_atomw(atom->k_bond_atom.view()), + _num_anglew(atom->k_num_angle.view()), + _angle_typew(atom->k_angle_type.view()), + _angle_atom1w(atom->k_angle_atom1.view()), + _angle_atom2w(atom->k_angle_atom2.view()), + _angle_atom3w(atom->k_angle_atom3.view()), + _num_dihedralw(atom->k_num_dihedral.view()), + _dihedral_typew(atom->k_dihedral_type.view()), + _dihedral_atom1w(atom->k_dihedral_atom1.view()), + _dihedral_atom2w(atom->k_dihedral_atom2.view()), + _dihedral_atom3w(atom->k_dihedral_atom3.view()), + _dihedral_atom4w(atom->k_dihedral_atom4.view()), + _num_improperw(atom->k_num_improper.view()), + _improper_typew(atom->k_improper_type.view()), + _improper_atom1w(atom->k_improper_atom1.view()), + _improper_atom2w(atom->k_improper_atom2.view()), + _improper_atom3w(atom->k_improper_atom3.view()), + _improper_atom4w(atom->k_improper_atom4.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + 
_size_exchange(atom->avecKK->size_exchange) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + int k; + const int i = _sendlist(mysend); + _buf(mysend,0) = _size_exchange; + int m = 1; + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + _buf(mysend,m++) = _q(i); + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } + + 
_buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + + const int j = _copylist(mysend); + + if (j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + _qw(i) = _q(j); + _moleculew(i) = _molecule(j); + _num_bondw(i) = _num_bond(j); + for (k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + _num_anglew(i) = _num_angle(j); + for (k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + _num_dihedralw(i) = _num_dihedral(j); + for (k = 0; k < _num_dihedral(j); k++) { + _dihedral_typew(i,k) = _dihedral_type(j,k); + _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + } + _num_improperw(i) = _num_improper(j); + for (k = 0; k < _num_improper(j); k++) { + _improper_typew(i,k) = _improper_type(j,k); + _improper_atom1w(i,k) = _improper_atom1(j,k); + _improper_atom2w(i,k) = _improper_atom2(j,k); + _improper_atom3w(i,k) = _improper_atom3(j,k); + _improper_atom4w(i,k) = _improper_atom4(j,k); + } + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, + DAT::tdual_int_1d 
k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; + // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, + // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, + // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, + // and angle_atom3 + // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom + // 1 num_improper, 5*improper_per_atom + // 1 charge + // 1 to store buffer length + + size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ + 5*atom->dihedral_per_atom+5*atom->improper_per_atom; + + if (nsend > (int) (k_buf.view_host().extent(0)* + k_buf.view_host().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; + k_buf.resize(newsize,k_buf.view_host().extent(1)); + } + if (space == HostKK) { + AtomVecHybridKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist); + Kokkos::parallel_for(nsend,f); + return nsend*size_exchange; + } else { + AtomVecHybridKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist); + Kokkos::parallel_for(nsend,f); + return nsend*size_exchange; + } } -int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr & /*k_buf*/, int /*nrecv*/, - int /*nlocal*/, int /*dim*/, double /*lo*/, - double /*hi*/, ExecutionSpace /*space*/, - DAT::tdual_int_1d &/*k_indices*/) +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d _molecule; + 
typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; + int _dim; + double _lo,_hi; + int _size_exchange; + + AtomVecHybridKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d nlocal, + DAT::tdual_int_1d indices, + int dim, double lo, double hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _q(atom->k_q.view()), + _molecule(atom->k_molecule.view()), + _nspecial(atom->k_nspecial.view()), + _special(atom->k_special.view()), + _num_bond(atom->k_num_bond.view()), + _bond_type(atom->k_bond_type.view()), + _bond_atom(atom->k_bond_atom.view()), + _num_angle(atom->k_num_angle.view()), + _angle_type(atom->k_angle_type.view()), + _angle_atom1(atom->k_angle_atom1.view()), + _angle_atom2(atom->k_angle_atom2.view()), + _angle_atom3(atom->k_angle_atom3.view()), + _num_dihedral(atom->k_num_dihedral.view()), + _dihedral_type(atom->k_dihedral_type.view()), + _dihedral_atom1(atom->k_dihedral_atom1.view()), + _dihedral_atom2(atom->k_dihedral_atom2.view()), + _dihedral_atom3(atom->k_dihedral_atom3.view()), + _dihedral_atom4(atom->k_dihedral_atom4.view()), + 
_num_improper(atom->k_num_improper.view()), + _improper_type(atom->k_improper_type.view()), + _improper_atom1(atom->k_improper_atom1.view()), + _improper_atom2(atom->k_improper_atom2.view()), + _improper_atom3(atom->k_improper_atom3.view()), + _improper_atom4(atom->k_improper_atom4.view()), + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + double x = _buf(myrecv,_dim+1); + int i = -1; + if (x >= _lo && x < _hi) { + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; + _q(i) = _buf(myrecv,m++); + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + int k; + for (k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = 
(tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + if (OUTPUT_INDICES) + _indices(myrecv) = i; + } +}; + +/* ---------------------------------------------------------------------- */ +int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, + int dim, double lo, double hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; + while (nlocal + nrecv/size_exchange >= nmax) grow(0); + + if (space == HostKK) { + if (k_indices.view_host().data()) { + k_count.view_host()(0) = nlocal; + AtomVecHybridKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.view_host()(0) = nlocal; + AtomVecHybridKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } else { + if (k_indices.view_host().data()) { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + AtomVecHybridKokkos_UnpackExchangeFunctor + 
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify_device(); + k_count.sync_host(); + } else { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + AtomVecHybridKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify_device(); + k_count.sync_host(); + } + } + + return k_count.view_host()(0); } // TODO: move dynamic_cast into init diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 5f4eeb02119..6c9d65dffb7 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -81,6 +81,47 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { DAT::t_kkfloat_1d_3 d_omega, d_angmom; HAT::t_kkfloat_1d_3 h_omega, h_angmom; + + // FULL + + DAT::t_kkfloat_1d d_q; + HAT::t_kkfloat_1d h_q; + + DAT::t_tagint_1d d_molecule; + DAT::t_int_2d d_nspecial; + DAT::t_tagint_2d d_special; + DAT::t_int_1d d_num_bond; + DAT::t_int_2d d_bond_type; + DAT::t_tagint_2d d_bond_atom; + + HAT::t_tagint_1d h_molecule; + HAT::t_int_2d h_nspecial; + HAT::t_tagint_2d h_special; + HAT::t_int_1d h_num_bond; + HAT::t_int_2d h_bond_type; + HAT::t_tagint_2d h_bond_atom; + + DAT::t_int_1d d_num_angle; + DAT::t_int_2d d_angle_type; + DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; + + HAT::t_int_1d h_num_angle; + HAT::t_int_2d h_angle_type; + HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; + + DAT::t_int_1d d_num_dihedral; + DAT::t_int_2d d_dihedral_type; + DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, + d_dihedral_atom3,d_dihedral_atom4; + DAT::t_int_1d d_num_improper; + DAT::t_int_2d d_improper_type; + DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, + d_improper_atom3,d_improper_atom4; + + HAT::t_int_1d h_num_dihedral; + HAT::t_int_2d h_dihedral_type; + HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, + 
h_dihedral_atom3,h_dihedral_atom4; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 63e912b4915..dd3c1e19bb1 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -856,3 +856,104 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n, atomKK->modified(Device,F_MASK); } } + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::field2mask(std::string field) +{ + if (field == "id") + return TAG_MASK; + else if (field == "type") + return TYPE_MASK; + else if (field == "mask") + return MASK_MASK; + else if (field == "image") + return IMAGE_MASK; + else if (field == "x") + return X_MASK; + else if (field == "v") + return V_MASK; + else if (field == "f") + return F_MASK; + else if (field == "rmass") + return RMASS_MASK; + else if (field == "q") + return Q_MASK; + else if (field == "mu") + return MU_MASK; + else if (field == "radius") + return RADIUS_MASK; + else if (field == "omega") + return OMEGA_MASK; + else if (field == "torque") + return TORQUE_MASK; + else if (field == "molecule") + return MOLECULE_MASK; + else if (field == "special") + return SPECIAL_MASK; + else if (field == "num_bond") + return BOND_MASK; + else if (field == "num_angle") + return ANGLE_MASK; + else if (field == "num_dihedral") + return DIHEDRAL_MASK; + else if (field == "num_improper") + return IMPROPER_MASK; + else if (field == "sp") + return SP_MASK; + else if (field == "fm") + return FM_MASK; + else if (field == "fm_long") + return FML_MASK; + else if (field == "rho") // conflicts with SPH package "rho" + return DPDRHO_MASK; + else if (field == "dpdTheta") + return DPDTHETA_MASK; + else if (field == "uCond") + return UCOND_MASK; + else if (field == "uMech") + return UMECH_MASK; + else if (field == "uChem") + return UCHEM_MASK; + else if (field == "uCG") + return UCG_MASK; + else if (field == "uCGnew") + return UCGNEW_MASK; + else if (field == "duChem") + 
return DUCHEM_MASK; + else + return EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::set_atom_masks() +{ + datamask_grow = EMPTY_MASK; + for (int i = 0; i < ngrow; i++) + datamask_grow |= field2mask(fields_grow[i]); + + datamask_comm = EMPTY_MASK; + for (int i = 0; i < ncomm; i++) + datamask_comm |= field2mask(fields_comm[i]); + + datamask_comm_vel = EMPTY_MASK; + for (int i = 0; i < ncomm_vel; i++) + datamask_comm_vel |= field2mask(fields_comm_vel[i]); + + datamask_reverse = EMPTY_MASK; + for (int i = 0; i < nreverse; i++) + datamask_reverse |= field2mask(fields_reverse[i]); + + datamask_border = EMPTY_MASK; + for (int i = 0; i < nborder; i++) + datamask_border |= field2mask(fields_border[i]); + + datamask_border_vel = EMPTY_MASK; + for (int i = 0; i < nborder_vel; i++) + datamask_border_vel |= field2mask(fields_border_vel[i]); + + datamask_exchange = EMPTY_MASK; + for (int i = 0; i < nexchange; i++) + datamask_exchange |= field2mask(fields_exchange[i]); +} diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 05e6d7624e7..8375bd93e31 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -127,6 +127,17 @@ class AtomVecKokkos : virtual public AtomVec { DAT::tdual_int_1d k_count; + int datamask_grow; + int datamask_comm; + int datamask_comm_vel; + int datamask_reverse; + int datamask_border; + int datamask_border_vel; + int datamask_exchange; + + int field2mask(std::string); + void set_atom_masks(); + public: #ifdef LMP_KOKKOS_GPU From c321df5d11db4601a79114458699fa6c92f46c11 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 24 Oct 2025 17:55:47 -0600 Subject: [PATCH 075/604] WIP --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 1133 +++++++++++++++++++++---- src/KOKKOS/atom_vec_hybrid_kokkos.h | 37 +- 2 files changed, 1017 insertions(+), 153 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 
be6e2fe9291..f5cd7a7ff80 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -18,6 +18,7 @@ #include "atom_masks.h" #include "domain.h" #include "error.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -58,26 +59,807 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_1d &/*k_sendlist*/, - const DAT::tdual_double_2d_lr &/*buf*/, - const int &/*pbc_flag*/, const int /*pbc*/[]) +template +struct AtomVecHybridKokkos_PackComm { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d_const _list; + double _xprd,_yprd,_zprd,_xy,_xz,_yz; + double _pbc[6]; + + AtomVecHybridKokkos_PackComm( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc): + _x(x.view()),_list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) 
= _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, + const int* const pbc) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK); + if (pbc_flag) { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + atomKK->sync(Device,X_MASK); + if (pbc_flag) { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + 
domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + + return n*size_forward; } -void AtomVecHybridKokkos::unpack_comm_kokkos(const int &/*n*/, const int &/*nfirst*/, - const DAT::tdual_double_2d_lr &/*buf*/) +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackCommSelf { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_lr _xw; + int _nfirst; + typename AT::t_int_1d_const _list; + double _xprd,_yprd,_zprd,_xy,_xz,_yz; + double _pbc[6]; + + AtomVecHybridKokkos_PackCommSelf( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const int &nfirst, + const typename DAT::tdual_int_1d &list, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc): + _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK); + if (pbc_flag) { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + atomKK->modified(HostKK,X_MASK); + } else { + atomKK->sync(Device,X_MASK); + if (pbc_flag) { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + 
domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + atomKK->modified(Device,X_MASK); + } + + return n*3; +} + + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackCommSelfFused { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_int_2d_lr_const _list; + typename AT::t_int_2d_const _pbc; + typename AT::t_int_1d_const _pbc_flag; + typename AT::t_int_1d_const _firstrecv; + typename AT::t_int_1d_const _sendnum_scan; + typename AT::t_int_1d_const _g2l; + double _xprd,_yprd,_zprd,_xy,_xz,_yz; + + AtomVecHybridKokkos_PackCommSelfFused( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const typename DAT::tdual_int_2d_lr &list, + const typename DAT::tdual_int_2d &pbc, + const typename DAT::tdual_int_1d &pbc_flag, + const typename DAT::tdual_int_1d &firstrecv, + const typename DAT::tdual_int_1d &sendnum_scan, + const typename DAT::tdual_int_1d &g2l, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz): + _x(x.view()),_xw(x.view()), + _list(list.view()), + _pbc(pbc.view()), + _pbc_flag(pbc_flag.view()), + _firstrecv(firstrecv.view()), + _sendnum_scan(sendnum_scan.view()), + _g2l(g2l.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& ii) const { + + int iswap = 0; + while (ii >= _sendnum_scan[iswap]) iswap++; + int i = ii; + if (iswap > 0) + i = ii - _sendnum_scan[iswap-1]; + + const int _nfirst = _firstrecv[iswap]; + const int nlocal = _firstrecv[0]; + + int j = _list(iswap,i); + if (j >= nlocal) + j = _g2l(j-nlocal); + + if (_pbc_flag(ii) == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + 
_pbc(ii,0)*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l) { + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK); + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + atomKK->modified(HostKK,X_MASK); + } else { + atomKK->sync(Device,X_MASK); + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + atomKK->modified(Device,X_MASK); + } + + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnpackComm { + typedef 
DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_double_2d_lr_const _buf; + int _first; + + AtomVecHybridKokkos_UnpackComm( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const typename DAT::tdual_double_2d_lr &buf, + const int& first):_x(x.view()), + _first(first) { + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK); + struct AtomVecHybridKokkos_UnpackComm f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + atomKK->modified(HostKK,X_MASK); + } else { + atomKK->sync(Device,X_MASK); + struct AtomVecHybridKokkos_UnpackComm f(atomKK->k_x,buf,first); + Kokkos::parallel_for(n,f); + atomKK->modified(Device,X_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackCommVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_int_1d _mask; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d_const _list; + double _xprd,_yprd,_zprd,_xy,_xz,_yz; + double _pbc[6]; + double _h_rate[6]; + const int _deform_vremap; + + AtomVecHybridKokkos_PackCommVel( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const typename DAT::tdual_int_1d &mask, + const typename DAT::ttransform_kkfloat_1d_3 &v, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const 
double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc, + const double * const h_rate, + const int &deform_vremap): + _x(x.view()), + _mask(mask.view()), + _v(v.view()), + _list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz), + _deform_vremap(deform_vremap) + { + const size_t elements = 6; + const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; + _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + + if (DEFORM_VREMAP == 0) { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } else { + if (_mask(i) & _deform_vremap) { + _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + } else { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_vel_kokkos( + const int &n, + const DAT::tdual_int_1d &list, 
+ const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, + const int* const pbc) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK|V_MASK); + if (pbc_flag) { + if (deform_vremap) { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + atomKK->sync(Device,X_MASK|V_MASK); + if (pbc_flag) { + if (deform_vremap) { + if (domain->triclinic) { + struct 
AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if (domain->triclinic) { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_PackCommVel f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + + return n*6; } -int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_1d &/*list*/, - const int /*nfirst*/, - const int &/*pbc_flag*/, const int pbc[]) +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnpackCommVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename 
AT::t_double_2d_lr_const _buf; + int _first; + + AtomVecHybridKokkos_UnpackCommVel( + const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const typename DAT::ttransform_kkfloat_1d_3 &v, + const typename DAT::tdual_double_2d_lr &buf, + const int& first): + _x(x.view()), + _v(v.view()), + _first(first) + { + const size_t elements = 6; + const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; + buffer_view(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _v(i+_first,0) = _buf(i,3); + _v(i+_first,1) = _buf(i,4); + _v(i+_first,2) = _buf(i,5); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,X_MASK|V_MASK); + struct AtomVecHybridKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); + Kokkos::parallel_for(n,f); + atomKK->modified(HostKK,X_MASK|V_MASK); + } else { + atomKK->sync(Device,X_MASK|V_MASK); + struct AtomVecHybridKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); + Kokkos::parallel_for(n,f); + atomKK->modified(Device,X_MASK|V_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_PackReverse { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_double_2d_lr _buf; + int _first; + + AtomVecHybridKokkos_PackReverse( + const typename DAT::ttransform_kkacc_1d_3 &f, + const typename DAT::tdual_double_2d_lr &buf, + const int& first):_f(f.view()), + _first(first) { + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + }; + 
+ KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _buf(i,0) = _f(i+_first,0); + _buf(i,1) = _f(i+_first,1); + _buf(i,2) = _f(i+_first,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_reverse_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->reverse_comm_on_host) { + atomKK->sync(HostKK,F_MASK); + struct AtomVecHybridKokkos_PackReverse f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } else { + atomKK->sync(Device,F_MASK); + struct AtomVecHybridKokkos_PackReverse f(atomKK->k_f,buf,first); + Kokkos::parallel_for(n,f); + } + + return n*size_reverse; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnPackReverseSelf { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkacc_1d_3 _fw; + int _nfirst; + typename AT::t_int_1d_const _list; + + AtomVecHybridKokkos_UnPackReverseSelf( + const typename DAT::ttransform_kkacc_1d_3 &f, + const int &nfirst, + const typename DAT::tdual_int_1d &list): + _f(f.view()),_fw(f.view()),_nfirst(nfirst),_list(list.view()) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + _fw(j,0) += _f(i+_nfirst,0); + _fw(j,1) += _f(i+_nfirst,1); + _fw(j,2) += _f(i+_nfirst,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst) { + if (lmp->kokkos->reverse_comm_on_host) { + atomKK->sync(HostKK,F_MASK); + struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + Kokkos::parallel_for(n,f); + atomKK->modified(HostKK,F_MASK); + } else { + atomKK->sync(Device,F_MASK); + struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + 
Kokkos::parallel_for(n,f); + atomKK->modified(Device,F_MASK); + } + + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnPackReverse { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkacc_1d_3 _f; + typename AT::t_double_2d_lr_const _buf; + typename AT::t_int_1d_const _list; + + AtomVecHybridKokkos_UnPackReverse( + const typename DAT::ttransform_kkacc_1d_3 &f, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list): + _f(f.view()),_list(list.view()) { + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + _f(j,0) += _buf(i,0); + _f(j,1) += _buf(i,1); + _f(j,2) += _buf(i,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_reverse_kokkos(const int &n, + const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf) { - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; + // Check whether to always run reverse communication on the host + // Choose correct reverse UnPackReverse kernel + + if (lmp->kokkos->reverse_comm_on_host) { + atomKK->sync(HostKK,F_MASK); + struct AtomVecHybridKokkos_UnPackReverse f(atomKK->k_f,buf,list); + Kokkos::parallel_for(n,f); + atomKK->modified(HostKK,F_MASK); + } else { + atomKK->sync(Device,F_MASK); + struct AtomVecHybridKokkos_UnPackReverse f(atomKK->k_f,buf,list); + Kokkos::parallel_for(n,f); + atomKK->modified(Device,F_MASK); + } } /* ---------------------------------------------------------------------- */ @@ -254,6 +1036,7 @@ template struct AtomVecHybridKokkos_PackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; + typename AT::t_kkfloat_1d_3_lr_randomread _x; 
typename AT::t_kkfloat_1d_3_randomread _v; typename AT::t_tagint_1d_randomread _tag; @@ -302,16 +1085,19 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_int_2d _improper_typew; typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, _improper_atom3w,_improper_atom4w; + typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; int _size_exchange; + unsigned int _datamask; AtomVecHybridKokkos_PackExchangeFunctor( const AtomKokkos* atom, const DAT::tdual_double_2d_lr buf, DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): + DAT::tdual_int_1d copylist, + const unsigned int datamask): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -374,7 +1160,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom4w(atom->k_improper_atom4.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { + _size_exchange(atom->avecKK->size_exchange), + _datamask(datamask) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -382,10 +1169,10 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { - int k; const int i = _sendlist(mysend); _buf(mysend,0) = _size_exchange; int m = 1; + _buf(mysend,m++) = _x(i,0); _buf(mysend,m++) = _x(i,1); _buf(mysend,m++) = _x(i,2); @@ -396,91 +1183,128 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _buf(mysend,m++) = d_ubuf(_type(i)).d; _buf(mysend,m++) = d_ubuf(_mask(i)).d; _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = _q(i); - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + + if (_datamask & Q_MASK) + 
_buf(mysend,m++) = _q(i); + + if (_datamask & MOLECULE_MASK) + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + + if (_datamask & BOND_MASK) { + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (int k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + + if (_datamask & ANGLE_MASK) { + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (int k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } } - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + + if (_datamask & DIHEDRAL_MASK) { + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (int k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } } - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = 
d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + + if (_datamask & IMPROPER_MASK) { + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (int k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } } - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + if (_datamask & SPECIAL_MASK) { + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (int k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + } const int j = _copylist(mysend); - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _num_dihedralw(i) = _num_dihedral(j); - for (k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) 
= _dihedral_atom4(j,k); - } - _num_improperw(i) = _num_improper(j); - for (k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); + if (j > -1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + + if (_datamask & Q_MASK) + _qw(i) = _q(j); + + if (_datamask & MOLECULE_MASK) + _moleculew(i) = _molecule(j); + + if (_datamask & BOND_MASK) { + _num_bondw(i) = _num_bond(j); + for (int k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + } + + if (_datamask & ANGLE_MASK) { + _num_anglew(i) = _num_angle(j); + for (int k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedralw(i) = _num_dihedral(j); + for (int k = 0; k < _num_dihedral(j); k++) { + _dihedral_typew(i,k) = _dihedral_type(j,k); + _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improperw(i) = _num_improper(j); + for (int k = 0; k < _num_improper(j); k++) { + _improper_typew(i,k) = _improper_type(j,k); + _improper_atom1w(i,k) = _improper_atom1(j,k); + _improper_atom2w(i,k) = 
_improper_atom2(j,k); + _improper_atom3w(i,k) = _improper_atom3(j,k); + _improper_atom4w(i,k) = _improper_atom4(j,k); + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (int k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } } } }; @@ -492,18 +1316,6 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double DAT::tdual_int_1d k_copylist, ExecutionSpace space) { - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 charge - // 1 to store buffer length - - size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom+5*atom->improper_per_atom; - if (nsend > (int) (k_buf.view_host().extent(0)* k_buf.view_host().extent(1))/size_exchange) { int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; @@ -511,12 +1323,12 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double } if (space == HostKK) { AtomVecHybridKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); Kokkos::parallel_for(nsend,f); return nsend*size_exchange; } else { AtomVecHybridKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); Kokkos::parallel_for(nsend,f); return nsend*size_exchange; } @@ -528,6 +1340,7 @@ template struct AtomVecHybridKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; + typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; 
typename AT::t_tagint_1d _tag; @@ -559,13 +1372,15 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { int _dim; double _lo,_hi; int _size_exchange; + unsigned int _datamask; AtomVecHybridKokkos_UnpackExchangeFunctor( const AtomKokkos* atom, const DAT::tdual_double_2d_lr buf, DAT::tdual_int_1d nlocal, DAT::tdual_int_1d indices, - int dim, double lo, double hi): + int dim, double lo, double hi, + unsigned int datamask): _x(atom->k_x.view()), _v(atom->k_v.view()), _tag(atom->k_tag.view()), @@ -598,7 +1413,8 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _improper_atom4(atom->k_improper_atom4.view()), _nlocal(nlocal.template view()), _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange), + _datamask(datamask) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; buffer_view(_buf,buf,maxsendlist,_size_exchange); @@ -621,43 +1437,62 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _q(i) = _buf(myrecv,m++); - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) 
d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & Q_MASK) + _q(i) = _buf(myrecv,m++); + + if (_datamask & MOLECULE_MASK) + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & BOND_MASK) { + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & ANGLE_MASK) { + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) 
d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } + if (OUTPUT_INDICES) _indices(myrecv) = i; } @@ -674,12 +1509,12 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, if (k_indices.view_host().data()) { k_count.view_host()(0) = nlocal; AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); Kokkos::parallel_for(nrecv/size_exchange,f); } else { k_count.view_host()(0) = nlocal; AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); Kokkos::parallel_for(nrecv/size_exchange,f); } } else { @@ -688,7 +1523,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, k_count.modify_host(); k_count.sync_device(); AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify_device(); k_count.sync_host(); @@ -697,7 +1532,7 @@ int 
AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, k_count.modify_host(); k_count.sync_device(); AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); Kokkos::parallel_for(nrecv/size_exchange,f); k_count.modify_device(); k_count.sync_host(); diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 6c9d65dffb7..3322a0e80d0 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -36,24 +36,53 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { void grow(int) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, + int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst, + const int &pbc_flag, const int pbc[]) override; + + int pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l) override; + + int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, const DAT::tdual_double_2d_lr &buf, const int &pbc_flag, const int pbc[]) override; + void unpack_comm_kokkos(const int &n, const int &nfirst, const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; + + int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, const int pbc[]) override; + + void unpack_comm_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf) override; + + int pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst) override; + + 
int pack_reverse_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf) override; + + void unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf) override; + int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, int pbc_flag, int *pbc, ExecutionSpace space) override; + void unpack_border_kokkos(const int &n, const int &nfirst, const DAT::tdual_double_2d_lr &buf, ExecutionSpace space) override; + int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space) override; + int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, int dim, double lo, double hi, ExecutionSpace space, From ab05192528cd27fc2211aae7f50c8e46c47c415d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 25 Oct 2025 10:11:08 -0400 Subject: [PATCH 076/604] consolidate BUFEXTRA constant by making it an enum in comm.h --- src/KOKKOS/comm_kokkos.cpp | 5 ++--- src/KOKKOS/comm_tiled_kokkos.cpp | 5 ++--- src/MC/fix_hmc.cpp | 5 ++--- src/comm.cpp | 2 -- src/comm.h | 1 + src/irregular.cpp | 5 ++--- 6 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 618b9f20347..20510ba2844 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -38,7 +38,6 @@ using namespace LAMMPS_NS; static constexpr double BUFFACTOR = 1.5; static constexpr int BUFMIN = 10000; -static constexpr int BUFEXTRA = 1000; /* ---------------------------------------------------------------------- setup MPI and allocate buffer space @@ -1465,7 +1464,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) { maxsend = static_cast (BUFFACTOR * n); - int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border; + int maxsend_border = (maxsend+Comm::BUFEXTRA)/atomKK->avecKK->size_border; if (flag) { if 
(space == Device) k_buf_send.modify_device(); @@ -1496,7 +1495,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace /*space*/) { maxrecv = static_cast (BUFFACTOR * n); - int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border; + int maxrecv_border = (maxrecv+Comm::BUFEXTRA)/atomKK->avecKK->size_border; MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border, atomKK->avecKK->size_border); diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp index 5287708a2bc..3a542681233 100644 --- a/src/KOKKOS/comm_tiled_kokkos.cpp +++ b/src/KOKKOS/comm_tiled_kokkos.cpp @@ -30,7 +30,6 @@ using namespace LAMMPS_NS; static constexpr double BUFFACTOR = 1.5; static constexpr int BUFMIN = 1024; -static constexpr int BUFEXTRA = 1000; /* ---------------------------------------------------------------------- */ @@ -587,7 +586,7 @@ void CommTiledKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) { maxsend = static_cast (BUFFACTOR * n); - int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border; + int maxsend_border = (maxsend+Comm::BUFEXTRA)/atomKK->avecKK->size_border; if (flag) { if (space == Device) k_buf_send.modify_device(); @@ -620,7 +619,7 @@ void CommTiledKokkos::grow_recv_kokkos(int n, int flag, ExecutionSpace /*space*/ if (flag) maxrecv = n; else maxrecv = static_cast (BUFFACTOR * n); - int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border; + int maxrecv_border = (maxrecv+Comm::BUFEXTRA)/atomKK->avecKK->size_border; MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border, atomKK->avecKK->size_border); diff --git a/src/MC/fix_hmc.cpp b/src/MC/fix_hmc.cpp index ab19d4570e8..aa13e2c7ec3 100644 --- a/src/MC/fix_hmc.cpp +++ b/src/MC/fix_hmc.cpp @@ -49,7 +49,6 @@ using namespace LAMMPS_NS; using namespace FixConst; static constexpr double BUFFACTOR = 1.5; -static constexpr int BUFEXTRA = 1024; 
static constexpr auto SIX = sizeof(double) * 6; /* ---------------------------------------------------------------------- */ @@ -408,9 +407,9 @@ void FixHMC::setup(int vflag) for (const auto &fix : modify->get_fix_list()) maxexchange_fix += fix->maxexchange; maxexchange = maxexchange_atom + maxexchange_fix; - bufextra = maxexchange + BUFEXTRA; + bufextra = maxexchange + Comm::BUFEXTRA; - maxstore = BUFEXTRA; + maxstore = Comm::BUFEXTRA; grow_store(maxstore, 2); save_current_state(); } diff --git a/src/comm.cpp b/src/comm.cpp index 5a9adbf2fec..cd6dce4374a 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -42,8 +42,6 @@ using namespace LAMMPS_NS; -static constexpr int BUFEXTRA = 1024; - enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM}; enum{CART,CARTREORDER,XYZ}; diff --git a/src/comm.h b/src/comm.h index df879709e52..384d8937d84 100644 --- a/src/comm.h +++ b/src/comm.h @@ -31,6 +31,7 @@ class Comm : protected Pointers { enum { SINGLE, MULTI }; int mode; // SINGLE = single cutoff // MULTI = multi-collection cutoff + enum { BUFEXTRA = 1024 }; // standard communication buffer size for fixed size per-atom-data int me, nprocs; // proc info int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not diff --git a/src/irregular.cpp b/src/irregular.cpp index f16669e5be6..8f5e0f81161 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -38,7 +38,6 @@ static int compare_standalone(const int, const int, void *); static constexpr double BUFFACTOR = 1.5; static constexpr int BUFMIN = 1024; -static constexpr int BUFEXTRA = 1024; /* ---------------------------------------------------------------------- */ @@ -72,7 +71,7 @@ Irregular::Irregular(LAMMPS *lmp) : // these can persist for multiple irregular operations maxsend = maxrecv = BUFMIN; - bufextra = BUFEXTRA; + bufextra = Comm::BUFEXTRA; grow_send(maxsend, 2); memory->create(buf_recv, maxrecv, "comm:buf_recv"); } @@ -1001,7 +1000,7 @@ void Irregular::init_exchange() for (const auto &ifix : modify->get_fix_list()) maxexchange_fix = 
MAX(maxexchange_fix, ifix->maxexchange); - bufextra = atom->avec->maxexchange + maxexchange_fix + BUFEXTRA; + bufextra = atom->avec->maxexchange + maxexchange_fix + Comm::BUFEXTRA; } /* ---------------------------------------------------------------------- From 6f42be93758a292fef398421ab997171c708ece5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 25 Oct 2025 10:14:37 -0400 Subject: [PATCH 077/604] set AtomVec::maxexchange to size comm buffer for bonds, angles, dihedrals, and impropers --- src/atom_vec.h | 3 +-- src/create_box.cpp | 9 +++++++++ src/read_data.cpp | 19 ++++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/atom_vec.h b/src/atom_vec.h index 76493cbe505..edbb3a3bebf 100644 --- a/src/atom_vec.h +++ b/src/atom_vec.h @@ -39,8 +39,7 @@ class AtomVec : protected Pointers { int size_data_atom; // number of values in Atom line int size_data_vel; // number of values in Velocity line int xcol_data; // column (1-N) where x is in Atom line - int maxexchange; // max size of exchanged atom - // only needs to be set if size > BUFEXTRA + int maxexchange; // max size of exchanged atom from bonds, angles, etc. 
int bonus_flag; // 1 if stores bonus data int size_forward_bonus; // # in forward bonus comm diff --git a/src/create_box.cpp b/src/create_box.cpp index 49e3cf5ba94..8b0fb0f67f9 100644 --- a/src/create_box.cpp +++ b/src/create_box.cpp @@ -193,6 +193,7 @@ void CreateBox::command(int narg, char **arg) // process optional args that can overwrite default settings + int maxexchange = 0; while (iarg < narg) { if (strcmp(arg[iarg], "bond/types") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "create_box bond/type", error); @@ -223,12 +224,14 @@ void CreateBox::command(int narg, char **arg) if (!atom->avec->bonds_allow) error->all(FLERR, "No bonds allowed with atom style {}", atom->get_style()); atom->bond_per_atom = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + maxexchange += 2 * atom->bond_per_atom; iarg += 2; } else if (strcmp(arg[iarg], "extra/angle/per/atom") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "create_box extra/angle/per/atom", error); if (!atom->avec->angles_allow) error->all(FLERR, "No angles allowed with atom style {}", atom->get_style()); atom->angle_per_atom = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + maxexchange += 4 * atom->angle_per_atom; iarg += 2; } else if (strcmp(arg[iarg], "extra/dihedral/per/atom") == 0) { if (iarg + 2 > narg) @@ -236,6 +239,7 @@ void CreateBox::command(int narg, char **arg) if (!atom->avec->dihedrals_allow) error->all(FLERR, "No dihedrals allowed with atom style {}", atom->get_style()); atom->dihedral_per_atom = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + maxexchange += 5 * atom->dihedral_per_atom; iarg += 2; } else if (strcmp(arg[iarg], "extra/improper/per/atom") == 0) { if (iarg + 2 > narg) @@ -243,6 +247,7 @@ void CreateBox::command(int narg, char **arg) if (!atom->avec->impropers_allow) error->all(FLERR, "No impropers allowed with atom style {}", atom->get_style()); atom->improper_per_atom = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + maxexchange += 5 * 
atom->improper_per_atom; iarg += 2; } else if (strcmp(arg[iarg], "extra/special/per/atom") == 0) { if (iarg + 2 > narg) @@ -254,6 +259,10 @@ void CreateBox::command(int narg, char **arg) error->all(FLERR, "Unknown create_box keyword: {}", arg[iarg]); } + // set per-atom communication buffersize for contributions like bonds, angles, etc. + + atom->avec->maxexchange = maxexchange; + // setup the simulation box and initial system // deallocate/grow ensures any extra settings are used for topology arrays // necessary in case no create_atoms is performed diff --git a/src/read_data.cpp b/src/read_data.cpp index 0b1e88745dc..6ee7595f3a4 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -710,6 +710,8 @@ void ReadData::command(int narg, char **arg) if (firstpass) { delete lmap; lmap = new LabelMap(lmp, ntypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes); + // reset maxexchange + atom->avec->maxexchange = 0; } // ------------------------------------------------------------------------------------- @@ -1722,8 +1724,10 @@ void ReadData::bonds(int firstpass) error->all(FLERR, "Subsequent read data induced " "too many bonds per atom"); - } else + } else { atom->bond_per_atom = maxall; + atom->avec->maxexchange += 2 * maxall; + } memory->destroy(count); return; @@ -1801,9 +1805,10 @@ void ReadData::angles(int firstpass) error->all(FLERR, "Subsequent read data induced " "too many angles per atom"); - } else + } else { atom->angle_per_atom = maxall; - + atom->avec->maxexchange += 4 * maxall; + } memory->destroy(count); return; } @@ -1881,8 +1886,10 @@ void ReadData::dihedrals(int firstpass) error->all(FLERR, "Subsequent read data induced " "too many dihedrals per atom"); - } else + } else { atom->dihedral_per_atom = maxall; + atom->avec->maxexchange += 5 * maxall; + } memory->destroy(count); return; @@ -1959,8 +1966,10 @@ void ReadData::impropers(int firstpass) if (addflag != NONE) { if (maxall > atom->improper_per_atom) error->all(FLERR, "Subsequent read data 
induced too many impropers per atom"); - } else + } else { atom->improper_per_atom = maxall; + atom->avec->maxexchange += 5 * maxall; + } memory->destroy(count); return; From bb44c1441a4450cc78cea8d4ad399b500840227b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 25 Oct 2025 10:15:00 -0400 Subject: [PATCH 078/604] reformat code --- src/read_data.cpp | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index 6ee7595f3a4..a30482f73e0 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -1699,9 +1699,7 @@ void ReadData::bonds(int firstpass) eof = utils::read_lines_from_file(fp, nchunk, MAXLINE, buffer, me, world); if (eof) error->all(FLERR, "Unexpected end of data file"); if (blabelflag && !lmap->is_complete(Atom::BOND)) - error->all(FLERR, - "Label map is incomplete: " - "all types must be assigned a unique type label"); + error->all(FLERR, "Label map is incomplete: all types must be assigned a unique type label"); atom->data_bonds(nchunk, buffer, count, id_offset, boffset, blabelflag, lmap->lmap2lmap.bond); nread += nchunk; } @@ -1721,9 +1719,7 @@ void ReadData::bonds(int firstpass) if (addflag != NONE) { if (maxall > atom->bond_per_atom) - error->all(FLERR, - "Subsequent read data induced " - "too many bonds per atom"); + error->all(FLERR,"Subsequent read data induced too many bonds per atom"); } else { atom->bond_per_atom = maxall; atom->avec->maxexchange += 2 * maxall; @@ -1780,9 +1776,7 @@ void ReadData::angles(int firstpass) eof = utils::read_lines_from_file(fp, nchunk, MAXLINE, buffer, me, world); if (eof) error->all(FLERR, "Unexpected end of data file"); if (alabelflag && !lmap->is_complete(Atom::ANGLE)) - error->all(FLERR, - "Label map is incomplete: " - "all types must be assigned a unique type label"); + error->all(FLERR,"Label map is incomplete: all types must be assigned a unique type label"); atom->data_angles(nchunk, buffer, count, id_offset, aoffset, 
alabelflag, lmap->lmap2lmap.angle); nread += nchunk; } @@ -1802,10 +1796,8 @@ void ReadData::angles(int firstpass) if (addflag != NONE) { if (maxall > atom->angle_per_atom) - error->all(FLERR, - "Subsequent read data induced " - "too many angles per atom"); - } else { + error->all(FLERR,"Subsequent read data induced too many angles per atom"); + } else { atom->angle_per_atom = maxall; atom->avec->maxexchange += 4 * maxall; } @@ -1860,9 +1852,7 @@ void ReadData::dihedrals(int firstpass) eof = utils::read_lines_from_file(fp, nchunk, MAXLINE, buffer, me, world); if (eof) error->all(FLERR, "Unexpected end of data file"); if (dlabelflag && !lmap->is_complete(Atom::DIHEDRAL)) - error->all(FLERR, - "Label map is incomplete: " - "all types must be assigned a unique type label"); + error->all(FLERR,"Label map is incomplete: all types must be assigned a unique type label"); atom->data_dihedrals(nchunk, buffer, count, id_offset, doffset, dlabelflag, lmap->lmap2lmap.dihedral); nread += nchunk; @@ -1883,9 +1873,7 @@ void ReadData::dihedrals(int firstpass) if (addflag != NONE) { if (maxall > atom->dihedral_per_atom) - error->all(FLERR, - "Subsequent read data induced " - "too many dihedrals per atom"); + error->all(FLERR,"Subsequent read data induced too many dihedrals per atom"); } else { atom->dihedral_per_atom = maxall; atom->avec->maxexchange += 5 * maxall; @@ -1942,9 +1930,7 @@ void ReadData::impropers(int firstpass) eof = utils::read_lines_from_file(fp, nchunk, MAXLINE, buffer, me, world); if (eof) error->all(FLERR, "Unexpected end of data file"); if (ilabelflag && !lmap->is_complete(Atom::IMPROPER)) - error->all(FLERR, - "Label map is incomplete: " - "all types must be assigned a unique type label"); + error->all(FLERR,"Label map is incomplete: all types must be assigned a unique type label"); atom->data_impropers(nchunk, buffer, count, id_offset, ioffset, ilabelflag, lmap->lmap2lmap.improper); nread += nchunk; From f8f6f322959fb69fb4252a8daac974896e7a7471 Mon Sep 17 
00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 25 Oct 2025 10:42:48 -0400 Subject: [PATCH 079/604] write out and restore AtomVec::maxexchange with binary restart files --- src/create_box.cpp | 2 +- src/lmprestart.h | 2 +- src/read_restart.cpp | 2 ++ src/write_restart.cpp | 5 +++++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/create_box.cpp b/src/create_box.cpp index 8b0fb0f67f9..c103991b32e 100644 --- a/src/create_box.cpp +++ b/src/create_box.cpp @@ -262,7 +262,7 @@ void CreateBox::command(int narg, char **arg) // set per-atom communication buffersize for contributions like bonds, angles, etc. atom->avec->maxexchange = maxexchange; - + // setup the simulation box and initial system // deallocate/grow ensures any extra settings are used for topology arrays // necessary in case no create_atoms is performed diff --git a/src/lmprestart.h b/src/lmprestart.h index 40abcf4fe2b..f9549139102 100644 --- a/src/lmprestart.h +++ b/src/lmprestart.h @@ -38,7 +38,7 @@ enum{VERSION,SMALLINT,TAGINT,BIGINT, EXTRA_BOND_PER_ATOM,EXTRA_ANGLE_PER_ATOM,EXTRA_DIHEDRAL_PER_ATOM, EXTRA_IMPROPER_PER_ATOM,EXTRA_SPECIAL_PER_ATOM,ATOM_MAXSPECIAL, NELLIPSOIDS,NLINES,NTRIS,NBODIES,ATIME,ATIMESTEP,LABELMAP, - TRICLINIC_GENERAL,ROTATE_G2R}; + TRICLINIC_GENERAL,ROTATE_G2R,ATOM_MAXEXCHANGE}; #define LB_FACTOR 1.1 diff --git a/src/read_restart.cpp b/src/read_restart.cpp index f946952defd..671bbc7f105 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -835,6 +835,8 @@ void ReadRestart::header() atom->extra_improper_per_atom = read_int(); } else if (flag == ATOM_MAXSPECIAL) { atom->maxspecial = read_int(); + } else if (flag == ATOM_MAXEXCHANGE) { + if (atom->avec) atom->avec->maxexchange = read_int(); } else if (flag == NELLIPSOIDS) { atom->nellipsoids = read_bigint(); } else if (flag == NLINES) { diff --git a/src/write_restart.cpp b/src/write_restart.cpp index 525e2c8feae..80455107935 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -473,6 +473,11 @@ 
void WriteRestart::header() write_int(EXTRA_IMPROPER_PER_ATOM,atom->extra_improper_per_atom); write_int(ATOM_MAXSPECIAL,atom->maxspecial); + // write out AtomVec::maxexchange (extra storage for communicating + // per-atom bond, angle, dihedral, and improper data). added 25 Oct 2025 + + write_int(ATOM_MAXEXCHANGE,atom->avec->maxexchange); + write_bigint(NELLIPSOIDS,atom->nellipsoids); write_bigint(NLINES,atom->nlines); write_bigint(NTRIS,atom->ntris); From 59b4d3fc2731a5fa6bd6b4e34b02dd0f20f555ed Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Sat, 25 Oct 2025 12:19:34 -0600 Subject: [PATCH 080/604] WIP --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 728 ++++++++++++++------------ src/KOKKOS/atom_vec_hybrid_kokkos.h | 3 + 2 files changed, 386 insertions(+), 345 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index f5cd7a7ff80..a8fc2c0dd08 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -69,18 +69,21 @@ struct AtomVecHybridKokkos_PackComm { typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + unsigned int _datamask; AtomVecHybridKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_list(list.view()), + const double &xy, const double &xz, const double &yz, const int* const pbc, + const unsigned int &datamask): + _x(atomKK->k_x.view()),_list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = 
(buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; @@ -89,19 +92,20 @@ struct AtomVecHybridKokkos_PackComm { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); + int m = 0; if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); } else { if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } } } @@ -119,56 +123,56 @@ int AtomVecHybridKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); 
Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } @@ -190,23 +194,25 @@ struct 
AtomVecHybridKokkos_PackCommSelf { typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + unsigned int _datamask; AtomVecHybridKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const int &nfirst, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()), + const double &xy, const double &xz, const double &yz, const int* const pbc, + const unsigned int datamask): + _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()),_nfirst(nfirst),_list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); + const int j = _list(i); if (PBC_FLAG == 0) { _xw(i+_nfirst,0) = _x(j,0); _xw(i+_nfirst,1) = _x(j,1); @@ -231,64 +237,64 @@ struct AtomVecHybridKokkos_PackCommSelf { int AtomVecHybridKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst, const int &pbc_flag, const int* const pbc) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct 
AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } @@ -308,9 +314,10 @@ struct AtomVecHybridKokkos_PackCommSelfFused { typename AT::t_int_1d_const _sendnum_scan; typename AT::t_int_1d_const _g2l; double _xprd,_yprd,_zprd,_xy,_xz,_yz; + unsigned int _datamask; AtomVecHybridKokkos_PackCommSelfFused( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_int_2d_lr &list, const typename DAT::tdual_int_2d &pbc, const typename DAT::tdual_int_1d &pbc_flag, @@ -318,8 +325,9 @@ struct AtomVecHybridKokkos_PackCommSelfFused { const typename DAT::tdual_int_1d &sendnum_scan, const typename DAT::tdual_int_1d &g2l, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz): - _x(x.view()),_xw(x.view()), + const double &xy, const double &xz, const double &yz, + const unsigned int datamask): + _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -327,7 +335,7 @@ struct AtomVecHybridKokkos_PackCommSelfFused { _sendnum_scan(sendnum_scan.view()), _g2l(g2l.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) {}; + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& ii) const { @@ -369,36 +377,36 @@ int AtomVecHybridKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, const DAT::tdual_int_1d &g2l) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (domain->triclinic) { - struct 
AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } /* ---------------------------------------------------------------------- */ @@ -411,14 +419,16 @@ struct AtomVecHybridKokkos_UnpackComm { typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_double_2d_lr_const _buf; int _first; + 
unsigned int _datamask; AtomVecHybridKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int& first):_x(x.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const int &first, const unsigned int &datamask):_x(atomKK->k_x.view()), + _first(first),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; buffer_view(_buf,buf,maxsend,elements); }; @@ -435,15 +445,15 @@ struct AtomVecHybridKokkos_UnpackComm { void AtomVecHybridKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); - struct AtomVecHybridKokkos_UnpackComm f(atomKK->k_x,buf,first); + atomKK->sync(HostKK,datamask_comm); + struct AtomVecHybridKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); - struct AtomVecHybridKokkos_UnpackComm f(atomKK->k_x,buf,first); + atomKK->sync(Device,datamask_comm); + struct AtomVecHybridKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } } @@ -463,26 +473,27 @@ struct AtomVecHybridKokkos_PackCommVel { double _pbc[6]; double _h_rate[6]; const int _deform_vremap; + unsigned int _datamask; AtomVecHybridKokkos_PackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_int_1d &mask, - const typename DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, const double &xprd, const 
double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, const double * const h_rate, - const int &deform_vremap): - _x(x.view()), - _mask(mask.view()), - _v(v.view()), + const int &deform_vremap, + const unsigned int &datamask): + _x(atomKK->k_x.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap) + _deform_vremap(deform_vremap), + _datamask(datamask) { - const size_t elements = 6; + const size_t elements = atomKK->avecKK->size_forward; //////////?? const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -541,126 +552,126 @@ int AtomVecHybridKokkos::pack_comm_vel_kokkos( const int* const pbc) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); + atomKK->sync(HostKK,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + 
domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { - atomKK->sync(Device,X_MASK|V_MASK); + atomKK->sync(Device,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel 
f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } - return n*6; + return n*6; /////////////??? 
} /* ---------------------------------------------------------------------- */ @@ -674,29 +685,30 @@ struct AtomVecHybridKokkos_UnpackCommVel { typename AT::t_kkfloat_1d_3 _v; typename AT::t_double_2d_lr_const _buf; int _first; + unsigned int _datamask; AtomVecHybridKokkos_UnpackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _v(v.view()), - _first(first) + const int &first, const int &datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _first(first),_datamask(datamask) { - const size_t elements = 6; + const size_t elements = 6; //////////?? const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _v(i+_first,0) = _buf(i,3); - _v(i+_first,1) = _buf(i,4); - _v(i+_first,2) = _buf(i,5); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); } }; @@ -705,15 +717,15 @@ struct AtomVecHybridKokkos_UnpackCommVel { void AtomVecHybridKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); - struct AtomVecHybridKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); + atomKK->sync(HostKK,datamask_comm_vel); + struct AtomVecHybridKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK|V_MASK); + atomKK->modified(HostKK,datamask_comm_vel); } else { - atomKK->sync(Device,X_MASK|V_MASK); - struct AtomVecHybridKokkos_UnpackCommVel 
f(atomKK->k_x,atomKK->k_v,buf,first); + atomKK->sync(Device,datamask_comm_vel); + struct AtomVecHybridKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK|V_MASK); + atomKK->modified(Device,datamask_comm_vel); } } @@ -727,22 +739,24 @@ struct AtomVecHybridKokkos_PackReverse { typename AT::t_kkacc_1d_3_randomread _f; typename AT::t_double_2d_lr _buf; int _first; + unsigned int _datamask; AtomVecHybridKokkos_PackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int& first):_f(f.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const int &first, const unsigned int &datamask):_f(atomKK->k_f.view()), + _first(first),_datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _buf(i,0) = _f(i+_first,0); - _buf(i,1) = _f(i+_first,1); - _buf(i,2) = _f(i+_first,2); + int m = 0; + _buf(i,m++) = _f(i+_first,0); + _buf(i,m++) = _f(i+_first,1); + _buf(i,m++) = _f(i+_first,2); } }; @@ -751,12 +765,12 @@ struct AtomVecHybridKokkos_PackReverse { int AtomVecHybridKokkos::pack_reverse_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecHybridKokkos_PackReverse f(atomKK->k_f,buf,first); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecHybridKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); Kokkos::parallel_for(n,f); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecHybridKokkos_PackReverse f(atomKK->k_f,buf,first); + atomKK->sync(Device,datamask_reverse); + struct AtomVecHybridKokkos_PackReverse 
f(atomKK,buf,first,datamask_reverse); Kokkos::parallel_for(n,f); } @@ -772,14 +786,18 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { typename AT::t_kkacc_1d_3_randomread _f; typename AT::t_kkacc_1d_3 _fw; - int _nfirst; typename AT::t_int_1d_const _list; + int _nfirst; + unsigned int _datamask; AtomVecHybridKokkos_UnPackReverseSelf( - const typename DAT::ttransform_kkacc_1d_3 &f, + const AtomKokkos* atomKK, const int &nfirst, - const typename DAT::tdual_int_1d &list): - _f(f.view()),_fw(f.view()),_nfirst(nfirst),_list(list.view()) { + const typename DAT::tdual_int_1d &list, + const unsigned int &datamask): + _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), + _nfirst(nfirst),_list(list.view()), + _datamask(datamask) { }; KOKKOS_INLINE_FUNCTION @@ -796,18 +814,18 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { int AtomVecHybridKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); + atomKK->modified(HostKK,datamask_reverse); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + atomKK->sync(Device,datamask_reverse); + struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - atomKK->modified(Device,F_MASK); + atomKK->modified(Device,datamask_reverse); } - return n*3; + return n*size_reverse; } /* ---------------------------------------------------------------------- */ @@ -820,14 +838,17 @@ struct AtomVecHybridKokkos_UnPackReverse { typename AT::t_kkacc_1d_3 _f; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; + unsigned int _datamask; 
AtomVecHybridKokkos_UnPackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list): - _f(f.view()),_list(list.view()) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const typename DAT::tdual_int_1d &list, + const unsigned int datamask): + _f(atomKK->k_f.view()),_list(list.view()), + _datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; @@ -850,15 +871,15 @@ void AtomVecHybridKokkos::unpack_reverse_kokkos(const int &n, // Choose correct reverse UnPackReverse kernel if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecHybridKokkos_UnPackReverse f(atomKK->k_f,buf,list); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecHybridKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); + atomKK->modified(HostKK,datamask_reverse); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecHybridKokkos_UnPackReverse f(atomKK->k_f,buf,list); + atomKK->sync(Device,datamask_reverse); + struct AtomVecHybridKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); Kokkos::parallel_for(n,f); - atomKK->modified(Device,F_MASK); + atomKK->modified(Device,datamask_reverse); } } @@ -878,43 +899,53 @@ struct AtomVecHybridKokkos_PackBorder { const typename AT::t_kkfloat_1d _q; const typename AT::t_tagint_1d _molecule; double _dx,_dy,_dz; + unsigned int _datamask; AtomVecHybridKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - 
const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const unsigned int &datamask): + _buf(buf),_list(list), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; - } + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + + if (_datamask & Q_MASK) + _buf(i,6) = _q(j); + + if (_datamask & MOLECULE_MASK) + _buf(i,7) = d_ubuf(_molecule(j)).d; + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + + if (_datamask & Q_MASK) + _buf(i,6) = _q(j); + + if (_datamask & MOLECULE_MASK) + _buf(i,7) = d_ubuf(_molecule(j)).d; + } } }; @@ -924,6 +955,8 @@ int 
AtomVecHybridKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, int pbc_flag, int *pbc, ExecutionSpace space) { + atomKK->sync(space,datamask_border); + double dx,dy,dz; if (pbc_flag != 0) { @@ -936,29 +969,29 @@ int AtomVecHybridKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, dy = pbc[1]; dz = pbc[2]; } - if (space==Host) { + if (space == Host) { AtomVecHybridKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); Kokkos::parallel_for(n,f); } else { AtomVecHybridKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); Kokkos::parallel_for(n,f); } } else { dx = dy = dz = 0; - if (space==Host) { + if (space == Host) { AtomVecHybridKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); Kokkos::parallel_for(n,f); } else { AtomVecHybridKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); Kokkos::parallel_for(n,f); } } @@ -980,30 +1013,35 @@ struct AtomVecHybridKokkos_UnpackBorder { typename AT::t_kkfloat_1d _q; typename AT::t_tagint_1d _molecule; int _first; - + unsigned int _datamask; AtomVecHybridKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - typename AT::t_tagint_1d &molecule, - const int& first): - 
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _first(first) { + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, const unsigned int &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _first(first),_datamask(datamask) { }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + + if (_datamask & Q_MASK) _q(i+_first) = _buf(i,6); + + if (_datamask & MOLECULE_MASK) _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; } }; @@ -1013,21 +1051,21 @@ struct AtomVecHybridKokkos_UnpackBorder { void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf, ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->sync(space,datamask_border); while (first+n >= nmax) grow(0); if (space==Host) { struct AtomVecHybridKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); + f(atomKK,buf.view_host(),first,datamask_border); Kokkos::parallel_for(n,f); } else { struct AtomVecHybridKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first); + f(atomKK,buf.view_device(),first,datamask_border); Kokkos::parallel_for(n,f); } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + 
atomKK->modified(space,datamask_border); } /* ---------------------------------------------------------------------- */ @@ -1093,74 +1131,74 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { unsigned int _datamask; AtomVecHybridKokkos_PackExchangeFunctor( - const AtomKokkos* atom, + const AtomKokkos* atomKK, const DAT::tdual_double_2d_lr buf, DAT::tdual_int_1d sendlist, DAT::tdual_int_1d copylist, const unsigned int datamask): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - 
_num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + 
_improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _xw(atomKK->k_x.view()), + _vw(atomKK->k_v.view()), + _tagw(atomKK->k_tag.view()), + _typew(atomKK->k_type.view()), + _maskw(atomKK->k_mask.view()), + _imagew(atomKK->k_image.view()), + _qw(atomKK->k_q.view()), + _moleculew(atomKK->k_molecule.view()), + _nspecialw(atomKK->k_nspecial.view()), + _specialw(atomKK->k_special.view()), + _num_bondw(atomKK->k_num_bond.view()), + _bond_typew(atomKK->k_bond_type.view()), + _bond_atomw(atomKK->k_bond_atom.view()), + _num_anglew(atomKK->k_num_angle.view()), + _angle_typew(atomKK->k_angle_type.view()), + _angle_atom1w(atomKK->k_angle_atom1.view()), + _angle_atom2w(atomKK->k_angle_atom2.view()), + _angle_atom3w(atomKK->k_angle_atom3.view()), + _num_dihedralw(atomKK->k_num_dihedral.view()), + _dihedral_typew(atomKK->k_dihedral_type.view()), + _dihedral_atom1w(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2w(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3w(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4w(atomKK->k_dihedral_atom4.view()), + _num_improperw(atomKK->k_num_improper.view()), + _improper_typew(atomKK->k_improper_type.view()), + _improper_atom1w(atomKK->k_improper_atom1.view()), + _improper_atom2w(atomKK->k_improper_atom2.view()), + _improper_atom3w(atomKK->k_improper_atom3.view()), + _improper_atom4w(atomKK->k_improper_atom4.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange), + _size_exchange(atomKK->avecKK->size_exchange), _datamask(datamask) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; @@ -1375,45 +1413,45 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { unsigned int _datamask; 
AtomVecHybridKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, + const AtomKokkos* atomKK, const DAT::tdual_double_2d_lr buf, DAT::tdual_int_1d nlocal, DAT::tdual_int_1d indices, int dim, double lo, double hi, unsigned int datamask): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + 
_angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), _nlocal(nlocal.template view()), _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atomKK->avecKK->size_exchange), _datamask(datamask) { const int maxsendlist = (buf.template view().extent(0)* buf.template view().extent(1))/_size_exchange; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 3322a0e80d0..369a5159d7a 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -151,6 +151,9 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { HAT::t_int_2d h_dihedral_type; HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, h_dihedral_atom3,h_dihedral_atom4; + + DAT::t_kkfloat_1d_4 d_mu; + HAT::t_kkfloat_1d_4 h_mu; }; } // namespace LAMMPS_NS From 69c1ee8f517378276ed377e407b84975421b04d7 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Sat, 25 Oct 2025 13:50:38 -0600 Subject: [PATCH 081/604] WIP --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 245 ++++++++++++++++++++------ 1 file changed, 188 insertions(+), 57 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index a8fc2c0dd08..7394f04fcfa 
100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -65,6 +65,7 @@ struct AtomVecHybridKokkos_PackComm { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -78,7 +79,9 @@ struct AtomVecHybridKokkos_PackComm { const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, const unsigned int &datamask): - _x(atomKK->k_x.view()),_list(list.view()), + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { const int size_forward = atomKK->avecKK->size_forward; @@ -97,15 +100,33 @@ struct AtomVecHybridKokkos_PackComm { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } } else { if (TRICLINIC == 0) { _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } } else { _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } } } } @@ -190,6 +211,8 @@ struct AtomVecHybridKokkos_PackCommSelf { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4 _muw; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -203,7 +226,9 @@ struct 
AtomVecHybridKokkos_PackCommSelf { const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, const unsigned int datamask): - _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()),_nfirst(nfirst),_list(list.view()), + _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), + _nfirst(nfirst),_list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -212,23 +237,40 @@ struct AtomVecHybridKokkos_PackCommSelf { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + const int j = _list(i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = 
_mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); } } - + } } }; @@ -307,6 +349,8 @@ struct AtomVecHybridKokkos_PackCommSelfFused { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4 _muw; typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -328,6 +372,7 @@ struct AtomVecHybridKokkos_PackCommSelfFused { const double &xy, const double &xz, const double &yz, const unsigned int datamask): _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -357,15 +402,33 @@ struct AtomVecHybridKokkos_PackCommSelfFused { _xw(i+_nfirst,0) = _x(j,0); _xw(i+_nfirst,1) = _x(j,1); _xw(i+_nfirst,2) = _x(j,2); + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } else { if (TRICLINIC == 0) { _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } else { _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } } } @@ -417,15 +480,18 @@ struct AtomVecHybridKokkos_UnpackComm { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; typename AT::t_double_2d_lr_const _buf; int _first; unsigned int _datamask; AtomVecHybridKokkos_UnpackComm( - const AtomKokkos* atomKK, - const typename 
DAT::tdual_double_2d_lr &buf, - const int &first, const unsigned int &datamask):_x(atomKK->k_x.view()), - _first(first),_datamask(datamask) { + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const unsigned int &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _first(first),_datamask(datamask) { const int size_forward = atomKK->avecKK->size_forward; const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; const size_t elements = size_forward; @@ -434,9 +500,16 @@ struct AtomVecHybridKokkos_UnpackComm { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } } }; @@ -896,8 +969,9 @@ struct AtomVecHybridKokkos_PackBorder { const typename AT::t_tagint_1d _tag; const typename AT::t_int_1d _type; const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; const typename AT::t_tagint_1d _molecule; + const typename AT::t_kkfloat_1d _q; + const typename AT::t_kkfloat_1d_4 _mu; double _dx,_dy,_dz; unsigned int _datamask; @@ -912,39 +986,55 @@ struct AtomVecHybridKokkos_PackBorder { _tag(atomKK->k_tag.view()), _type(atomKK->k_type.view()), _mask(atomKK->k_mask.view()), - _q(atomKK->k_q.view()), _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); + int m = 0; if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,m++) = _x(j,0); + 
_buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; if (_datamask & Q_MASK) - _buf(i,6) = _q(j); + _buf(i,m++) = _q(j); - if (_datamask & MOLECULE_MASK) - _buf(i,7) = d_ubuf(_molecule(j)).d; + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; if (_datamask & Q_MASK) - _buf(i,6) = _q(j); + _buf(i,m++) = _q(j); + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } - if (_datamask & MOLECULE_MASK) - _buf(i,7) = d_ubuf(_molecule(j)).d; } } }; @@ -1010,8 +1100,9 @@ struct AtomVecHybridKokkos_UnpackBorder { typename AT::t_tagint_1d _tag; typename AT::t_int_1d _type; typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; typename AT::t_tagint_1d _molecule; + typename AT::t_kkfloat_1d _q; + typename AT::t_kkfloat_1d_4 _mu; int _first; unsigned int _datamask; @@ -1024,38 +1115,47 @@ struct AtomVecHybridKokkos_UnpackBorder { _tag(atomKK->k_tag.view()), _type(atomKK->k_type.view()), _mask(atomKK->k_mask.view()), - _q(atomKK->k_q.view()), _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), _first(first),_datamask(datamask) { }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - 
_x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,6); + _q(i+_first) = _buf(i,m++); - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } } }; /* ---------------------------------------------------------------------- */ void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space) { atomKK->sync(space,datamask_border); while (first+n >= nmax) grow(0); - if (space==Host) { + if (space == Host) { struct AtomVecHybridKokkos_UnpackBorder f(atomKK,buf.view_host(),first,datamask_border); Kokkos::parallel_for(n,f); @@ -1099,6 +1199,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_int_2d_randomread _improper_type; typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_3 _vw; typename AT::t_tagint_1d _tagw; @@ -1123,6 +1225,7 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_int_2d _improper_typew; typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, _improper_atom3w,_improper_atom4w; + typename AT::t_kkfloat_1d_4 _muw; typename 
AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _sendlist; @@ -1166,6 +1269,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom2(atomKK->k_improper_atom2.view()), _improper_atom3(atomKK->k_improper_atom3.view()), _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _xw(atomKK->k_x.view()), _vw(atomKK->k_v.view()), _tagw(atomKK->k_tag.view()), @@ -1196,6 +1301,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom2w(atomKK->k_improper_atom2.view()), _improper_atom3w(atomKK->k_improper_atom3.view()), _improper_atom4w(atomKK->k_improper_atom4.view()), + _muw(atomKK->k_mu.view()), + _sendlist(sendlist.template view()), _copylist(copylist.template view()), _size_exchange(atomKK->avecKK->size_exchange), @@ -1276,6 +1383,13 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _buf(mysend,m++) = d_ubuf(_special(i,k)).d; } + if (_datamask & MU_MASK) { + _buf(mysend,m++) = _mu(i,0); + _buf(mysend,m++) = _mu(i,1); + _buf(mysend,m++) = _mu(i,2); + _buf(mysend,m++) = _mu(i,3); + } + const int j = _copylist(mysend); if (j > -1) { @@ -1343,6 +1457,13 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { for (int k = 0; k < _nspecial(j,2); k++) _specialw(i,k) = _special(j,k); } + + if (_datamask & MU_MASK) { + _muw(i,0) = _mu(j,0); + _muw(i,1) = _mu(j,1); + _muw(i,2) = _mu(j,2); + _muw(i,3) = _mu(j,3); + } } } }; @@ -1403,6 +1524,7 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { typename AT::t_int_2d _improper_type; typename AT::t_tagint_2d _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d _nlocal; @@ -1449,6 +1571,8 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _improper_atom2(atomKK->k_improper_atom2.view()), _improper_atom3(atomKK->k_improper_atom3.view()), _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _nlocal(nlocal.template view()), _indices(indices.template 
view()), _dim(dim),_lo(lo),_hi(hi),_size_exchange(atomKK->avecKK->size_exchange), @@ -1529,6 +1653,13 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { for (int k = 0; k < _nspecial(i,2); k++) _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; } + + if (_datamask & MU_MASK) { + _mu(i,0) = _buf(myrecv,m++); + _mu(i,1) = _buf(myrecv,m++); + _mu(i,2) = _buf(myrecv,m++); + _mu(i,3) = _buf(myrecv,m++); + } } if (OUTPUT_INDICES) From 691000a5aecd63a67fa90814ebc4a2e4e1ea3a26 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 26 Oct 2025 14:31:11 -0400 Subject: [PATCH 082/604] json key updates --- src/REAXFF/fix_reaxff_species.cpp | 3 ++- src/output.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index ae0b79aa0cd..a0351b2fabc 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -245,10 +245,11 @@ FixReaxFFSpecies::FixReaxFFSpecies(LAMMPS *lmp, int narg, char **arg) : // header for 'delete' keyword JSON output fprintf(fdel, "{\n"); fprintf(fdel, " \"application\": \"LAMMPS\",\n"); + fprintf(fdel, " \"units\": \"%s\",\n", update->unit_style); fprintf(fdel, " \"format\": \"dump\",\n"); fprintf(fdel, " \"style\": \"molecules\",\n"); - fprintf(fdel, " \"title\": \"fix reaxff/species: delete keyword\",\n"); fprintf(fdel, " \"revision\": 1,\n"); + fprintf(fdel, " \"title\": \"fix reaxff/species: delete keyword\",\n"); fprintf(fdel, " \"timesteps\": [\n"); fflush(fdel); } diff --git a/src/output.cpp b/src/output.cpp index f32852d98be..756682e9c7f 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -709,7 +709,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i fprintf(fp, "%s\"types\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); if (printflag == 1 && json_init == 1) - fprintf(fp, "%s\"format\": [\"atom-tag\", \"type\"],\n", indent.c_str()); + fprintf(fp, "%s\"format\": [\"atom-id\", 
\"type\"],\n", indent.c_str()); fprintf(fp, "%s\"data\": [\n", indent.c_str()); indent.resize(++json_level*tab, ' '); auto it = atoms_root.begin(); @@ -730,7 +730,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i fprintf(fp, "%s\"coords\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); if (printflag == 1 && json_init == 1) - fprintf(fp, "%s\"format\": [\"atom-tag\", \"x\", \"y\", \"z\"],\n", indent.c_str()); + fprintf(fp, "%s\"format\": [\"atom-id\", \"x\", \"y\", \"z\"],\n", indent.c_str()); if (json_init == 1) json_init++; fprintf(fp, "%s\"data\": [\n", indent.c_str()); indent.resize(++json_level*tab, ' '); From e656c95c40ffb7c310beb44ffb0f0e2f2839266a Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 26 Oct 2025 14:48:58 -0400 Subject: [PATCH 083/604] docs+tools update --- doc/src/fix_reaxff_species.rst | 36 ++++---- tools/json/README.md | 5 +- tools/json/dump-molecules-schema.json | 121 ++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 19 deletions(-) create mode 100644 tools/json/dump-molecules-schema.json diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index 02848c83380..90996b7106a 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -180,16 +180,18 @@ deleted on the first timestep: { "application": "LAMMPS", - "format": "output", - "subformat": "fix reaxff/species: delete keyword", + "units": "real", + "format": "dump", + "style": "molecules", "revision": 1, - "run_output": [ + "title": "fix reaxff/species: delete keyword", + "timesteps": [ { "timestep": 1, "molecules": [ { "types": { - "format": ["atom-tag", "type"], + "format": ["atom-id", "type"], "data": [ [1368, "H"], [1366, "O"], @@ -197,7 +199,7 @@ deleted on the first timestep: ] }, "coords": { - "format": ["atom-tag", "x", "y", "z"], + "format": ["atom-id", "x", "y", "z"], "data": [ [1368, 26.787767440427466, 29.785528640296768, 25.85197353660144], [1366, 
26.641801222582824, 29.868106247702887, 24.91285138212243], @@ -210,18 +212,18 @@ deleted on the first timestep: ] } -The first-level keys of the JSON format output are "application", "format", -"subformat", "revision", and "run_output". The value of the "run_output" -key is an array of objects that contain data for each timestep on which a -molecule was deleted, and the other first-level keys identify this JSON -schema. The "run_output" objects contain two keys, "timestep" and -"molecules". The "molecules" key is an array of :doc:`LAMMPS -molecule JSON ` objects, one for each deleted molecule. The -"format" keys within molecule JSON objects are only listed once per output -file, for brevity. The "atom-tag" values are atom IDs from the simulation, -and the "type" values are atom types. In the above example, the types were -reported as strings corresponding to elements using :doc:`type labels -`. +The required first-level keys of the JSON format output are "application", +"format", "style", "revision", and "timesteps", and optional keys are +"units" and "title". The value of the "timesteps" key is an array of +objects that contain data for each timestep on which a molecule was +deleted, and the other first-level keys identify this JSON schema. The +objects in "timesteps" contain two keys, "timestep" and "molecules". The +"molecules" key is an array of :doc:`LAMMPS molecule JSON ` +objects, one for each deleted molecule. The "format" keys within molecule +JSON objects are only listed once per output file, for brevity. The +"atom-id" values are atom IDs from the simulation, and the "type" values +are atom types. In the above example, the types were reported as strings +corresponding to elements using :doc:`type labels `. 
---------- diff --git a/tools/json/README.md b/tools/json/README.md index a4b2a11f1ff..f7f4b10e27d 100644 --- a/tools/json/README.md +++ b/tools/json/README.md @@ -1,6 +1,6 @@ This folder contains files and tools for creating, modifying, and validating files in JSON format. This is work in progress while we are adding JSON -support +support. # JSON file format validation. @@ -10,7 +10,8 @@ We provide schema files for the file formats that LAMMPS supports following the specifications available on [JSON-Schema](https://json-schema.org) webpage. The following files are currently available. -- `molecule-schema.json` Schema file for the JSON format molecule files. +- `molecule-schema.json` Schema file for JSON-format molecule files. +- `dump-molecules-schema.json` Schema file for the 'dump' format in the 'molecules' style. These files provide a concise description of the hierarchy and supported fields in JSON file formats. Thus they provide a detailed documentation and can also diff --git a/tools/json/dump-molecules-schema.json b/tools/json/dump-molecules-schema.json new file mode 100644 index 00000000000..5fc24895032 --- /dev/null +++ b/tools/json/dump-molecules-schema.json @@ -0,0 +1,121 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://download.lammps.org/json/dump-molecules-schema.json", + "title": "JSON schema for LAMMPS 'dump' format in the 'molecules' style", + "description": "Version 0.1; last updated 2025-10-26", + "type": "object", + "required": ["application", "format", "style", "revision", "timesteps"], + "properties": { + "application": { + "type": "string", + "const": "LAMMPS" + }, + "units": { + "enum": ["lj", "real", "metal", "si", "cgs", "electron", "micro", "nano" ] + }, + "format": { + "type": "string", + "const": "dump" + }, + "style": { + "type": "string", + "const": "molecules" + }, + "revision": { + "type": "integer", + "minimum": 1, + "maximum": 1 + }, + "title": {"type": "string"}, + "schema": {"type": 
"string"}, + "timesteps": { + "type": "array", + "items": { + "type": "object", + "required": ["timestep", "molecules"], + "properties": { + "timestep": { + "type": "integer", + "minimum": 0 + }, + "molecules": { + "type": "array", + "items": { + "type": "object", + "required": ["types", "coords"], + "properties": { + "types": { + "type": "object", + "required": ["data"], + "properties": { + "format": { + "type": "array", + "const": ["atom-id", "type"] + }, + "data": { + "type": "array", + "items": { + "type": "array", + "prefixItems": [ + {"type": "number"}, + {"type": ["number", "string"]} + ], + "items": false + } + } + } + }, + "coords": { + "type": "object", + "required": ["data"], + "properties": { + "format": { + "type": "array", + "const": ["atom-id", "x", "y", "z"] + }, + "data": { + "type": "array", + "items": { + "type": "array", + "prefixItems": [ + {"type": "integer"}, + {"type": "number"}, + {"type": "number"}, + {"type": "number"} + ], + "items": false + } + } + } + + }, + "bonds": { + "type": "object", + "required": ["data"], + "properties": { + "format": { + "type": "array", + "const": ["bond-type", "atom1", "atom2"] + }, + "data": { + "type": "array", + "items": { + "type": "array", + "prefixItems": [ + {"type": ["integer", "string"]}, + {"type": "integer"}, + {"type": "integer"} + ], + "items": false + } + } + } + } + } + } + } + } + } + } + } +} From 15a009b6949d82e39c60a5cecec871d2d42549d2 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 27 Oct 2025 10:43:56 -0600 Subject: [PATCH 084/604] Removing extra volume factor --- src/RHEO/pair_rheo.cpp | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/RHEO/pair_rheo.cpp b/src/RHEO/pair_rheo.cpp index 0135c83882e..eee171dac35 100644 --- a/src/RHEO/pair_rheo.cpp +++ b/src/RHEO/pair_rheo.cpp @@ -272,21 +272,21 @@ void PairRHEO::compute(int eflag, int vflag) alpha_ave = 0.5 * (alphai + alphaj); } dT_prefactor = - 2.0 * alpha_ave * (Ti - Tj) * 
rinv * rinv * voli * volj; + 2.0 * alpha_ave * (Ti - Tj) * rinv * rinv; dT = dot3(dx, dWij); - heatflow[i] += dT * dT_prefactor; + heatflow[i] += dT * dT_prefactor * volj; if (newton_pair || j < nlocal) { dT = dot3(dx, dWji); - heatflow[j] += dT * dT_prefactor; + heatflow[j] += dT * dT_prefactor * voli; } } if (pair_force_flag) { //Hydrostatic pressure forces - fp_prefactor = voli * volj * (Pj + Pi); + fp_prefactor = Pj + Pi; sub3(vi, vj, dv); if (harmonic_means_flag) { @@ -307,17 +307,17 @@ void PairRHEO::compute(int eflag, int vflag) mu /= (rsq * cutkinv3 * cutkinv3 + EPSILON); mu = MIN(0.0, mu); q = av * (-2.0 * cs_ave * mu + mu * mu); - fp_prefactor += voli * volj * q * (rhoj + rhoi); + fp_prefactor += q * (rhoj + rhoi); } // -Grad[P + Q] - scale3(-fp_prefactor, dWij, dfp); + scale3(-fp_prefactor * volj, dWij, dfp); // Now compute viscous eta*Lap[v] terms for (a = 0; a < dim; a++) { fv[a] = 0.0; for (b = 0; b < dim; b++) fv[a] += dv[a] * dx[b] * dWij[b]; - fv[a] *= 2.0 * eta_ave * voli * volj * rinv * rinv; + fv[a] *= 2.0 * eta_ave * volj * rinv * rinv; } add3(fv, dfp, ft); @@ -326,6 +326,11 @@ void PairRHEO::compute(int eflag, int vflag) f[i][0] += ft[0]; f[i][1] += ft[1]; f[i][2] += ft[2]; + if (compute_interface) { + fp_store[i][0] += dfp[0]; + fp_store[i][1] += dfp[1]; + fp_store[i][2] += dfp[2]; + } // Note the virial's definition is hazy, e.g. 
viscous contributions will depend on rotation if (evflag) @@ -336,33 +341,26 @@ void PairRHEO::compute(int eflag, int vflag) for (a = 0; a < dim; a++) { fv[a] = 0.0; for (b = 0; b < dim; b++) fv[a] += (vi[a] - vj[a]) * dx[b] * dWji[b]; - fv[a] *= -2.0 * eta_ave * voli * volj * rinv * rinv; + fv[a] *= -2.0 * eta_ave * voli * rinv * rinv; // flip sign here b/c -= at accummulator } - scale3(fp_prefactor, dWji, dfp); + scale3(fp_prefactor * voli, dWji, dfp); add3(fv, dfp, ft); add3(fsolid, ft, ft); f[j][0] -= ft[0]; f[j][1] -= ft[1]; f[j][2] -= ft[2]; - - if (evflag) - ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, ft[0], ft[1], ft[2], -dx[0], -dx[1], - -dx[2]); - } - - if (compute_interface) { - fp_store[i][0] += dfp[0]; - fp_store[i][1] += dfp[1]; - fp_store[i][2] += dfp[2]; - - if (newton_pair || j < nlocal) { + if (compute_interface) { fp_store[j][0] -= dfp[0]; fp_store[j][1] -= dfp[1]; fp_store[j][2] -= dfp[2]; } + + if (evflag) + ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, ft[0], ft[1], ft[2], -dx[0], -dx[1], + -dx[2]); } } From 87b978f77b95bb6ea9aed192bf54ad5686b92a59 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 27 Oct 2025 11:12:54 -0600 Subject: [PATCH 085/604] WIP --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 604 ++++++++++++++++++++------ src/KOKKOS/atom_vec_hybrid_kokkos.h | 15 + 2 files changed, 488 insertions(+), 131 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 7394f04fcfa..0d3e551a5c5 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -27,8 +27,6 @@ using namespace LAMMPS_NS; AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecHybrid(lmp) { - no_comm_vel_flag = 1; - no_border_vel_flag = 1; } /* ---------------------------------------------------------------------- */ @@ -100,35 +98,23 @@ struct AtomVecHybridKokkos_PackComm { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = 
_x(j,2); - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } } else { if (TRICLINIC == 0) { _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } } else { _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } } } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } } }; @@ -210,9 +196,10 @@ struct AtomVecHybridKokkos_PackCommSelf { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d _radiusw,_rmassw; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -242,35 +229,23 @@ struct AtomVecHybridKokkos_PackCommSelf { _xw(i+_nfirst,0) = _x(j,0); _xw(i+_nfirst,1) = _x(j,1); _xw(i+_nfirst,2) = _x(j,2); - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } else { if (TRICLINIC == 0) { _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } else { _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - - if (_datamask & MU_MASK) { - 
_muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } } + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } }; @@ -339,7 +314,6 @@ int AtomVecHybridKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &l return n*size_forward; } - /* ---------------------------------------------------------------------- */ template @@ -348,9 +322,10 @@ struct AtomVecHybridKokkos_PackCommSelfFused { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d _radiusw,_rmassw; typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -402,35 +377,23 @@ struct AtomVecHybridKokkos_PackCommSelfFused { _xw(i+_nfirst,0) = _x(j,0); _xw(i+_nfirst,1) = _x(j,1); _xw(i+_nfirst,2) = _x(j,2); - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } else { if (TRICLINIC == 0) { _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } else { _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } } } + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } } }; @@ -540,6 +503,8 @@ struct 
AtomVecHybridKokkos_PackCommVel { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_int_1d _mask; typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_3 _omega; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -560,13 +525,14 @@ struct AtomVecHybridKokkos_PackCommVel { _x(atomKK->k_x.view()), _mask(atomKK->k_mask.view()), _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _omega(atomKK->k_omega.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz), _deform_vremap(deform_vremap), _datamask(datamask) { - const size_t elements = atomKK->avecKK->size_forward; //////////?? + const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -577,41 +543,54 @@ struct AtomVecHybridKokkos_PackCommVel { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + 
_pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } if (DEFORM_VREMAP == 0) { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (_mask(i) & _deform_vremap) { - _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + _buf(i,m++) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,m++) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,m++) = _v(j,2) + _pbc[2]*_h_rate[2]; } else { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } } } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } } }; @@ -744,7 +723,7 @@ int AtomVecHybridKokkos::pack_comm_vel_kokkos( } } - return n*6; /////////////??? + return n*(size_forward + size_velocity); } /* ---------------------------------------------------------------------- */ @@ -756,6 +735,8 @@ struct AtomVecHybridKokkos_UnpackCommVel { typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_3 _omega; typename AT::t_double_2d_lr_const _buf; int _first; unsigned int _datamask; @@ -766,9 +747,11 @@ struct AtomVecHybridKokkos_UnpackCommVel { const int &first, const int &datamask): _x(atomKK->k_x.view()), _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _omega(atomKK->k_omega.view()), _first(first),_datamask(datamask) { - const size_t elements = 6; //////////?? 
+ const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; @@ -782,6 +765,18 @@ struct AtomVecHybridKokkos_UnpackCommVel { _v(i+_first,0) = _buf(i,m++); _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } } }; @@ -810,15 +805,18 @@ struct AtomVecHybridKokkos_PackReverse { typedef ArrayTypes AT; typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkfloat_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; unsigned int _datamask; AtomVecHybridKokkos_PackReverse( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const int &first, const unsigned int &datamask):_f(atomKK->k_f.view()), - _first(first),_datamask(datamask) { + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const unsigned int &datamask): + _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), + _first(first),_datamask(datamask) { const size_t elements = atomKK->avecKK->size_reverse; const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); @@ -830,6 +828,12 @@ struct AtomVecHybridKokkos_PackReverse { _buf(i,m++) = _f(i+_first,0); _buf(i,m++) = _f(i+_first,1); _buf(i,m++) = _f(i+_first,2); + + if (_datamask & TORQUE_MASK) { + _buf(i,m++) = _torque(i+_first,0); + _buf(i,m++) = _torque(i+_first,1); + _buf(i,m++) = _torque(i+_first,2); + } } }; @@ -858,20 +862,22 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { typedef ArrayTypes AT; typename AT::t_kkacc_1d_3_randomread _f; + typename 
AT::t_kkfloat_1d_3_randomread _torque; typename AT::t_kkacc_1d_3 _fw; + typename AT::t_kkfloat_1d_3 _torquew; typename AT::t_int_1d_const _list; int _nfirst; unsigned int _datamask; AtomVecHybridKokkos_UnPackReverseSelf( - const AtomKokkos* atomKK, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const unsigned int &datamask): + const AtomKokkos* atomKK, + const int &nfirst, + const typename DAT::tdual_int_1d &list, + const unsigned int &datamask): _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()),_torquew(atomKK->k_torque.view()), _nfirst(nfirst),_list(list.view()), - _datamask(datamask) { - }; + _datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { @@ -879,6 +885,12 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { _fw(j,0) += _f(i+_nfirst,0); _fw(j,1) += _f(i+_nfirst,1); _fw(j,2) += _f(i+_nfirst,2); + + if (_datamask & TORQUE_MASK) { + _torquew(j,0) += _torque(i+_nfirst,0); + _torquew(j,1) += _torque(i+_nfirst,1); + _torquew(j,2) += _torque(i+_nfirst,2); + } } }; @@ -909,28 +921,38 @@ struct AtomVecHybridKokkos_UnPackReverse { typedef ArrayTypes AT; typename AT::t_kkacc_1d_3 _f; + typename AT::t_kkfloat_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; unsigned int _datamask; AtomVecHybridKokkos_UnPackReverse( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const unsigned int datamask): - _f(atomKK->k_f.view()),_list(list.view()), + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const unsigned int datamask): + _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), + _list(list.view()), _datamask(datamask) { const size_t elements = atomKK->avecKK->size_reverse; const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); - }; + }; 
KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); - _f(j,0) += _buf(i,0); - _f(j,1) += _buf(i,1); - _f(j,2) += _buf(i,2); + _f(j,0) += _buf(i,m++); + _f(j,1) += _buf(i,m++); + _f(j,2) += _buf(i,m++); + + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _buf(i,m++); + _torque(j,1) += _buf(i,m++); + _torque(j,2) += _buf(i,m++); + } } }; @@ -972,6 +994,7 @@ struct AtomVecHybridKokkos_PackBorder { const typename AT::t_tagint_1d _molecule; const typename AT::t_kkfloat_1d _q; const typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; double _dx,_dy,_dz; unsigned int _datamask; @@ -989,6 +1012,8 @@ struct AtomVecHybridKokkos_PackBorder { _molecule(atomKK->k_molecule.view()), _q(atomKK->k_q.view()), _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} KOKKOS_INLINE_FUNCTION @@ -999,43 +1024,34 @@ struct AtomVecHybridKokkos_PackBorder { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); - _buf(i,m++) = d_ubuf(_tag(j)).d; - _buf(i,m++) = d_ubuf(_type(j)).d; - _buf(i,m++) = d_ubuf(_mask(j)).d; - - if (_datamask & MOLECULE_MASK) - _buf(i,m++) = d_ubuf(_molecule(j)).d; - - if (_datamask & Q_MASK) - _buf(i,m++) = _q(j); - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } } else { _buf(i,m++) = _x(j,0) + _dx; _buf(i,m++) = _x(j,1) + _dy; _buf(i,m++) = _x(j,2) + _dz; - _buf(i,m++) = d_ubuf(_tag(j)).d; - _buf(i,m++) = d_ubuf(_type(j)).d; - _buf(i,m++) = d_ubuf(_mask(j)).d; + } - if (_datamask & MOLECULE_MASK) - _buf(i,m++) = d_ubuf(_molecule(j)).d; + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; - if (_datamask & Q_MASK) - _buf(i,m++) = _q(j); + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = 
_mu(j,1); - _buf(i,m++) = _mu(j,2); - _buf(i,m++) = _mu(j,3); - } + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); } }; @@ -1103,6 +1119,7 @@ struct AtomVecHybridKokkos_UnpackBorder { typename AT::t_tagint_1d _molecule; typename AT::t_kkfloat_1d _q; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; int _first; unsigned int _datamask; @@ -1118,6 +1135,8 @@ struct AtomVecHybridKokkos_UnpackBorder { _molecule(atomKK->k_molecule.view()), _q(atomKK->k_q.view()), _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), _first(first),_datamask(datamask) { }; @@ -1143,6 +1162,12 @@ struct AtomVecHybridKokkos_UnpackBorder { _mu(i+_first,2) = _buf(i,m++); _mu(i+_first,3) = _buf(i,m++); } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); } }; @@ -1170,6 +1195,284 @@ void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &first, /* ---------------------------------------------------------------------- */ +template +struct AtomVecHybridKokkos_PackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_um _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3 _v; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + double _dx,_dy,_dz, _dvx, _dvy, _dvz; + const int _deform_groupbit; + const unsigned int _datamask; + + AtomVecHybridKokkos_PackBorderVel( + const 
AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const double &dvx, const double &dvy, const double &dvz, + const int &deform_groupbit, + const unsigned int &datamask): + _buf(buf),_list(list),_datamask(datamask), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dx(dx),_dy(dy),_dz(dz), + _dvx(dvx),_dvy(dvy),_dvz(dvz), + _deform_groupbit(deform_groupbit) { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + } + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + // always write three velocity values so the fields packed after them keep their offsets + if (DEFORM_VREMAP && (_mask(i) & _deform_groupbit)) { + _buf(i,m++) = _v(j,0) + _dvx; + _buf(i,m++) = _v(j,1) + _dvy; + _buf(i,m++) = _v(j,2) + _dvz; + } else { + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); + } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_border_vel_kokkos( + int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + double dx = 0, dy = 0, dz = 0; + double dvx = 0, dvy = 0, dvz = 0; + + atomKK->sync(space,datamask_border_vel); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + if (space==Host) { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + if (space == Host) { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + } else { + if (space == Host) { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecHybridKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + + 
atomKK->modified(space,datamask_border_vel); + + return n*(size_border + size_velocity); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecHybridKokkos_UnpackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_const_um _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + int _first; + unsigned int _datamask; + + AtomVecHybridKokkos_UnpackBorderVel( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, + const unsigned int &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _first(first),_datamask(datamask) + { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _type(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _mask(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + // velocity comes right after tag/type/mask; radius and rmass are read further down under their mask bits + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + 
_mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_border_vel_kokkos( + const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { + while (first+n >= nmax) grow(0); + if (space == Host) { + struct AtomVecHybridKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecHybridKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + + atomKK->modified(space,datamask_border_vel); +} + +/* ---------------------------------------------------------------------- */ + template struct AtomVecHybridKokkos_PackExchangeFunctor { typedef DeviceType device_type; @@ -1200,6 +1503,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_3 _vw; @@ -1226,6 +1531,8 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, _improper_atom3w,_improper_atom4w; typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d _radiusw,_rmassw; + typename AT::t_kkfloat_1d_3 _omegaw; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _sendlist; @@ -1270,6 +1577,9 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom3(atomKK->k_improper_atom3.view()), 
_improper_atom4(atomKK->k_improper_atom4.view()), _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), _xw(atomKK->k_x.view()), _vw(atomKK->k_v.view()), @@ -1302,6 +1612,9 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom3w(atomKK->k_improper_atom3.view()), _improper_atom4w(atomKK->k_improper_atom4.view()), _muw(atomKK->k_mu.view()), + _radiusw(atomKK->k_radius.view()), + _rmassw(atomKK->k_rmass.view()), + _omegaw(atomKK->k_omega.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()), @@ -1464,6 +1777,18 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _muw(i,2) = _mu(j,2); _muw(i,3) = _mu(j,3); } + + if (_datamask & RADIUS_MASK) + _radiusw(i) = _radius(j); + + if (_datamask & RMASS_MASK) + _rmassw(i) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _omegaw(i,0) = _omega(j,0); + _omegaw(i,1) = _omega(j,1); + _omegaw(i,2) = _omega(j,2); + } } } }; @@ -1525,6 +1850,8 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { typename AT::t_tagint_2d _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d _nlocal; @@ -1572,6 +1899,9 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _improper_atom3(atomKK->k_improper_atom3.view()), _improper_atom4(atomKK->k_improper_atom4.view()), _mu(atomKK->k_mu.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), _nlocal(nlocal.template view()), _indices(indices.template view()), @@ -1660,6 +1990,18 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _mu(i,2) = _buf(myrecv,m++); _mu(i,3) = _buf(myrecv,m++); } + + if (_datamask & RADIUS_MASK) + _radius(i) = _buf(myrecv,m++); + + if (_datamask & RMASS_MASK) + _rmass(i) = _buf(myrecv,m++); + + if (_datamask & OMEGA_MASK) { 
+ _omega(i,0) = _buf(myrecv,m++); + _omega(i,1) = _buf(myrecv,m++); + _omega(i,2) = _buf(myrecv,m++); + } } if (OUTPUT_INDICES) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 369a5159d7a..5ed86c5525f 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -78,6 +78,14 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { const DAT::tdual_double_2d_lr &buf, ExecutionSpace space) override; + int pack_border_vel_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) override; + + void unpack_border_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space) override; + int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, DAT::tdual_int_1d k_sendlist, DAT::tdual_int_1d k_copylist, @@ -154,6 +162,13 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { DAT::t_kkfloat_1d_4 d_mu; HAT::t_kkfloat_1d_4 h_mu; + + DAT::t_kkfloat_1d d_radius; + HAT::t_kkfloat_1d h_radius; + DAT::t_kkfloat_1d d_rmass; + HAT::t_kkfloat_1d h_rmass; + DAT::t_kkfloat_1d_3 d_torque; + HAT::t_kkfloat_1d_3 h_torque; }; } // namespace LAMMPS_NS From 13f45b5679439c3ce5952c19922d7d80d9c88fb9 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 11:56:39 -0700 Subject: [PATCH 086/604] Updates to LAMMPS/Kokkos utility routines to support mixed precision casting work --- src/KOKKOS/kokkos_type.h | 21 +++++++++-- src/KOKKOS/math_special_kokkos.h | 60 +++++++++++++------------------- src/KOKKOS/memory_kokkos.h | 2 +- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index f43bd2b2458..8696910669a 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -478,6 +478,22 @@ struct alignas(2*sizeof(double)) s_KK_double2 { }; typedef struct s_KK_double2 KK_double2; 
+struct alignas(2*sizeof(KK_FLOAT)) s_KK_FLOAT2 { + KK_FLOAT v[2]; + + KOKKOS_INLINE_FUNCTION + s_KK_FLOAT2() { + v[0] = v[1] = 0; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const s_KK_FLOAT2 &rhs) { + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + } +}; +typedef struct s_KK_FLOAT2 KK_FLOAT2; + template struct BinOp3DLAMMPS { int max_bins_[3] = {}; @@ -1324,7 +1340,7 @@ struct params_lj_coul { params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; KOKKOS_INLINE_FUNCTION params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; - double cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; + KK_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset; }; // ReaxFF @@ -1335,7 +1351,8 @@ struct alignas(4 * sizeof(int)) reax_int4 { // Pair SNAP -#define SNAP_KOKKOS_REAL double +#define SNAP_KOKKOS_REAL KK_FLOAT +#define SNAP_KOKKOS_ACCUM KK_ACC_FLOAT #define SNAP_KOKKOS_HOST_VECLEN 1 #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/math_special_kokkos.h b/src/KOKKOS/math_special_kokkos.h index a14244d05c0..886eed75e67 100644 --- a/src/KOKKOS/math_special_kokkos.h +++ b/src/KOKKOS/math_special_kokkos.h @@ -72,18 +72,18 @@ namespace MathSpecialKokkos { * \return value of 2^x as double precision number */ KOKKOS_INLINE_FUNCTION - static KK_FLOAT exp2_x86(KK_FLOAT x) + static double exp2_x86(double x) { #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - KK_FLOAT ipart, fpart, px, qx; + double ipart, fpart, px, qx; udi_t epart; - const KK_FLOAT fm_exp2_q[2] = { + const double fm_exp2_q[2] = { /* 1.00000000000000000000e0, */ 2.33184211722314911771e2, 4.36821166879210612817e3 }; - const KK_FLOAT fm_exp2_p[3] = { + const double fm_exp2_p[3] = { 2.30933477057345225087e-2, 2.02020656693165307700e1, 1.51390680115615096133e3 @@ -124,7 +124,7 @@ namespace MathSpecialKokkos { * \return value of e^x as double precision number */ KOKKOS_INLINE_FUNCTION - static KK_FLOAT fm_exp(KK_FLOAT x) + static double fm_exp(double x) { #if 
defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) if (x < -1022.0/FM_DOUBLE_LOG2OFE) return 0; @@ -184,17 +184,17 @@ namespace MathSpecialKokkos { * * \param x argument * \return x*x */ - + template KOKKOS_INLINE_FUNCTION - static KK_FLOAT square(const KK_FLOAT &x) { return x * x; } + static T square(const T &x) { return x * x; } /*! Fast inline version of pow(x, 3.0) * * \param x argument * \return x*x */ - + template KOKKOS_INLINE_FUNCTION - static KK_FLOAT cube(const KK_FLOAT &x) { return x * x * x; } + static T cube(const T &x) { return x * x * x; } /* Fast inline version of pow(-1.0, n) * @@ -202,7 +202,7 @@ namespace MathSpecialKokkos { * \return -1 if n is odd, 1.0 if n is even */ KOKKOS_INLINE_FUNCTION - static KK_FLOAT powsign(const int n) { return (n & 1) ? -1.0 : 1.0; } + static KK_FLOAT powsign(const int n) { return (n & 1) ? static_cast(-1.0) : static_cast(1.0); } /* Fast inline version of pow(x,n) for integer n * @@ -212,19 +212,20 @@ namespace MathSpecialKokkos { * \param n argument (integer) * \return value of x^n */ + template KOKKOS_INLINE_FUNCTION - static KK_FLOAT powint(const KK_FLOAT &x, const int n) + static T powint(const T &x, const int n) { - KK_FLOAT yy, ww; + T yy, ww; - if (x == 0.0) return 0.0; + if (x == static_cast(0)) return static_cast(0); int nn = (n > 0) ? n : -n; ww = x; - for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww) + for (yy = static_cast(1); nn != 0; nn >>= 1, ww *= ww) if (nn & 1) yy *= ww; - return (n > 0) ? yy : 1.0 / yy; + return (n > 0) ? yy : static_cast(1) / yy; } /* Fast inline version of (sin(x)/x)^n as used by PPPM kspace styles @@ -234,16 +235,17 @@ namespace MathSpecialKokkos { * \param n argument (integer). Expected to be positive. 
* \return value of (sin(x)/x)^n */ + template KOKKOS_INLINE_FUNCTION - static KK_FLOAT powsinxx(const KK_FLOAT &x, int n) + static T powsinxx(const T &x, int n) { - KK_FLOAT yy, ww; + T yy, ww; - if (x == 0.0) return 1.0; + if (x == static_cast(0)) return static_cast(1); ww = sin(x) / x; - for (yy = 1.0; n != 0; n >>= 1, ww *= ww) + for (yy = static_cast(1); n != 0; n >>= 1, ww *= ww) if (n & 1) yy *= ww; return yy; @@ -253,16 +255,9 @@ namespace MathSpecialKokkos { ans = v1 - v2 ------------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION - static void sub3(const float *v1, const float *v2, float *ans) - { - ans[0] = v1[0] - v2[0]; - ans[1] = v1[1] - v2[1]; - ans[2] = v1[2] - v2[2]; - } - - KOKKOS_INLINE_FUNCTION - static void sub3(const double *v1, const double *v2, double *ans) + static void sub3(const T *v1, const T *v2, T *ans) { ans[0] = v1[0] - v2[0]; ans[1] = v1[1] - v2[1]; @@ -273,14 +268,9 @@ namespace MathSpecialKokkos { dot product of 2 vectors ------------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION - static KK_FLOAT dot3(const float *v1, const float *v2) - { - return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; - } - - KOKKOS_INLINE_FUNCTION - static KK_FLOAT dot3(const double *v1, const double *v2) + static T dot3(const T *v1, const T *v2) { return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; } diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index b50edf953f4..b84fe0e33cf 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -444,7 +444,7 @@ static std::enable_if_t static double memory_usage(TYPE &data) { - return data.span() * sizeof(typename TYPE::value_type); + return static_cast(data.span() * sizeof(typename TYPE::value_type)); } /* ---------------------------------------------------------------------- From 1a944de3f5ef00caef6405c5749c7dcc219d30fe Mon Sep 17 00:00:00 2001 From: Stan Moore Date: 
Mon, 27 Oct 2025 13:18:05 -0600 Subject: [PATCH 087/604] Compiles --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 460 +++++++++++++++++++++++++- src/KOKKOS/atom_vec_hybrid_kokkos.h | 10 + 2 files changed, 464 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 0d3e551a5c5..fe121077ede 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -64,6 +64,8 @@ struct AtomVecHybridKokkos_PackComm { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -79,6 +81,11 @@ struct AtomVecHybridKokkos_PackComm { const unsigned int &datamask): _x(atomKK->k_x.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { @@ -115,6 +122,25 @@ struct AtomVecHybridKokkos_PackComm { _buf(i,m++) = _mu(j,1); _buf(i,m++) = _mu(j,2); } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); } }; @@ -197,9 +223,13 @@ struct AtomVecHybridKokkos_PackCommSelf { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread 
_dpdTheta,_uCond,_uMech,_uChem; typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d_4 _spw; typename AT::t_kkfloat_1d _radiusw,_rmassw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -215,6 +245,11 @@ struct AtomVecHybridKokkos_PackCommSelf { const unsigned int datamask): _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), _nfirst(nfirst),_list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { @@ -246,6 +281,24 @@ struct AtomVecHybridKokkos_PackCommSelf { _muw(i+_nfirst,1) = _mu(j,1); _muw(i+_nfirst,2) = _mu(j,2); } + + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); + + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); } }; @@ -323,9 +376,12 @@ struct AtomVecHybridKokkos_PackCommSelfFused { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d _radiusw,_rmassw; + typename AT::t_kkfloat_1d_4 _spw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; 
typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -348,6 +404,11 @@ struct AtomVecHybridKokkos_PackCommSelfFused { const unsigned int datamask): _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -394,6 +455,25 @@ struct AtomVecHybridKokkos_PackCommSelfFused { _muw(i+_nfirst,1) = _mu(j,1); _muw(i+_nfirst,2) = _mu(j,2); } + + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); + } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); + + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); } }; @@ -444,6 +524,8 @@ struct AtomVecHybridKokkos_UnpackComm { typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; unsigned int _datamask; @@ -454,6 +536,11 @@ struct AtomVecHybridKokkos_UnpackComm { const int &first, const unsigned int &datamask): _x(atomKK->k_x.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _first(first),_datamask(datamask) { const int size_forward = atomKK->avecKK->size_forward; 
const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; @@ -473,6 +560,25 @@ struct AtomVecHybridKokkos_UnpackComm { _mu(i+_first,1) = _buf(i,m++); _mu(i+_first,2) = _buf(i,m++); } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); } }; @@ -504,7 +610,9 @@ struct AtomVecHybridKokkos_PackCommVel { typename AT::t_int_1d _mask; typename AT::t_kkfloat_1d_3 _v; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -526,6 +634,10 @@ struct AtomVecHybridKokkos_PackCommVel { _mask(atomKK->k_mask.view()), _v(atomKK->k_v.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz), @@ -586,11 +698,30 @@ struct AtomVecHybridKokkos_PackCommVel { _buf(i,m++) = _mu(j,2); } + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + if (_datamask & OMEGA_MASK) { _buf(i,m++) = _omega(j,0); _buf(i,m++) = _omega(j,1); _buf(i,m++) = _omega(j,2); } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); 
} }; @@ -736,7 +867,9 @@ struct AtomVecHybridKokkos_UnpackCommVel { typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; unsigned int _datamask; @@ -748,7 +881,12 @@ struct AtomVecHybridKokkos_UnpackCommVel { _x(atomKK->k_x.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _first(first),_datamask(datamask) { const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; @@ -772,11 +910,29 @@ struct AtomVecHybridKokkos_UnpackCommVel { _mu(i+_first,2) = _buf(i,m++); } + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + } + if (_datamask & OMEGA_MASK) { _omega(i+_first,0) = _buf(i,m++); _omega(i+_first,1) = _buf(i,m++); _omega(i+_first,2) = _buf(i,m++); } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); } }; @@ -804,7 +960,7 @@ struct AtomVecHybridKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; typename AT::t_kkfloat_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; @@ -815,6 +971,8 @@ struct AtomVecHybridKokkos_PackReverse { const typename DAT::tdual_double_2d_lr &buf, const int &first, const unsigned int &datamask): _f(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()), + 
_fm_long(atomKK->k_fm_long.view()), _torque(atomKK->k_torque.view()), _first(first),_datamask(datamask) { const size_t elements = atomKK->avecKK->size_reverse; @@ -829,6 +987,18 @@ struct AtomVecHybridKokkos_PackReverse { _buf(i,m++) = _f(i+_first,1); _buf(i,m++) = _f(i+_first,2); + if (_datamask & FM_MASK) { + _buf(i,m++) = _fm(i+_first,0); + _buf(i,m++) = _fm(i+_first,1); + _buf(i,m++) = _fm(i+_first,2); + } + + if (_datamask & FML_MASK) { + _buf(i,m++) = _fm_long(i+_first,0); + _buf(i,m++) = _fm_long(i+_first,1); + _buf(i,m++) = _fm_long(i+_first,2); + } + if (_datamask & TORQUE_MASK) { _buf(i,m++) = _torque(i+_first,0); _buf(i,m++) = _torque(i+_first,1); @@ -861,9 +1031,9 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; typename AT::t_kkfloat_1d_3_randomread _torque; - typename AT::t_kkacc_1d_3 _fw; + typename AT::t_kkacc_1d_3 _fw,_fmw,_fm_longw; typename AT::t_kkfloat_1d_3 _torquew; typename AT::t_int_1d_const _list; int _nfirst; @@ -875,6 +1045,8 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { const typename DAT::tdual_int_1d &list, const unsigned int &datamask): _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()),_fmw(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()),_fm_longw(atomKK->k_fm_long.view()), _torque(atomKK->k_torque.view()),_torquew(atomKK->k_torque.view()), _nfirst(nfirst),_list(list.view()), _datamask(datamask) {}; @@ -886,6 +1058,18 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { _fw(j,1) += _f(i+_nfirst,1); _fw(j,2) += _f(i+_nfirst,2); + if (_datamask & FM_MASK) { + _fmw(j,0) += _fm(i+_nfirst,0); + _fmw(j,1) += _fm(i+_nfirst,1); + _fmw(j,2) += _fm(i+_nfirst,2); + } + + if (_datamask & FML_MASK) { + _fm_longw(j,0) += _fm_long(i+_nfirst,0); + _fm_longw(j,1) += _fm_long(i+_nfirst,1); + _fm_longw(j,2) += _fm_long(i+_nfirst,2); + } + if (_datamask & 
TORQUE_MASK) { _torquew(j,0) += _torque(i+_nfirst,0); _torquew(j,1) += _torque(i+_nfirst,1); @@ -920,7 +1104,7 @@ struct AtomVecHybridKokkos_UnPackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3 _f; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; typename AT::t_kkfloat_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; @@ -932,6 +1116,8 @@ struct AtomVecHybridKokkos_UnPackReverse { const typename DAT::tdual_int_1d &list, const unsigned int datamask): _f(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), _torque(atomKK->k_torque.view()), _list(list.view()), _datamask(datamask) { @@ -948,6 +1134,18 @@ struct AtomVecHybridKokkos_UnPackReverse { _f(j,1) += _buf(i,m++); _f(j,2) += _buf(i,m++); + if (_datamask & FM_MASK) { + _fm(j,0) += _buf(i,m++); + _fm(j,1) += _buf(i,m++); + _fm(j,2) += _buf(i,m++); + } + + if (_datamask & FML_MASK) { + _fm_long(j,0) += _buf(i,m++); + _fm_long(j,1) += _buf(i,m++); + _fm_long(j,2) += _buf(i,m++); + } + if (_datamask & TORQUE_MASK) { _torque(j,0) += _buf(i,m++); _torque(j,1) += _buf(i,m++); @@ -994,7 +1192,9 @@ struct AtomVecHybridKokkos_PackBorder { const typename AT::t_tagint_1d _molecule; const typename AT::t_kkfloat_1d _q; const typename AT::t_kkfloat_1d_4 _mu; + const typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz; unsigned int _datamask; @@ -1012,8 +1212,15 @@ struct AtomVecHybridKokkos_PackBorder { _molecule(atomKK->k_molecule.view()), _q(atomKK->k_q.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), 
_dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} KOKKOS_INLINE_FUNCTION @@ -1047,11 +1254,36 @@ struct AtomVecHybridKokkos_PackBorder { _buf(i,m++) = _mu(j,3); } + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + if (_datamask & RADIUS_MASK) _buf(i,m++) = _radius(j); if (_datamask & RMASS_MASK) _buf(i,m++) = _rmass(j); + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); + + if (_datamask & UCG_MASK) + _buf(i,m++) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _buf(i,m++) = _uCGnew(j); } }; @@ -1119,7 +1351,9 @@ struct AtomVecHybridKokkos_UnpackBorder { typename AT::t_tagint_1d _molecule; typename AT::t_kkfloat_1d _q; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; int _first; unsigned int _datamask; @@ -1135,8 +1369,15 @@ struct AtomVecHybridKokkos_UnpackBorder { _molecule(atomKK->k_molecule.view()), _q(atomKK->k_q.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), _first(first),_datamask(datamask) { }; @@ -1163,11 +1404,36 @@ struct AtomVecHybridKokkos_UnpackBorder { _mu(i+_first,3) = _buf(i,m++); } + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + if (_datamask & RADIUS_MASK) _radius(i+_first) = _buf(i,m++); if (_datamask & RMASS_MASK) _rmass(i+_first) = 
_buf(i,m++); + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); + + if (_datamask & UCG_MASK) + _uCG(i+_first) = _buf(i,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i+_first) = _buf(i,m++); } }; @@ -1208,8 +1474,10 @@ struct AtomVecHybridKokkos_PackBorderVel { const typename AT::t_int_1d _type; const typename AT::t_int_1d _mask; const typename AT::t_kkfloat_1d_4 _mu; + const typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz, _dvx, _dvy, _dvz; const int _deform_groupbit; const unsigned int _datamask; @@ -1229,9 +1497,16 @@ struct AtomVecHybridKokkos_PackBorderVel { _mask(atomKK->k_mask.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), _dx(dx),_dy(dy),_dz(dz), _dvx(dvx),_dvy(dvy),_dvz(dvz), _deform_groupbit(deform_groupbit) { @@ -1276,6 +1551,13 @@ struct AtomVecHybridKokkos_PackBorderVel { _buf(i,m++) = _mu(j,3); } + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + if (_datamask & RADIUS_MASK) _buf(i,m++) = _radius(j); @@ -1287,6 +1569,24 @@ struct AtomVecHybridKokkos_PackBorderVel { _buf(i,m++) = _omega(j,1); _buf(i,m++) = _omega(j,2); } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if 
(_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); + + if (_datamask & UCG_MASK) + _buf(i,m++) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _buf(i,m++) = _uCGnew(j); } }; @@ -1312,7 +1612,7 @@ int AtomVecHybridKokkos::pack_border_vel_kokkos( dz = pbc[2]; } if (!deform_vremap) { - if (space==Host) { + if (space == Host) { AtomVecHybridKokkos_PackBorderVel f( atomKK, buf.view_host(), k_sendlist.view_host(), @@ -1385,8 +1685,10 @@ struct AtomVecHybridKokkos_UnpackBorderVel { typename AT::t_int_1d _mask; typename AT::t_kkfloat_1d_3 _v; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; int _first; unsigned int _datamask; @@ -1402,9 +1704,16 @@ struct AtomVecHybridKokkos_UnpackBorderVel { _mask(atomKK->k_mask.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), _first(first),_datamask(datamask) { const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; @@ -1434,6 +1743,13 @@ struct AtomVecHybridKokkos_UnpackBorderVel { _mu(i+_first,3) = _buf(i,m++); } + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + if (_datamask & RADIUS_MASK) _radius(i+_first) = _buf(i,m++); @@ -1445,6 +1761,24 @@ struct AtomVecHybridKokkos_UnpackBorderVel { _omega(i+_first,1) = _buf(i,m++); _omega(i+_first,2) = _buf(i,m++); } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + 
+ if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); + + if (_datamask & UCG_MASK) + _uCG(i+_first) = _buf(i,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i+_first) = _buf(i,m++); } }; @@ -1503,8 +1837,10 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; typename AT::t_kkfloat_1d_3_lr _xw; typename AT::t_kkfloat_1d_3 _vw; @@ -1531,8 +1867,10 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, _improper_atom3w,_improper_atom4w; typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d_4 _spw; typename AT::t_kkfloat_1d _radiusw,_rmassw; typename AT::t_kkfloat_1d_3 _omegaw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _sendlist; @@ -1577,9 +1915,16 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom3(atomKK->k_improper_atom3.view()), _improper_atom4(atomKK->k_improper_atom4.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), _xw(atomKK->k_x.view()), _vw(atomKK->k_v.view()), @@ -1612,9 +1957,16 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _improper_atom3w(atomKK->k_improper_atom3.view()), 
_improper_atom4w(atomKK->k_improper_atom4.view()), _muw(atomKK->k_mu.view()), + _spw(atomKK->k_sp.view()), _radiusw(atomKK->k_radius.view()), _rmassw(atomKK->k_rmass.view()), _omegaw(atomKK->k_omega.view()), + _dpdThetaw(atomKK->k_dpdTheta.view()), + _uCondw(atomKK->k_uCond.view()), + _uMechw(atomKK->k_uMech.view()), + _uChemw(atomKK->k_uChem.view()), + _uCGw(atomKK->k_uCG.view()), + _uCGneww(atomKK->k_uCGnew.view()), _sendlist(sendlist.template view()), _copylist(copylist.template view()), @@ -1703,6 +2055,43 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _buf(mysend,m++) = _mu(i,3); } + if (_datamask & SP_MASK) { + _buf(mysend,m++) = _sp(i,0); + _buf(mysend,m++) = _sp(i,1); + _buf(mysend,m++) = _sp(i,2); + _buf(mysend,m++) = _sp(i,3); + } + + if (_datamask & RADIUS_MASK) + _buf(mysend,m++) = _radius(i); + + if (_datamask & RMASS_MASK) + _buf(mysend,m++) = _rmass(i); + + if (_datamask & OMEGA_MASK) { + _buf(mysend,m++) = _omega(i,0); + _buf(mysend,m++) = _omega(i,1); + _buf(mysend,m++) = _omega(i,2); + } + + if (_datamask & DPDTHETA_MASK) + _buf(mysend,m++) = _dpdTheta(i); + + if (_datamask & UCOND_MASK) + _buf(mysend,m++) = _uCond(i); + + if (_datamask & UMECH_MASK) + _buf(mysend,m++) = _uMech(i); + + if (_datamask & UCHEM_MASK) + _buf(mysend,m++) = _uChem(i); + + if (_datamask & UCG_MASK) + _buf(mysend,m++) = _uCG(i); + + if (_datamask & UCGNEW_MASK) + _buf(mysend,m++) = _uCGnew(i); + const int j = _copylist(mysend); if (j > -1) { @@ -1778,6 +2167,13 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _muw(i,3) = _mu(j,3); } + if (_datamask & SP_MASK) { + _spw(i,0) = _sp(j,0); + _spw(i,1) = _sp(j,1); + _spw(i,2) = _sp(j,2); + _spw(i,3) = _sp(j,3); + } + if (_datamask & RADIUS_MASK) _radiusw(i) = _radius(j); @@ -1789,6 +2185,24 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { _omegaw(i,1) = _omega(j,1); _omegaw(i,2) = _omega(j,2); } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i) = _uCond(j); 
+ + if (_datamask & UMECH_MASK) + _uMechw(i) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i) = _uChem(j); + + if (_datamask & UCG_MASK) + _uCGw(i) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _uCGneww(i) = _uCGnew(j); } } }; @@ -1850,8 +2264,10 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { typename AT::t_tagint_2d _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d _nlocal; @@ -1899,9 +2315,16 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _improper_atom3(atomKK->k_improper_atom3.view()), _improper_atom4(atomKK->k_improper_atom4.view()), _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), _radius(atomKK->k_radius.view()), _rmass(atomKK->k_rmass.view()), _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), _nlocal(nlocal.template view()), _indices(indices.template view()), @@ -1991,6 +2414,13 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _mu(i,3) = _buf(myrecv,m++); } + if (_datamask & SP_MASK) { + _sp(i,0) = _buf(myrecv,m++); + _sp(i,1) = _buf(myrecv,m++); + _sp(i,2) = _buf(myrecv,m++); + _sp(i,3) = _buf(myrecv,m++); + } + if (_datamask & RADIUS_MASK) _radius(i) = _buf(myrecv,m++); @@ -2002,6 +2432,24 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { _omega(i,1) = _buf(myrecv,m++); _omega(i,2) = _buf(myrecv,m++); } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i) = _buf(myrecv,m++); + + if (_datamask & UCOND_MASK) + _uCond(i) = _buf(myrecv,m++); + + if (_datamask & UMECH_MASK) + _uMech(i) = _buf(myrecv,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i) = 
_buf(myrecv,m++); + + if (_datamask & UCG_MASK) + _uCG(i) = _buf(myrecv,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i) = _buf(myrecv,m++); } if (OUTPUT_INDICES) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 5ed86c5525f..5de73296e5b 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -163,12 +163,22 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { DAT::t_kkfloat_1d_4 d_mu; HAT::t_kkfloat_1d_4 h_mu; + DAT::t_kkfloat_1d_4 d_sp; + DAT::t_kkacc_1d_3 d_fm; + DAT::t_kkacc_1d_3 d_fm_long; + HAT::t_kkfloat_1d_4 h_sp; + HAT::t_kkacc_1d_3 h_fm; + HAT::t_kkacc_1d_3 h_fm_long; + DAT::t_kkfloat_1d d_radius; HAT::t_kkfloat_1d h_radius; DAT::t_kkfloat_1d d_rmass; HAT::t_kkfloat_1d h_rmass; DAT::t_kkfloat_1d_3 d_torque; HAT::t_kkfloat_1d_3 h_torque; + + DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; + HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; }; } // namespace LAMMPS_NS From 2cb0b2f6c9a719167fd0528138c83a2ffd93f37b Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 27 Oct 2025 13:15:44 -0600 Subject: [PATCH 088/604] Precomputing more quantities in ij loop --- src/RHEO/pair_rheo.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/RHEO/pair_rheo.cpp b/src/RHEO/pair_rheo.cpp index eee171dac35..5ad3661b775 100644 --- a/src/RHEO/pair_rheo.cpp +++ b/src/RHEO/pair_rheo.cpp @@ -88,7 +88,7 @@ void PairRHEO::compute(int eflag, int vflag) double dx[3], du[3], dv[3], fv[3], dfp[3], fsolid[3], ft[3], vi[3], vj[3]; int *ilist, *jlist, *numneigh, **firstneigh; - double imass, jmass, rsq, r, rinv; + double imass, jmass, rsq, r, rinv, rinvsq; int nlocal = atom->nlocal; int newton_pair = force->newton_pair; @@ -154,12 +154,14 @@ void PairRHEO::compute(int eflag, int vflag) imass = rmass[i]; else imass = mass[itype]; + rho0i = rho0[itype]; etai = 
viscosity[i]; fluidi = !(status[i] & PHASECHECK); if (thermal_flag) { kappai = conductivity[i]; Ti = temperature[i]; cpi = fix_thermal->calc_cv(itype); + alphai = kappai / (rho0i * cpi); } for (jj = 0; jj < jnum; jj++) { @@ -175,6 +177,7 @@ void PairRHEO::compute(int eflag, int vflag) if (rsq < cutksq) { r = sqrt(rsq); rinv = 1 / r; + rinvsq = rinv * rinv; if (rmass) jmass = rmass[i]; @@ -219,7 +222,6 @@ void PairRHEO::compute(int eflag, int vflag) // Add corrections for walls rhoi = rho[i]; rhoj = rho[j]; - rho0i = rho0[itype]; rho0j = rho0[jtype]; Pi = pressure[i]; Pj = pressure[j]; @@ -264,7 +266,6 @@ void PairRHEO::compute(int eflag, int vflag) // Thermal Evolution if (thermal_flag) { cpj = fix_thermal->calc_cv(jtype); - alphai = kappai / (rho0i * cpi); alphaj = kappaj / (rho0j * cpj); if (harmonic_means_flag) { alpha_ave = 2.0 * alphai * alphaj / (alphai + alphaj); @@ -272,7 +273,7 @@ void PairRHEO::compute(int eflag, int vflag) alpha_ave = 0.5 * (alphai + alphaj); } dT_prefactor = - 2.0 * alpha_ave * (Ti - Tj) * rinv * rinv; + 2.0 * alpha_ave * (Ti - Tj) * rinvsq; dT = dot3(dx, dWij); heatflow[i] += dT * dT_prefactor * volj; @@ -317,7 +318,7 @@ void PairRHEO::compute(int eflag, int vflag) for (a = 0; a < dim; a++) { fv[a] = 0.0; for (b = 0; b < dim; b++) fv[a] += dv[a] * dx[b] * dWij[b]; - fv[a] *= 2.0 * eta_ave * volj * rinv * rinv; + fv[a] *= 2.0 * eta_ave * volj * rinvsq; } add3(fv, dfp, ft); @@ -341,7 +342,7 @@ void PairRHEO::compute(int eflag, int vflag) for (a = 0; a < dim; a++) { fv[a] = 0.0; for (b = 0; b < dim; b++) fv[a] += (vi[a] - vj[a]) * dx[b] * dWji[b]; - fv[a] *= -2.0 * eta_ave * voli * rinv * rinv; + fv[a] *= -2.0 * eta_ave * voli * rinvsq; // flip sign here b/c -= at accummulator } @@ -370,7 +371,7 @@ void PairRHEO::compute(int eflag, int vflag) if (rho_damp_flag && pair_rho_flag) { if (laplacian_order >= 1) { psi_ij = rhoj - rhoi; - Fij = -rinv * rinv * dot3(dx, dWij); + Fij = -rinvsq * dot3(dx, dWij); for (a = 0; a < dim; a++) psi_ij += 
0.5 * (gradr[i][a] + gradr[j][a]) * dx[a]; drho[i] += 2 * rho_damp * psi_ij * Fij * volj; } else { @@ -380,7 +381,7 @@ void PairRHEO::compute(int eflag, int vflag) if (newton_pair || j < nlocal) { if (laplacian_order >= 1) { - Fij = rinv * rinv * dot3(dx, dWji); + Fij = rinvsq * dot3(dx, dWji); psi_ij *= -1; drho[j] += 2 * rho_damp * psi_ij * Fij * voli; } else { From 9852e053285e94452a1f92a4649d74714cb88c32 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 27 Oct 2025 13:22:33 -0600 Subject: [PATCH 089/604] Clarifying NONE comment --- src/EXTRA-FIX/fix_deform_pressure.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/EXTRA-FIX/fix_deform_pressure.cpp b/src/EXTRA-FIX/fix_deform_pressure.cpp index 634d1f01289..044cef4748d 100644 --- a/src/EXTRA-FIX/fix_deform_pressure.cpp +++ b/src/EXTRA-FIX/fix_deform_pressure.cpp @@ -306,7 +306,8 @@ FixDeformPressure::FixDeformPressure(LAMMPS *lmp, int narg, char **arg) : } } - // set strain_flag, also sets defaults for NONE + // set strain_flag, note that this flag is also set for NONE as + // apply_strain() handles its behavior in parent fix deform strain_flag = 0; for (int i = 0; i < 6; i++) From 44487dec87267b07f08546d2be69d37f0c9b3d56 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 12:29:41 -0700 Subject: [PATCH 090/604] Promoted LAMMPS/Kokkos torque Views to KK_ACC_FLOAT to match forces --- src/KOKKOS/atom_kokkos.h | 2 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 4 +- src/KOKKOS/atom_vec_sphere_kokkos.h | 4 +- src/KOKKOS/fix_efield_kokkos.h | 2 +- src/KOKKOS/fix_enforce2d_kokkos.h | 2 +- src/KOKKOS/fix_freeze_kokkos.h | 2 +- src/KOKKOS/fix_nve_sphere_kokkos.h | 2 +- src/KOKKOS/fix_wall_gran_kokkos.h | 2 +- src/KOKKOS/pair_brownian_kokkos.cpp | 2 +- src/KOKKOS/pair_brownian_kokkos.h | 2 +- src/KOKKOS/pair_gran_hooke_history_kokkos.cpp | 166 ++++++++---------- src/KOKKOS/pair_gran_hooke_history_kokkos.h | 4 +- src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp | 2 +- 
src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h | 2 +- src/KOKKOS/verlet_kokkos.cpp | 6 +- 15 files changed, 98 insertions(+), 106 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 92c56b32ed3..02b757ce180 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -43,7 +43,7 @@ class AtomKokkos : public Atom { DAT::ttransform_kkfloat_1d_4 k_mu; DAT::ttransform_kkfloat_1d_3 k_omega; DAT::ttransform_kkfloat_1d_3 k_angmom; - DAT::ttransform_kkfloat_1d_3 k_torque; + DAT::ttransform_kkacc_1d_3 k_torque; DAT::tdual_tagint_1d k_molecule; DAT::ttransform_int_2d k_nspecial; DAT::ttransform_tagint_2d k_special; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 3b875286025..53a7f69346f 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1720,7 +1720,7 @@ void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync_device()) perform_pinned_copy_transform(atomKK->k_omega,space,async_flag); if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync_device()) - perform_pinned_copy_transform(atomKK->k_torque,space,async_flag); + perform_pinned_copy_transform(atomKK->k_torque,space,async_flag); } else { if ((mask & X_MASK) && atomKK->k_x.need_sync_host()) perform_pinned_copy_transform(atomKK->k_x,space,async_flag); @@ -1743,7 +1743,7 @@ void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync_host()) perform_pinned_copy_transform(atomKK->k_omega,space,async_flag); if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync_host()) - perform_pinned_copy_transform(atomKK->k_torque,space,async_flag); + perform_pinned_copy_transform(atomKK->k_torque,space,async_flag); } } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index c9c3221a054..157d4f3f07d 100644 --- 
a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -93,8 +93,8 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { HAT::t_kkfloat_1d h_rmass; DAT::t_kkfloat_1d_3 d_omega; HAT::t_kkfloat_1d_3 h_omega; - DAT::t_kkfloat_1d_3 d_torque; - HAT::t_kkfloat_1d_3 h_torque; + DAT::t_kkacc_1d_3 d_torque; + HAT::t_kkacc_1d_3 h_torque; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index 05feb2bd205..5013c11c10f 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -67,7 +67,7 @@ class FixEfieldKokkos : public FixEfield { typename AT::t_kkfloat_1d_randomread d_q; typename AT::t_kkfloat_1d_4_randomread d_mu; typename AT::t_kkacc_1d_3 d_f; - typename AT::t_kkfloat_1d_3 d_torque; + typename AT::t_kkacc_1d_3 d_torque; typename AT::t_imageint_1d_randomread d_image; typename AT::t_int_1d_randomread d_mask; diff --git a/src/KOKKOS/fix_enforce2d_kokkos.h b/src/KOKKOS/fix_enforce2d_kokkos.h index 4ec408ed3fc..e6b2d1afc63 100644 --- a/src/KOKKOS/fix_enforce2d_kokkos.h +++ b/src/KOKKOS/fix_enforce2d_kokkos.h @@ -52,7 +52,7 @@ class FixEnforce2DKokkos : public FixEnforce2D { typename AT::t_kkfloat_1d_3 omega; typename AT::t_kkfloat_1d_3 angmom; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d mask; }; diff --git a/src/KOKKOS/fix_freeze_kokkos.h b/src/KOKKOS/fix_freeze_kokkos.h index 94cadf86ebb..59a4abc162b 100644 --- a/src/KOKKOS/fix_freeze_kokkos.h +++ b/src/KOKKOS/fix_freeze_kokkos.h @@ -60,7 +60,7 @@ class FixFreezeKokkos : public FixFreeze { private: typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d mask; }; diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.h b/src/KOKKOS/fix_nve_sphere_kokkos.h index 620f740cf57..d2b7f665a91 100644 --- a/src/KOKKOS/fix_nve_sphere_kokkos.h +++ b/src/KOKKOS/fix_nve_sphere_kokkos.h @@ -54,7 +54,7 
@@ class FixNVESphereKokkos : public FixNVESphere { typename AT::t_kkfloat_1d_3 omega; typename AT::t_kkfloat_1d_4 mu; typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_kkfloat_1d rmass; typename AT::t_kkfloat_1d radius; typename AT::t_int_1d mask; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 49a573eb9b2..d3915ddb81f 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -80,7 +80,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { typename AT::t_kkfloat_1d_3 v; typename AT::t_kkfloat_1d_3 d_omega; typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d mask; typename AT::t_kkfloat_1d rmass; typename AT::t_kkfloat_1d d_radius; diff --git a/src/KOKKOS/pair_brownian_kokkos.cpp b/src/KOKKOS/pair_brownian_kokkos.cpp index 49b9623353e..9702082d0e2 100644 --- a/src/KOKKOS/pair_brownian_kokkos.cpp +++ b/src/KOKKOS/pair_brownian_kokkos.cpp @@ -272,7 +272,7 @@ void PairBrownianKokkos::operator()(TagPairBrownianCompute::value,Kokkos::MemoryTraits::value> > a_f = f; - Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; rand_type rand_gen = rand_pool.get_state(); diff --git a/src/KOKKOS/pair_brownian_kokkos.h b/src/KOKKOS/pair_brownian_kokkos.h index 9c3ce80280b..f66c8877afd 100644 --- a/src/KOKKOS/pair_brownian_kokkos.h +++ b/src/KOKKOS/pair_brownian_kokkos.h @@ -67,7 +67,7 @@ class PairBrownianKokkos : public PairBrownian, public KokkosBase { typename AT::t_kkfloat_1d_3_lr_randomread x; typename AT::t_kkfloat_1d_3_lr c_x; typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d_randomread type; typename AT::t_kkfloat_1d_randomread radius; diff --git 
a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 6ae91bf6266..889413d5f94 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -150,6 +150,13 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; + kt_kk = static_cast(kt); + kn_kk = static_cast(kn); + xmu_kk = static_cast(xmu); + gammat_kk = static_cast(gammat); + gamman_kk = static_cast(gamman); + dt_kk = static_cast(dt); + if (d_numneigh.extent(0) != d_numneigh_touch.extent(0)) d_numneigh_touch = typename AT::t_int_1d("pair:numneigh_touch",d_numneigh.extent(0)); if (d_neighbors.extent(0) != d_neighbors_touch.extent(0) || @@ -234,12 +241,12 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) } if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (vflag_atom) { @@ -259,7 +266,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // The f and torque arrays are atomic for Half/Thread neighbor style Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; - Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; const int i = d_ilist[ii]; const KK_FLOAT xtmp = x(i,0); @@ -288,7 +295,7 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); - KK_FLOAT factor_lj = special_lj[sbmask(j)]; + KK_FLOAT factor_lj = static_cast(special_lj[sbmask(j)]); j &= NEIGHMASK; if (factor_lj == 0) continue; @@ 
-313,8 +320,8 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC d_firsttouch(i,jj) = 1; const KK_FLOAT r = sqrt(rsq); - const KK_FLOAT rinv = 1.0/r; - const KK_FLOAT rsqinv = 1/rsq; + const KK_FLOAT rinv = static_cast(1.0)/r; + const KK_FLOAT rsqinv = static_cast(1.0)/rsq; // relative translational velocity @@ -345,9 +352,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC if (mask_i & freeze_group_bit) meff = jmass; if (mask[j] & freeze_group_bit) meff = imass; - KK_FLOAT damp = meff*gamman*vnnr*rsqinv; - KK_FLOAT ccel = kn*(radsum-r)*rinv - damp; - if (limit_damping && (ccel < 0.0)) ccel = 0.0; + KK_FLOAT damp = meff*gamman_kk*vnnr*rsqinv; + KK_FLOAT ccel = kn_kk*(radsum-r)*rinv - damp; + if (limit_damping && (ccel < static_cast(0.0))) ccel = static_cast(0.0); // relative velocities @@ -362,9 +369,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC KK_FLOAT shear3 = d_firstshear(i,3*jj+2); if (SHEARUPDATE) { - shear1 += vtr1*dt; - shear2 += vtr2*dt; - shear3 += vtr3*dt; + shear1 += vtr1*dt_kk; + shear2 += vtr2*dt_kk; + shear3 += vtr3*dt_kk; } KK_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + shear3*shear3); @@ -382,27 +389,27 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC // tangential forces = shear + tangential velocity damping - KK_FLOAT fs1 = - (kt*shear1 + meff*gammat*vtr1); - KK_FLOAT fs2 = - (kt*shear2 + meff*gammat*vtr2); - KK_FLOAT fs3 = - (kt*shear3 + meff*gammat*vtr3); + KK_FLOAT fs1 = - (kt_kk*shear1 + meff*gammat_kk*vtr1); + KK_FLOAT fs2 = - (kt_kk*shear2 + meff*gammat_kk*vtr2); + KK_FLOAT fs3 = - (kt_kk*shear3 + meff*gammat_kk*vtr3); // rescale frictional displacements and forces if needed KK_FLOAT fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); - KK_FLOAT fn = xmu * fabs(ccel*r); + KK_FLOAT fn = xmu_kk * fabs(ccel*r); if (fs > fn) { - if (shrmag != 0.0) { - shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) - - meff*gammat*vtr1/kt; - shear2 = (fn/fs) * (shear2 + 
meff*gammat*vtr2/kt) - - meff*gammat*vtr2/kt; - shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) - - meff*gammat*vtr3/kt; + if (shrmag != static_cast(0.0)) { + shear1 = (fn/fs) * (shear1 + meff*gammat_kk*vtr1/kt_kk) - + meff*gammat_kk*vtr1/kt_kk; + shear2 = (fn/fs) * (shear2 + meff*gammat_kk*vtr2/kt_kk) - + meff*gammat_kk*vtr2/kt_kk; + shear3 = (fn/fs) * (shear3 + meff*gammat_kk*vtr3/kt_kk) - + meff*gammat_kk*vtr3/kt_kk; fs1 *= fn/fs; fs2 *= fn/fs; fs3 *= fn/fs; - } else fs1 = fs2 = fs3 = 0.0; + } else fs1 = fs2 = fs3 = 0; } if (SHEARUPDATE) { @@ -419,9 +426,9 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC fx *= factor_lj; fy *= factor_lj; fz *= factor_lj; - fx_i += fx; - fy_i += fy; - fz_i += fz; + fx_i += static_cast(fx); + fy_i += static_cast(fy); + fz_i += static_cast(fz); KK_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); KK_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); @@ -429,17 +436,17 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC tor1 *= factor_lj; tor2 *= factor_lj; tor3 *= factor_lj; - torquex_i -= irad*tor1; - torquey_i -= irad*tor2; - torquez_i -= irad*tor3; + torquex_i -= static_cast(irad*tor1); + torquey_i -= static_cast(irad*tor2); + torquez_i -= static_cast(irad*tor3); if (NEWTON_PAIR || j < nlocal) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; - a_torque(j,0) -= jrad*tor1; - a_torque(j,1) -= jrad*tor2; - a_torque(j,2) -= jrad*tor3; + a_f(j,0) -= static_cast(fx); + a_f(j,1) -= static_cast(fy); + a_f(j,2) -= static_cast(fz); + a_torque(j,0) -= static_cast(jrad*tor1); + a_torque(j,1) -= static_cast(jrad*tor2); + a_torque(j,2) -= static_cast(jrad*tor3); } if (VFLAG) @@ -471,58 +478,41 @@ void PairGranHookeHistoryKokkos::ev_tally_xyz(EV_FLOAT &ev, int i, i { Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = d_vatom; - const KK_FLOAT v0 = delx*fx; - const KK_FLOAT v1 = dely*fy; - const KK_FLOAT v2 = delz*fz; - const KK_FLOAT v3 = delx*fy; - const KK_FLOAT v4 = delx*fz; - const KK_FLOAT v5 
= dely*fz; - - if (vflag_global) { - if (NEWTON_PAIR) { // neigh half, newton on - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; - } else { // neigh half, newton off - if (i < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - if (j < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + if (vflag_global || vflag_atom) { + const KK_ACC_FLOAT v_acc[6] = + { static_cast(delx*fx), + static_cast(dely*fy), + static_cast(delz*fz), + static_cast(delx*fy), + static_cast(delx*fz), + static_cast(dely*fz) }; + + if (vflag_global) { + if (NEWTON_PAIR) { // neigh half, newton on + for (int n = 0; n < 6; n++) + ev.v[n] += v_acc[n]; + } else { // neigh half, newton off + if (i < nlocal) { + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(0.5) * v_acc[n]; + } + if (j < nlocal) { + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(0.5) * v_acc[n]; + } } } - } - if (vflag_atom) { + if (vflag_atom) { - if (NEWTON_PAIR || i < nlocal) { - v_vatom(i,0) += 0.5*v0; - v_vatom(i,1) += 0.5*v1; - v_vatom(i,2) += 0.5*v2; - v_vatom(i,3) += 0.5*v3; - v_vatom(i,4) += 0.5*v4; - v_vatom(i,5) += 0.5*v5; - } - if (NEWTON_PAIR || j < nlocal) { - v_vatom(j,0) += 0.5*v0; - v_vatom(j,1) += 0.5*v1; - v_vatom(j,2) += 0.5*v2; - v_vatom(j,3) += 0.5*v3; - v_vatom(j,4) += 0.5*v4; - v_vatom(j,5) += 0.5*v5; + if (NEWTON_PAIR || i < nlocal) { + for (int n = 0; n < 6; n++) + v_vatom(i,n) += static_cast(0.5) * v_acc[n]; + } + if (NEWTON_PAIR || j < nlocal) { + for (int n = 0; n < 6; n++) + v_vatom(j,n) += static_cast(0.5) * v_acc[n]; + } } } } diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index a98a942492e..aceead44b4a 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ 
-66,7 +66,7 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { typename AT::t_kkfloat_1d_3_randomread v; typename AT::t_kkfloat_1d_3_randomread omega; typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d_randomread type; typename AT::t_int_1d_randomread mask; typename AT::t_kkfloat_1d_randomread rmass; @@ -92,6 +92,8 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { int neighflag; int nlocal,nall,eflag,vflag; + KK_FLOAT kt_kk, kn_kk, xmu_kk, gammat_kk, gamman_kk, dt_kk; + FixNeighHistoryKokkos *fix_historyKK; KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp index fe00725b2df..fc750925695 100644 --- a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.cpp @@ -225,7 +225,7 @@ void PairLJCutDipoleCutKokkos::operator()(TagPairLJCutDipoleCutKerne // The f and torque arrays are atomic for Half/Thread neighbor style Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; - Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; const int i = d_ilist[ii]; const KK_FLOAT xtmp = x(i,0); diff --git a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h index 87f1eaa03ee..9fa3a2a8cbc 100644 --- a/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_dipole_cut_kokkos.h @@ -77,7 +77,7 @@ class PairLJCutDipoleCutKokkos : public PairLJCutDipoleCut { typename AT::t_kkfloat_1d_3_lr_randomread x; typename AT::t_kkfloat_1d_3_lr c_x; typename AT::t_kkacc_1d_3 f; - typename AT::t_kkfloat_1d_3 torque; + typename AT::t_kkacc_1d_3 torque; typename AT::t_int_1d_randomread type; typename AT::t_kkfloat_1d_randomread q; typename AT::t_kkfloat_1d_4_randomread mu; diff --git a/src/KOKKOS/verlet_kokkos.cpp 
b/src/KOKKOS/verlet_kokkos.cpp index e68a3cb0e75..81c42441919 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -557,7 +557,7 @@ void VerletKokkos::force_clear() atomKK->modified(Device,F_MASK); if (torqueflag) { - Kokkos::parallel_for(nall, Zero(atomKK->k_torque.view_device())); + Kokkos::parallel_for(nall, Zero(atomKK->k_torque.view_device())); atomKK->modified(Device,TORQUE_MASK); } @@ -579,7 +579,7 @@ void VerletKokkos::force_clear() atomKK->modified(Device,F_MASK); if (torqueflag) { - Kokkos::parallel_for(atomKK->nfirst, Zero(atomKK->k_torque.view_device())); + Kokkos::parallel_for(atomKK->nfirst, Zero(atomKK->k_torque.view_device())); atomKK->modified(Device,TORQUE_MASK); } @@ -598,7 +598,7 @@ void VerletKokkos::force_clear() atomKK->modified(Device,F_MASK); if (torqueflag) { - Kokkos::parallel_for(range, Zero(atomKK->k_torque.view_device())); + Kokkos::parallel_for(range, Zero(atomKK->k_torque.view_device())); atomKK->modified(Device,TORQUE_MASK); } From af100effc1b2e898335f3ff08cee1dd7d25fc7fa Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 27 Oct 2025 15:59:49 -0400 Subject: [PATCH 091/604] must not dereference null pointers and get compute/fix name from argi instead --- src/thermo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/thermo.cpp b/src/thermo.cpp index bcd26a15fb5..5798a50ea8f 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -454,7 +454,7 @@ void Thermo::compute(int flag) // add each thermo value to line with its specific format if (update_field_data) { lock_cache(); - if ((int)field_data.size() != nfield) field_data.resize(nfield); + if ((int) field_data.size() != nfield) field_data.resize(nfield); } for (ifield = 0; ifield < nfield; ifield++) { @@ -1103,7 +1103,7 @@ void Thermo::parse_fields(const std::string &str) auto *icompute = modify->get_compute_by_id(argi.get_name()); if (!icompute) error->all(FLERR, nfield + 1, "Could not find thermo custom compute ID: {}", - 
icompute->id); + argi.get_name()); if (argi.get_dim() == 0) { // scalar if (icompute->scalar_flag == 0) error->all(FLERR, nfield + 1, "Thermo custom compute {} does not compute a scalar", @@ -1143,7 +1143,7 @@ void Thermo::parse_fields(const std::string &str) } else if (argi.get_type() == ArgInfo::FIX) { auto *ifix = modify->get_fix_by_id(argi.get_name()); if (!ifix) - error->all(FLERR, nfield + 1, "Could not find thermo custom fix ID: {}", ifix->id); + error->all(FLERR, nfield + 1, "Could not find thermo custom fix ID: {}", argi.get_name()); if (argi.get_dim() == 0) { // scalar if (ifix->scalar_flag == 0) error->all(FLERR, nfield + 1, "Thermo custom fix {} does not compute a scalar", From acfcfa95aa1c5172ea54be66e3933ae28c317227 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 27 Oct 2025 14:22:31 -0600 Subject: [PATCH 092/604] Fix a few issues --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 38 ++++++++++++++------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index fe121077ede..3f6373a97e3 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -286,6 +286,7 @@ struct AtomVecHybridKokkos_PackCommSelf { _spw(i+_nfirst,0) = _sp(j,0); _spw(i+_nfirst,1) = _sp(j,1); _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); } if (_datamask & DPDTHETA_MASK) @@ -914,6 +915,7 @@ struct AtomVecHybridKokkos_UnpackCommVel { _sp(i+_first,0) = _buf(i,m++); _sp(i+_first,1) = _buf(i,m++); _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); } if (_datamask & OMEGA_MASK) { @@ -1204,24 +1206,24 @@ struct AtomVecHybridKokkos_PackBorder { const typename AT::t_int_1d_const &list, const double &dx, const double &dy, const double &dz, const unsigned int &datamask): - _buf(buf),_list(list), - _x(atomKK->k_x.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - 
_molecule(atomKK->k_molecule.view()), - _q(atomKK->k_q.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} + _buf(buf),_list(list), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { From 915fa1e01b8f744803cdddc82280aeaa20b6f000 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 27 Oct 2025 16:57:41 -0400 Subject: [PATCH 093/604] may reset AtomVec::maxexchange only when the data file is not added Note that *only* in that case this value is updated with the communication size requirements. Otherwise the code only checks that the number of xxx/per/atom for the added data file does not exceed the limit. 
--- src/read_data.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index a30482f73e0..391891a13c3 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -710,8 +710,8 @@ void ReadData::command(int narg, char **arg) if (firstpass) { delete lmap; lmap = new LabelMap(lmp, ntypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes); - // reset maxexchange - atom->avec->maxexchange = 0; + // reset maxexchange if this not an added data file + if (addflag == NONE) atom->avec->maxexchange = 0; } // ------------------------------------------------------------------------------------- From d96aba6f79ab53e82eb76f7b98e26f36ef2d1746 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:05:04 -0700 Subject: [PATCH 094/604] Removed silent conversions from pair_kokkos.h, along with the pairwise potentials that utilize that abstraction as used in LJ, Chain, Rhodopsin, and SPC/E --- src/KOKKOS/pair_kokkos.h | 348 ++++++++---------- .../pair_lj_charmm_coul_long_kokkos.cpp | 129 +++---- src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h | 5 + src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp | 115 +++--- src/KOKKOS/pair_lj_cut_coul_long_kokkos.h | 2 + src/KOKKOS/pair_lj_cut_kokkos.cpp | 40 +- 6 files changed, 301 insertions(+), 338 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 1f2b1a325bd..1ded42d5430 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -140,14 +140,14 @@ struct PairComputeFunctor { const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; - KK_ACC_FLOAT fxtmp = 0.0; - KK_ACC_FLOAT fytmp = 0.0; - KK_ACC_FLOAT fztmp = 0.0; + KK_ACC_FLOAT fxtmp = 0; + KK_ACC_FLOAT fytmp = 0; + KK_ACC_FLOAT fztmp = 0; if (NEIGHFLAG == FULL && ZEROFLAG) { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + f(i,0) = 0; + f(i,1) = 0; + f(i,2) = 0; } for (int jj = 0; jj < jnum; jj++) { @@ -164,21 +164,22 @@ struct 
PairComputeFunctor { const KK_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; + fxtmp += static_cast(delx*fpair); + fytmp += static_cast(dely*fpair); + fztmp += static_cast(delz*fpair); if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) { - a_f(j,0) -= delx*fpair; - a_f(j,1) -= dely*fpair; - a_f(j,2) -= delz*fpair; + a_f(j,0) -= static_cast(delx*fpair); + a_f(j,1) -= static_cast(dely*fpair); + a_f(j,2) -= static_cast(delz*fpair); } if (EVFLAG) { KK_FLOAT evdwl = 0.0; if (c.eflag_either) { evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); - ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD)&&(NEWTON_PAIR||(j(1.0):static_cast(0.5)); + ev.evdwl += static_cast(scale * evdwl); } if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); @@ -187,9 +188,9 @@ struct PairComputeFunctor { } - a_f(i,0) += fxtmp; - a_f(i,1) += fytmp; - a_f(i,2) += fztmp; + a_f(i,0) += static_cast(fxtmp); + a_f(i,1) += static_cast(fytmp); + a_f(i,2) += static_cast(fztmp); return ev; } @@ -215,14 +216,14 @@ struct PairComputeFunctor { const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; - KK_ACC_FLOAT fxtmp = 0.0; - KK_ACC_FLOAT fytmp = 0.0; - KK_ACC_FLOAT fztmp = 0.0; + KK_ACC_FLOAT fxtmp = 0; + KK_ACC_FLOAT fytmp = 0; + KK_ACC_FLOAT fztmp = 0; if (NEIGHFLAG == FULL && ZEROFLAG) { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + f(i,0) = 0; + f(i,1) = 0; + f(i,2) = 0; } for (int jj = 0; jj < jnum; jj++) { @@ -245,14 +246,14 @@ struct PairComputeFunctor { if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) fpair+=c.template compute_fcoul(rsq,i,j,itype,jtype,factor_coul,qtmp); - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; + fxtmp += static_cast(delx*fpair); + fytmp += static_cast(dely*fpair); + fztmp += 
static_cast(delz*fpair); if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) { - a_f(j,0) -= delx*fpair; - a_f(j,1) -= dely*fpair; - a_f(j,2) -= delz*fpair; + a_f(j,0) -= static_cast(delx*fpair); + a_f(j,1) -= static_cast(dely*fpair); + a_f(j,2) -= static_cast(delz*fpair); } if (EVFLAG) { @@ -261,11 +262,13 @@ struct PairComputeFunctor { if (c.eflag_either) { if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) { evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); - ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*evdwl; + const auto scale = (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD)&&(NEWTON_PAIR||(j(1.0):static_cast(0.5)); + ev.evdwl += static_cast(scale * evdwl); } if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) { ecoul = c.template compute_ecoul(rsq,i,j,itype,jtype,factor_coul,qtmp); - ev.ecoul += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*ecoul; + const auto scale = (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD)&&(NEWTON_PAIR||(j(1.0):static_cast(0.5)); + ev.ecoul += static_cast(scale * ecoul); } } @@ -274,9 +277,9 @@ struct PairComputeFunctor { } } - a_f(i,0) += fxtmp; - a_f(i,1) += fytmp; - a_f(i,2) += fztmp; + a_f(i,0) += static_cast(fxtmp); + a_f(i,1) += static_cast(fytmp); + a_f(i,2) += static_cast(fztmp); return ev; } @@ -340,18 +343,18 @@ struct PairComputeFunctor { ftmp.z += fz; if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; + a_f(j,0) -= static_cast(fx); + a_f(j,1) -= static_cast(fy); + a_f(j,2) -= static_cast(fz); } } },fsum); Kokkos::single(Kokkos::PerThread(team), [&] () { - a_f(i,0) += fsum.x; - a_f(i,1) += fsum.y; - a_f(i,2) += fsum.z; + a_f(i,0) += static_cast(fsum.x); + a_f(i,1) += static_cast(fsum.y); + a_f(i,2) += static_cast(fsum.z); }); }); @@ 
-382,9 +385,9 @@ struct PairComputeFunctor { if (NEIGHFLAG == FULL && ZEROFLAG) { Kokkos::single(Kokkos::PerThread(team), [&] () { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + f(i,0) = 0; + f(i,1) = 0; + f(i,2) = 0; }); } @@ -423,18 +426,18 @@ struct PairComputeFunctor { ftmp.z += fz; if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; + a_f(j,0) -= static_cast(fx); + a_f(j,1) -= static_cast(fy); + a_f(j,2) -= static_cast(fz); } } },fsum); Kokkos::single(Kokkos::PerThread(team), [&] () { - a_f(i,0) += fsum.x; - a_f(i,1) += fsum.y; - a_f(i,2) += fsum.z; + a_f(i,0) += static_cast(fsum.x); + a_f(i,1) += static_cast(fsum.y); + a_f(i,2) += static_cast(fsum.z); }); }); } @@ -467,9 +470,9 @@ struct PairComputeFunctor { if (NEIGHFLAG == FULL && ZEROFLAG) { Kokkos::single(Kokkos::PerThread(team), [&] () { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + f(i,0) = 0; + f(i,1) = 0; + f(i,2) = 0; }); } @@ -498,67 +501,57 @@ struct PairComputeFunctor { const KK_FLOAT fy = dely*fpair; const KK_FLOAT fz = delz*fpair; - fev_tmp.f[0] += fx; - fev_tmp.f[1] += fy; - fev_tmp.f[2] += fz; + fev_tmp.f[0] += static_cast(fx); + fev_tmp.f[1] += static_cast(fy); + fev_tmp.f[2] += static_cast(fz); const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD); const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal); - const KK_FLOAT factor = J_CONTRIB?1.0:0.5; + const KK_FLOAT factor = J_CONTRIB?static_cast(1.0):static_cast(0.5); if (J_CONTRIB) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; + a_f(j,0) -= static_cast(fx); + a_f(j,1) -= static_cast(fy); + a_f(j,2) -= static_cast(fz); } KK_FLOAT evdwl = 0.0; if (c.eflag_either) { evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); - fev_tmp.evdwl += factor * evdwl; + fev_tmp.evdwl += static_cast(factor * evdwl); if (c.eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * evdwl; + const KK_FLOAT epairhalf = 
static_cast(0.5) * evdwl; if (I_CONTRIB) - a_eatom[i] += epairhalf; + a_eatom[i] += static_cast(epairhalf); if (J_CONTRIB) - a_eatom[j] += epairhalf; + a_eatom[j] += static_cast(epairhalf); } } if (c.vflag_either) { - const KK_FLOAT v0 = delx*delx*fpair; - const KK_FLOAT v1 = dely*dely*fpair; - const KK_FLOAT v2 = delz*delz*fpair; - const KK_FLOAT v3 = delx*dely*fpair; - const KK_FLOAT v4 = delx*delz*fpair; - const KK_FLOAT v5 = dely*delz*fpair; - - fev_tmp.v[0] += factor*v0; - fev_tmp.v[1] += factor*v1; - fev_tmp.v[2] += factor*v2; - fev_tmp.v[3] += factor*v3; - fev_tmp.v[4] += factor*v4; - fev_tmp.v[5] += factor*v5; + const KK_FLOAT v_acc[6] = { delx*delx*fpair, + dely*dely*fpair, + delz*delz*fpair, + delx*dely*fpair, + delx*delz*fpair, + dely*delz*fpair }; + + const auto one_half = static_cast(0.5); + + for (int n = 0; n < 6; n++) + fev_tmp.v[n] += static_cast(factor *v_acc[n]); if (c.vflag_atom) { if (I_CONTRIB) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(i, n) += static_cast(one_half * v_acc[n]); } if (J_CONTRIB) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(j, n) += static_cast(one_half * v_acc[n]); } } } @@ -566,20 +559,15 @@ struct PairComputeFunctor { },fev); Kokkos::single(Kokkos::PerThread(team), [&] () { - a_f(i,0) += fev.f[0]; - a_f(i,1) += fev.f[1]; - a_f(i,2) += fev.f[2]; + for (int n = 0; n < 3; n++) + a_f(i,n) += static_cast(fev.f[n]); if (c.eflag_global) ev.evdwl += fev.evdwl; if (c.vflag_global) { - ev.v[0] += fev.v[0]; - ev.v[1] += fev.v[1]; - ev.v[2] += fev.v[2]; - ev.v[3] += fev.v[3]; - ev.v[4] += fev.v[4]; - ev.v[5] += fev.v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += fev.v[n]; } if (NEIGHFLAG == FULL) { @@ -588,12 +576,8 @@ struct 
PairComputeFunctor { a_eatom(i) += fev.evdwl; if (c.vflag_atom) { - a_vatom(i,0) += fev.v[0]; - a_vatom(i,1) += fev.v[1]; - a_vatom(i,2) += fev.v[2]; - a_vatom(i,3) += fev.v[3]; - a_vatom(i,4) += fev.v[4]; - a_vatom(i,5) += fev.v[5]; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += fev.v[n]; } } }); @@ -666,18 +650,18 @@ struct PairComputeFunctor { const KK_FLOAT fy = dely*fpair; const KK_FLOAT fz = delz*fpair; - fev_tmp.f[0] += fx; - fev_tmp.f[1] += fy; - fev_tmp.f[2] += fz; + fev_tmp.f[0] += static_cast(fx); + fev_tmp.f[1] += static_cast(fy); + fev_tmp.f[2] += static_cast(fz); const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD); const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal); - const KK_FLOAT factor = J_CONTRIB?1.0:0.5; + const KK_FLOAT factor = J_CONTRIB?static_cast(1.0):static_cast(0.5); if (J_CONTRIB) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; + a_f(j,0) -= static_cast(fx); + a_f(j,1) -= static_cast(fy); + a_f(j,2) -= static_cast(fz); } KK_FLOAT evdwl = 0.0; @@ -685,16 +669,16 @@ struct PairComputeFunctor { if (c.eflag_either) { if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) { evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); - fev_tmp.evdwl += factor * evdwl; + fev_tmp.evdwl += static_cast(factor * evdwl); } if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) { ecoul = c.template compute_ecoul(rsq,i,j,itype,jtype,factor_coul,qtmp); - fev_tmp.ecoul += factor * ecoul; + fev_tmp.ecoul += static_cast(factor * ecoul); } if (c.eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * (evdwl + ecoul); + const KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * (evdwl + ecoul)); if (I_CONTRIB) a_eatom[i] += epairhalf; @@ -705,36 +689,25 @@ struct PairComputeFunctor { } if (c.vflag_either) { - const KK_FLOAT v0 = delx*delx*fpair; - const KK_FLOAT v1 = dely*dely*fpair; - const KK_FLOAT v2 = delz*delz*fpair; - const KK_FLOAT v3 = 
delx*dely*fpair; - const KK_FLOAT v4 = delx*delz*fpair; - const KK_FLOAT v5 = dely*delz*fpair; - - fev_tmp.v[0] += factor*v0; - fev_tmp.v[1] += factor*v1; - fev_tmp.v[2] += factor*v2; - fev_tmp.v[3] += factor*v3; - fev_tmp.v[4] += factor*v4; - fev_tmp.v[5] += factor*v5; + const KK_FLOAT v_acc[6] = { delx*delx*fpair, + dely*dely*fpair, + delz*delz*fpair, + delx*dely*fpair, + delx*delz*fpair, + dely*delz*fpair }; + const auto one_half = static_cast(0.5); + + for (int n = 0; n < 6; n++) + fev_tmp.v[n] += static_cast(factor * v_acc[n]); if (c.vflag_atom) { if (I_CONTRIB) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += static_cast(one_half * v_acc[n]); } if (J_CONTRIB) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(j,n) += static_cast(one_half * v_acc[n]); } } } @@ -742,9 +715,8 @@ struct PairComputeFunctor { },fev); Kokkos::single(Kokkos::PerThread(team), [&] () { - a_f(i,0) += fev.f[0]; - a_f(i,1) += fev.f[1]; - a_f(i,2) += fev.f[2]; + for (int n = 0; n < 3; n++) + a_f(i,n) += fev.f[n]; if (c.eflag_global) { ev.evdwl += fev.evdwl; @@ -752,12 +724,8 @@ struct PairComputeFunctor { } if (c.vflag_global) { - ev.v[0] += fev.v[0]; - ev.v[1] += fev.v[1]; - ev.v[2] += fev.v[2]; - ev.v[3] += fev.v[3]; - ev.v[4] += fev.v[4]; - ev.v[5] += fev.v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += fev.v[n]; } if (NEIGHFLAG == FULL) { @@ -766,12 +734,8 @@ struct PairComputeFunctor { a_eatom(i) += fev.evdwl + fev.ecoul; if (c.vflag_atom) { - a_vatom(i,0) += fev.v[0]; - a_vatom(i,1) += fev.v[1]; - a_vatom(i,2) += fev.v[2]; - a_vatom(i,3) += fev.v[3]; - a_vatom(i,4) += fev.v[4]; - a_vatom(i,5) += fev.v[5]; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += fev.v[n]; } } }); @@ -793,7 
+757,7 @@ struct PairComputeFunctor { if (EFLAG) { if (c.eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * epair; + const KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * epair); if (NEWTON_PAIR || i < c.nlocal) a_eatom[i] += epairhalf; if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) a_eatom[j] += epairhalf; } @@ -806,60 +770,44 @@ struct PairComputeFunctor { const KK_FLOAT v3 = delx*dely*fpair; const KK_FLOAT v4 = delx*delz*fpair; const KK_FLOAT v5 = dely*delz*fpair; + const auto one_half = static_cast(0.5); + + const KK_ACC_FLOAT v_acc[6] = { static_cast(one_half*v0), + static_cast(one_half*v1), + static_cast(one_half*v2), + static_cast(one_half*v3), + static_cast(one_half*v4), + static_cast(one_half*v5) }; if (c.vflag_global) { if (NEIGHFLAG != FULL) { if (NEWTON_PAIR) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(2) * v_acc[n]; } else { if (i < c.nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v_acc[n]; } if (j < c.nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v_acc[n]; } } } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v_acc[n]; } } if (c.vflag_atom) { if (NEWTON_PAIR || i < c.nlocal) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += v_acc[n]; } if ((NEWTON_PAIR || j < c.nlocal) && NEIGHFLAG != FULL) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) 
+= 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(j,n) += v_acc[n]; } } } @@ -961,7 +909,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P lastcall = fpair->lmp->update->ntimestep; vectorsize = GetMaxNeighs(list); if (vectorsize == 0) vectorsize = 1; - vectorsize = MathSpecial::powint(2.0,(int(log2(double(vectorsize)) + 0.5))); // round to nearest power of 2 + vectorsize = static_cast(MathSpecial::powint(2.0,(int(log2(double(vectorsize)) + 0.5)))); // round to nearest power of 2 #if defined(KOKKOS_ENABLE_HIP) int max_vectorsize = 64; @@ -1062,12 +1010,12 @@ struct PairVirialFDotRCompute { KOKKOS_INLINE_FUNCTION void operator()(const int j, value_type &energy_virial) const { const int i = j + offset; - energy_virial.v[0] += f(i,0)*x(i,0); - energy_virial.v[1] += f(i,1)*x(i,1); - energy_virial.v[2] += f(i,2)*x(i,2); - energy_virial.v[3] += f(i,1)*x(i,0); - energy_virial.v[4] += f(i,2)*x(i,0); - energy_virial.v[5] += f(i,2)*x(i,1); + energy_virial.v[0] += f(i,0)*static_cast(x(i,0)); + energy_virial.v[1] += f(i,1)*static_cast(x(i,1)); + energy_virial.v[2] += f(i,2)*static_cast(x(i,2)); + energy_virial.v[3] += f(i,1)*static_cast(x(i,0)); + energy_virial.v[4] += f(i,2)*static_cast(x(i,0)); + energy_virial.v[5] += f(i,2)*static_cast(x(i,1)); } }; @@ -1084,12 +1032,8 @@ void pair_virial_fdotr_compute(PairStyle* fpair) { virial+=virial_ghost; } fpair->vflag_fdotr = 0; - fpair->virial[0] = virial.v[0]; - fpair->virial[1] = virial.v[1]; - fpair->virial[2] = virial.v[2]; - fpair->virial[3] = virial.v[3]; - fpair->virial[4] = virial.v[4]; - fpair->virial[5] = virial.v[5]; + for (int n = 0; n < 6; n++) + fpair->virial[n] = static_cast(virial.v[n]); } } diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index 08ec87499b4..9d51d5dc158 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ 
b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -103,17 +103,22 @@ void PairLJCharmmCoulLongKokkos::compute(int eflag_in, int vflag_in) type = atomKK->k_type.view(); nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; - special_coul[0] = force->special_coul[0]; - special_coul[1] = force->special_coul[1]; - special_coul[2] = force->special_coul[2]; - special_coul[3] = force->special_coul[3]; - qqrd2e = force->qqrd2e; + special_lj[0] = static_cast(force->special_lj[0]); + special_lj[1] = static_cast(force->special_lj[1]); + special_lj[2] = static_cast(force->special_lj[2]); + special_lj[3] = static_cast(force->special_lj[3]); + special_coul[0] = static_cast(force->special_coul[0]); + special_coul[1] = static_cast(force->special_coul[1]); + special_coul[2] = static_cast(force->special_coul[2]); + special_coul[3] = static_cast(force->special_coul[3]); + qqrd2e = static_cast(force->qqrd2e); newton_pair = force->newton_pair; + g_ewald_kk = static_cast(g_ewald); + denom_lj_inv_kk = static_cast(1.0 / denom_lj); + cut_ljsq_kk = static_cast(cut_ljsq); + cut_lj_innersq_kk = static_cast(cut_lj_innersq); + // loop over neighbors of my atoms copymode = 1; @@ -128,16 +133,16 @@ void PairLJCharmmCoulLongKokkos::compute(int eflag_in, int vflag_in) if (eflag) { - eng_vdwl += ev.evdwl; - eng_coul += ev.ecoul; + eng_vdwl += static_cast(ev.evdwl); + eng_coul += static_cast(ev.ecoul); } if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -164,7 +169,7 @@ 
KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCharmmCoulLongKokkos:: compute_fpair(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; KK_FLOAT forcelj, switch1, switch2, englj; @@ -172,10 +177,10 @@ compute_fpair(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; - switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj; + if (rsq > cut_lj_innersq_kk) { + switch1 = (cut_ljsq_kk-rsq) * (cut_ljsq_kk-rsq) * + (cut_ljsq_kk + static_cast(2.0)*rsq - static_cast(3.0)*cut_lj_innersq_kk) * denom_lj_inv_kk; + switch2 = static_cast(12.0)*rsq * (cut_ljsq_kk-rsq) * (rsq-cut_lj_innersq_kk) * denom_lj_inv_kk; englj = r6inv * ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); @@ -194,7 +199,7 @@ KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCharmmCoulLongKokkos:: compute_evdwl(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; KK_FLOAT englj, switch1; @@ -202,9 +207,9 @@ compute_evdwl(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)); - if (rsq > cut_lj_innersq) { - switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * - (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + if (rsq > cut_lj_innersq_kk) { + switch1 = (cut_ljsq_kk-rsq) * 
(cut_ljsq_kk-rsq) * + (cut_ljsq_kk + static_cast(2.0)*rsq - static_cast(3.0)*cut_lj_innersq_kk) * denom_lj_inv_kk; englj *= switch1; } return englj; @@ -220,29 +225,29 @@ KK_FLOAT PairLJCharmmCoulLongKokkos:: compute_fcoul(const KK_FLOAT& rsq, const int& /*i*/, const int&j, const int& /*itype*/, const int& /*jtype*/, const KK_FLOAT& factor_coul, const KK_FLOAT& qtmp) const { - if (Specialisation::DoTable && rsq > tabinnersq) { + if (Specialisation::DoTable && rsq > tabinnersq_kk) { union_int_float_t rsq_lookup; rsq_lookup.f = rsq; const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; const KK_FLOAT fraction = ((KK_FLOAT)rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; const KK_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; KK_FLOAT forcecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { + if (factor_coul < static_cast(1.0)) { const KK_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; const KK_FLOAT prefactor = qtmp*q[j] * table; - forcecoul -= (1.0-factor_coul)*prefactor; + forcecoul -= (static_cast(1.0)-factor_coul)*prefactor; } return forcecoul/rsq; } else { const KK_FLOAT r = sqrt(rsq); - const KK_FLOAT grij = g_ewald * r; + const KK_FLOAT grij = g_ewald_kk * r; const KK_FLOAT expm2 = exp(-grij*grij); - const KK_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); - const KK_FLOAT rinv = 1.0/r; - const KK_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const KK_FLOAT t = static_cast(1.0) / (static_cast(1.0) + static_cast(EWALD_P)*grij); + const KK_FLOAT rinv = static_cast(1.0) / r; + const KK_FLOAT erfc = t * (static_cast(A1)+t*(static_cast(A2)+t*(static_cast(A3)+t*(static_cast(A4)+t*static_cast(A5))))) * expm2; const KK_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; - KK_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + KK_FLOAT forcecoul = prefactor * (erfc + static_cast(EWALD_F)*grij*expm2); + if (factor_coul < static_cast(1.0)) forcecoul -= 
(static_cast(1.0)-factor_coul)*prefactor; return forcecoul*rinv*rinv; } @@ -257,28 +262,28 @@ KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCharmmCoulLongKokkos:: compute_ecoul(const KK_FLOAT& rsq, const int& /*i*/, const int&j, const int& /*itype*/, const int& /*jtype*/, const KK_FLOAT& factor_coul, const KK_FLOAT& qtmp) const { - if (Specialisation::DoTable && rsq > tabinnersq) { + if (Specialisation::DoTable && rsq > tabinnersq_kk) { union_int_float_t rsq_lookup; rsq_lookup.f = rsq; const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; const KK_FLOAT fraction = ((KK_FLOAT)rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; const KK_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; KK_FLOAT ecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { + if (factor_coul < static_cast(1.0)) { const KK_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; const KK_FLOAT prefactor = qtmp*q[j] * table; - ecoul -= (1.0-factor_coul)*prefactor; + ecoul -= (static_cast(1.0)-factor_coul)*prefactor; } return ecoul; } else { const KK_FLOAT r = sqrt(rsq); - const KK_FLOAT grij = g_ewald * r; + const KK_FLOAT grij = g_ewald_kk * r; const KK_FLOAT expm2 = exp(-grij*grij); - const KK_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); - const KK_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const KK_FLOAT t = static_cast(1.0) / (static_cast(1.0) + static_cast(EWALD_P)*grij); + const KK_FLOAT erfc = t * (static_cast(A1)+t*(static_cast(A2)+t*(static_cast(A3)+t*(static_cast(A4)+t*static_cast(A5))))) * expm2; const KK_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; KK_FLOAT ecoul = prefactor * erfc; - if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + if (factor_coul < static_cast(1.0)) ecoul -= (static_cast(1.0)-factor_coul)*prefactor; return ecoul; } } @@ -316,14 +321,14 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double int ntable = 1; for (int i = 0; i < ncoultablebits; i++) ntable *= 2; - + tabinnersq_kk = static_cast(tabinnersq); // Copy 
rtable and drtable { host_table_type h_table("HostTable",ntable); table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = rtable[i]; + h_table(i) = static_cast(rtable[i]); } Kokkos::deep_copy(d_table,h_table); d_rtable = d_table; @@ -333,7 +338,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double host_table_type h_table("HostTable",ntable); table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = drtable[i]; + h_table(i) = static_cast(drtable[i]); } Kokkos::deep_copy(d_table,h_table); d_drtable = d_table; @@ -345,7 +350,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double // Copy ftable and dftable for (int i = 0; i < ntable; i++) { - h_table(i) = ftable[i]; + h_table(i) = static_cast(ftable[i]); } Kokkos::deep_copy(d_table,h_table); d_ftable = d_table; @@ -356,7 +361,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = dftable[i]; + h_table(i) = static_cast(dftable[i]); } Kokkos::deep_copy(d_table,h_table); d_dftable = d_table; @@ -368,7 +373,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double // Copy ctable and dctable for (int i = 0; i < ntable; i++) { - h_table(i) = ctable[i]; + h_table(i) = static_cast(ctable[i]); } Kokkos::deep_copy(d_table,h_table); d_ctable = d_table; @@ -379,7 +384,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = dctable[i]; + h_table(i) = static_cast(dctable[i]); } Kokkos::deep_copy(d_table,h_table); d_dctable = d_table; @@ -391,7 +396,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double // Copy etable and detable for (int i = 0; i < ntable; i++) { - h_table(i) = etable[i]; + h_table(i) = static_cast(etable[i]); } Kokkos::deep_copy(d_table,h_table); d_etable = 
d_table; @@ -402,7 +407,7 @@ void PairLJCharmmCoulLongKokkos::init_tables(double cut_coul, double table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = detable[i]; + h_table(i) = static_cast(detable[i]); } Kokkos::deep_copy(d_table,h_table); d_detable = d_table; @@ -418,8 +423,8 @@ void PairLJCharmmCoulLongKokkos::init_style() { PairLJCharmmCoulLong::init_style(); - Kokkos::deep_copy(d_cut_ljsq,cut_ljsq); - Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + Kokkos::deep_copy(d_cut_ljsq,static_cast(cut_ljsq)); + Kokkos::deep_copy(d_cut_coulsq,static_cast(cut_coulsq)); // error if rRESPA with inner levels @@ -450,20 +455,20 @@ double PairLJCharmmCoulLongKokkos::init_one(int i, int j) { double cutone = PairLJCharmmCoulLong::init_one(i,j); - k_params.view_host()(i,j).lj1 = lj1[i][j]; - k_params.view_host()(i,j).lj2 = lj2[i][j]; - k_params.view_host()(i,j).lj3 = lj3[i][j]; - k_params.view_host()(i,j).lj4 = lj4[i][j]; + k_params.view_host()(i,j).lj1 = static_cast(lj1[i][j]); + k_params.view_host()(i,j).lj2 = static_cast(lj2[i][j]); + k_params.view_host()(i,j).lj3 = static_cast(lj3[i][j]); + k_params.view_host()(i,j).lj4 = static_cast(lj4[i][j]); //k_params.view_host()(i,j).offset = offset[i][j]; - k_params.view_host()(i,j).cut_ljsq = cut_ljsq; - k_params.view_host()(i,j).cut_coulsq = cut_coulsq; + k_params.view_host()(i,j).cut_ljsq = static_cast(cut_ljsq); + k_params.view_host()(i,j).cut_coulsq = static_cast(cut_coulsq); k_params.view_host()(j,i) = k_params.view_host()(i,j); if (i(cutone*cutone); + m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = static_cast(cut_ljsq); + m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = static_cast(cut_coulsq); } k_cutsq.view_host()(i,j) = k_cutsq.view_host()(j,i) = cutone*cutone; diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h index dfee4ab4c12..08e9d3b8a8b 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h @@ 
-103,6 +103,11 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong { KK_FLOAT special_coul[4]; KK_FLOAT special_lj[4]; KK_FLOAT qqrd2e; + KK_FLOAT g_ewald_kk; + KK_FLOAT denom_lj_inv_kk; + KK_FLOAT cut_lj_innersq_kk; + KK_FLOAT cut_ljsq_kk; + KK_FLOAT tabinnersq_kk; void allocate() override; diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp index 300dbf76400..30bbfd1ef3e 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -103,17 +103,19 @@ void PairLJCutCoulLongKokkos::compute(int eflag_in, int vflag_in) type = atomKK->k_type.view(); nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; - special_coul[0] = force->special_coul[0]; - special_coul[1] = force->special_coul[1]; - special_coul[2] = force->special_coul[2]; - special_coul[3] = force->special_coul[3]; - qqrd2e = force->qqrd2e; + special_lj[0] = static_cast(force->special_lj[0]); + special_lj[1] = static_cast(force->special_lj[1]); + special_lj[2] = static_cast(force->special_lj[2]); + special_lj[3] = static_cast(force->special_lj[3]); + special_coul[0] = static_cast(force->special_coul[0]); + special_coul[1] = static_cast(force->special_coul[1]); + special_coul[2] = static_cast(force->special_coul[2]); + special_coul[3] = static_cast(force->special_coul[3]); + qqrd2e = static_cast(force->qqrd2e); newton_pair = force->newton_pair; + g_ewald_kk = static_cast(g_ewald); + // loop over neighbors of my atoms EV_FLOAT ev; @@ -125,17 +127,17 @@ void PairLJCutCoulLongKokkos::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos*)list); if (eflag) { - eng_vdwl += ev.evdwl; - eng_coul += ev.ecoul; + eng_vdwl += static_cast(ev.evdwl); + eng_coul += static_cast(ev.ecoul); } if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] 
+= ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -161,7 +163,7 @@ KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCutCoulLongKokkos:: compute_fpair(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; KK_FLOAT forcelj; @@ -181,29 +183,31 @@ KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCutCoulLongKokkos:: compute_fcoul(const KK_FLOAT& rsq, const int& /*i*/, const int&j, const int& /*itype*/, const int& /*jtype*/, const KK_FLOAT& factor_coul, const KK_FLOAT& qtmp) const { - if (Specialisation::DoTable && rsq > tabinnersq) { + if (Specialisation::DoTable && rsq > tabinnersq_kk) { union_int_float_t rsq_lookup; rsq_lookup.f = rsq; const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; const KK_FLOAT fraction = ((KK_FLOAT)rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; const KK_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable]; KK_FLOAT forcecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { + if (factor_coul < static_cast(1.0)) { const KK_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; const KK_FLOAT prefactor = qtmp*q[j] * table; - forcecoul -= (1.0-factor_coul)*prefactor; + forcecoul -= (static_cast(1.0)-factor_coul)*prefactor; } return forcecoul/rsq; } else { const KK_FLOAT r = sqrt(rsq); - const KK_FLOAT grij = g_ewald * r; + const KK_FLOAT grij = g_ewald_kk * r; const KK_FLOAT expm2 = exp(-grij*grij); - const KK_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); - const KK_FLOAT rinv = 1.0/r; - const KK_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const KK_FLOAT t = 
static_cast(1.0) / (static_cast(1.0) + static_cast(EWALD_P)*grij); + const KK_FLOAT rinv = static_cast(1.0) / r; + const KK_FLOAT erfc = t * (static_cast(A1)+t*(static_cast(A2)+ + t * (static_cast(A3)+t*(static_cast(A4)+ + t * static_cast(A5))))) * expm2; const KK_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv; - KK_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + KK_FLOAT forcecoul = prefactor * (erfc + static_cast(EWALD_F)*grij*expm2); + if (factor_coul < static_cast(1.0)) forcecoul -= (static_cast(1.0)-factor_coul)*prefactor; return forcecoul*rinv*rinv; } @@ -218,7 +222,7 @@ KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCutCoulLongKokkos:: compute_evdwl(const KK_FLOAT& rsq, const int& /*i*/, const int& /*j*/, const int& itype, const int& jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; return r6inv* @@ -238,28 +242,30 @@ KK_FLOAT PairLJCutCoulLongKokkos:: compute_ecoul(const KK_FLOAT& rsq, const int& /*i*/, const int&j, const int& /*itype*/, const int& /*jtype*/, const KK_FLOAT& factor_coul, const KK_FLOAT& qtmp) const { - if (Specialisation::DoTable && rsq > tabinnersq) { + if (Specialisation::DoTable && rsq > tabinnersq_kk) { union_int_float_t rsq_lookup; rsq_lookup.f = rsq; const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; const KK_FLOAT fraction = ((KK_FLOAT)rsq_lookup.f - d_rtable[itable]) * d_drtable[itable]; const KK_FLOAT table = d_etable[itable] + fraction*d_detable[itable]; KK_FLOAT ecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { + if (factor_coul < static_cast(1.0)) { const KK_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable]; const KK_FLOAT prefactor = qtmp*q[j] * table; - ecoul -= (1.0-factor_coul)*prefactor; + ecoul -= (static_cast(1.0)-factor_coul)*prefactor; } return ecoul; } else { const KK_FLOAT r = sqrt(rsq); - const KK_FLOAT grij = g_ewald * r; + const 
KK_FLOAT grij = g_ewald_kk * r; const KK_FLOAT expm2 = exp(-grij*grij); - const KK_FLOAT t = 1.0 / (1.0 + EWALD_P*grij); - const KK_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const KK_FLOAT t = static_cast(1.0) / (static_cast(1.0) + static_cast(EWALD_P)*grij); + const KK_FLOAT erfc = t * (static_cast(A1)+t*(static_cast(A2)+ + t * (static_cast(A3)+t*(static_cast(A4)+ + t * static_cast(A5))))) * expm2; const KK_FLOAT prefactor = qqrd2e * qtmp*q[j]/r; KK_FLOAT ecoul = prefactor * erfc; - if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + if (factor_coul < static_cast(1.0)) ecoul -= (static_cast(1.0)-factor_coul)*prefactor; return ecoul; } } @@ -299,13 +305,14 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c int ntable = 1; for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + tabinnersq_kk = static_cast(tabinnersq); // Copy rtable and drtable { host_table_type h_table("HostTable",ntable); table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = rtable[i]; + h_table(i) = static_cast(rtable[i]); } Kokkos::deep_copy(d_table,h_table); d_rtable = d_table; @@ -315,7 +322,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c host_table_type h_table("HostTable",ntable); table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = drtable[i]; + h_table(i) = static_cast(drtable[i]); } Kokkos::deep_copy(d_table,h_table); d_drtable = d_table; @@ -327,7 +334,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c // Copy ftable and dftable for (int i = 0; i < ntable; i++) { - h_table(i) = ftable[i]; + h_table(i) = static_cast(ftable[i]); } Kokkos::deep_copy(d_table,h_table); d_ftable = d_table; @@ -338,7 +345,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = dftable[i]; + h_table(i) = static_cast(dftable[i]); } 
Kokkos::deep_copy(d_table,h_table); d_dftable = d_table; @@ -350,7 +357,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c // Copy ctable and dctable for (int i = 0; i < ntable; i++) { - h_table(i) = ctable[i]; + h_table(i) = static_cast(ctable[i]); } Kokkos::deep_copy(d_table,h_table); d_ctable = d_table; @@ -361,7 +368,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = dctable[i]; + h_table(i) = static_cast(dctable[i]); } Kokkos::deep_copy(d_table,h_table); d_dctable = d_table; @@ -373,7 +380,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c // Copy etable and detable for (int i = 0; i < ntable; i++) { - h_table(i) = etable[i]; + h_table(i) = static_cast(etable[i]); } Kokkos::deep_copy(d_table,h_table); d_etable = d_table; @@ -384,7 +391,7 @@ void PairLJCutCoulLongKokkos::init_tables(double cut_coul, double *c table_type d_table("DeviceTable",ntable); for (int i = 0; i < ntable; i++) { - h_table(i) = detable[i]; + h_table(i) = static_cast(detable[i]); } Kokkos::deep_copy(d_table,h_table); d_detable = d_table; @@ -400,7 +407,7 @@ void PairLJCutCoulLongKokkos::init_style() { PairLJCutCoulLong::init_style(); - Kokkos::deep_copy(d_cut_coulsq,cut_coulsq); + Kokkos::deep_copy(d_cut_coulsq,static_cast(cut_coulsq)); // error if rRESPA with inner levels @@ -432,20 +439,20 @@ double PairLJCutCoulLongKokkos::init_one(int i, int j) double cutone = PairLJCutCoulLong::init_one(i,j); double cut_ljsqm = cut_ljsq[i][j]; - k_params.view_host()(i,j).lj1 = lj1[i][j]; - k_params.view_host()(i,j).lj2 = lj2[i][j]; - k_params.view_host()(i,j).lj3 = lj3[i][j]; - k_params.view_host()(i,j).lj4 = lj4[i][j]; - k_params.view_host()(i,j).offset = offset[i][j]; - k_params.view_host()(i,j).cut_ljsq = cut_ljsqm; - k_params.view_host()(i,j).cut_coulsq = cut_coulsq; + k_params.view_host()(i,j).lj1 = static_cast(lj1[i][j]); + 
k_params.view_host()(i,j).lj2 = static_cast(lj2[i][j]); + k_params.view_host()(i,j).lj3 = static_cast(lj3[i][j]); + k_params.view_host()(i,j).lj4 = static_cast(lj4[i][j]); + k_params.view_host()(i,j).offset = static_cast(offset[i][j]); + k_params.view_host()(i,j).cut_ljsq = static_cast(cut_ljsqm); + k_params.view_host()(i,j).cut_coulsq = static_cast(cut_coulsq); k_params.view_host()(j,i) = k_params.view_host()(i,j); if (i(cutone*cutone); + m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = static_cast(cut_ljsqm); + m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = static_cast(cut_coulsq); } k_cutsq.view_host()(i,j) = k_cutsq.view_host()(j,i) = cutone*cutone; diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h index 2b935db4085..9b45a6b6151 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h @@ -104,6 +104,8 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong { KK_FLOAT special_coul[4]; KK_FLOAT special_lj[4]; KK_FLOAT qqrd2e; + KK_FLOAT g_ewald_kk; + KK_FLOAT tabinnersq_kk; void allocate() override; friend struct PairComputeFunctor>; diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index 618f2240833..f60622ce5bb 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -93,10 +93,10 @@ void PairLJCutKokkos::compute(int eflag_in, int vflag_in) nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; newton_pair = force->newton_pair; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; + special_lj[0] = static_cast(force->special_lj[0]); + special_lj[1] = static_cast(force->special_lj[1]); + special_lj[2] = static_cast(force->special_lj[2]); + special_lj[3] = static_cast(force->special_lj[3]); // loop over neighbors of my atoms @@ -104,14 +104,14 @@ void PairLJCutKokkos::compute(int eflag_in, int vflag_in) 
EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); - if (eflag_global) eng_vdwl += ev.evdwl; + if (eflag_global) eng_vdwl += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -134,7 +134,7 @@ template KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCutKokkos:: compute_fpair(const KK_FLOAT &rsq, const int &, const int &, const int &itype, const int &jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; const KK_FLOAT forcelj = r6inv * @@ -149,7 +149,7 @@ template KOKKOS_INLINE_FUNCTION KK_FLOAT PairLJCutKokkos:: compute_evdwl(const KK_FLOAT &rsq, const int &, const int &, const int &itype, const int &jtype) const { - const KK_FLOAT r2inv = 1.0/rsq; + const KK_FLOAT r2inv = static_cast(1.0) / rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv - @@ -212,16 +212,16 @@ double PairLJCutKokkos::init_one(int i, int j) { double cutone = PairLJCut::init_one(i,j); - k_params.view_host()(i,j).lj1 = lj1[i][j]; - k_params.view_host()(i,j).lj2 = lj2[i][j]; - k_params.view_host()(i,j).lj3 = lj3[i][j]; - k_params.view_host()(i,j).lj4 = lj4[i][j]; - k_params.view_host()(i,j).offset = offset[i][j]; - k_params.view_host()(i,j).cutsq = cutone*cutone; + k_params.view_host()(i,j).lj1 = static_cast(lj1[i][j]); + k_params.view_host()(i,j).lj2 = static_cast(lj2[i][j]); + k_params.view_host()(i,j).lj3 = static_cast(lj3[i][j]); + k_params.view_host()(i,j).lj4 = static_cast(lj4[i][j]); + k_params.view_host()(i,j).offset = static_cast(offset[i][j]); + 
k_params.view_host()(i,j).cutsq = static_cast(cutone*cutone); k_params.view_host()(j,i) = k_params.view_host()(i,j); if (i(cutone*cutone); } k_cutsq.view_host()(i,j) = k_cutsq.view_host()(j,i) = cutone*cutone; From f911a71576e680ecade297b897fc0f995cd32a9f Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:14:11 -0700 Subject: [PATCH 095/604] Removed silent conversions from pair_eam_kokkos.*, as relevant for EAM. --- src/KOKKOS/pair_eam_kokkos.cpp | 279 ++++++++++++++++----------------- src/KOKKOS/pair_eam_kokkos.h | 4 + 2 files changed, 136 insertions(+), 147 deletions(-) diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index f59432615bf..5f7d890f859 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -117,6 +117,9 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; inum = list->inum; + // make sure this is set appropriately + cutforcesq_kk = static_cast(cutforcesq); + need_dup = lmp->kokkos->need_dup(); if (need_dup) { dup_rho = Kokkos::Experimental::create_scatter_view(d_rho); @@ -130,6 +133,7 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } + rhomax_kk = static_cast(rhomax); copymode = 1; // zero out density @@ -194,8 +198,8 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) } if (eflag) { - eng_vdwl += ev.evdwl; - ev.evdwl = 0.0; + eng_vdwl += static_cast(ev.evdwl); + ev.evdwl = 0; } // communicate derivative of embedding function @@ -275,14 +279,14 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) if (need_dup) Kokkos::Experimental::contribute(f, dup_f); - if (eflag_global) eng_vdwl += ev.evdwl; + if (eflag_global) eng_vdwl += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + 
virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (vflag_fdotr) pair_virial_fdotr_compute(this); @@ -381,6 +385,9 @@ void PairEAMKokkos::array2spline() rdr = 1.0/dr; rdrho = 1.0/drho; + rdr_kk = static_cast(rdr); + rdrho_kk = static_cast(rdrho); + tdual_kkfloat_2d_n7 k_frho_spline = tdual_kkfloat_2d_n7("pair:frho",nfrho,nrho+1); tdual_kkfloat_2d_n7 k_rhor_spline = tdual_kkfloat_2d_n7("pair:rhor",nrhor,nr+1); tdual_kkfloat_2d_n7 k_z2r_spline = tdual_kkfloat_2d_n7("pair:z2r",nz2r,nr+1); @@ -414,31 +421,32 @@ void PairEAMKokkos::array2spline() template void PairEAMKokkos::interpolate(int n, double delta, double *f, t_hostkkfloat_2d_n7 h_spline, int i) { - for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m]; + for (int m = 1; m <= n; m++) h_spline(i,m,6) = static_cast(f[m]); h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6); - h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6)); - h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6)); + h_spline(i,2,5) = static_cast(0.5) * (h_spline(i,3,6)-h_spline(i,1,6)); + h_spline(i,n-1,5) = static_cast(0.5) * (h_spline(i,n,6)-h_spline(i,n-2,6)); h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6); for (int m = 3; m <= n-2; m++) h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) + - 8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0; + static_cast(8.0)*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / static_cast(12.0); for (int m = 1; m <= n-1; m++) { - h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) - - 2.0*h_spline(i,m,5) - h_spline(i,m+1,5); + h_spline(i,m,4) = static_cast(3.0)*(h_spline(i,m+1,6)-h_spline(i,m,6)) - + static_cast(2.0)*h_spline(i,m,5) - h_spline(i,m+1,5); h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) - - 2.0*(h_spline(i,m+1,6)-h_spline(i,m,6)); + static_cast(2.0)*(h_spline(i,m+1,6)-h_spline(i,m,6)); } - h_spline(i,n,4) = 0.0; - h_spline(i,n,3) = 
0.0; + h_spline(i,n,4) = 0; + h_spline(i,n,3) = 0; + KK_FLOAT inv_delta_float = static_cast(1.0 / delta); for (int m = 1; m <= n; m++) { - h_spline(i,m,2) = h_spline(i,m,5)/delta; - h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta; - h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta; + h_spline(i,m,2) = h_spline(i,m,5)*inv_delta_float; + h_spline(i,m,1) = static_cast(2.0)*h_spline(i,m,4)*inv_delta_float; + h_spline(i,m,0) = static_cast(3.0)*h_spline(i,m,3)*inv_delta_float; } } @@ -459,7 +467,7 @@ template KOKKOS_INLINE_FUNCTION void PairEAMKokkos::operator()(TagPairEAMPackForwardComm, const int &i) const { int j = d_sendlist(i); - v_buf[i] = d_fp[j]; + v_buf[i] = static_cast(d_fp[j]); } /* ---------------------------------------------------------------------- */ @@ -475,7 +483,7 @@ void PairEAMKokkos::unpack_forward_comm_kokkos(int n, int first_in, template KOKKOS_INLINE_FUNCTION void PairEAMKokkos::operator()(TagPairEAMUnpackForwardComm, const int &i) const { - d_fp[i + first] = v_buf[i]; + d_fp[i + first] = static_cast(v_buf[i]); } /* ---------------------------------------------------------------------- */ @@ -490,7 +498,7 @@ int PairEAMKokkos::pack_forward_comm(int n, int *list, double *buf, for (i = 0; i < n; i++) { j = list[i]; - buf[i] = h_fp[j]; + buf[i] = static_cast(h_fp[j]); } return n; } @@ -503,7 +511,7 @@ void PairEAMKokkos::unpack_forward_comm(int n, int first, double *bu k_fp.sync_host(); for (int i = 0; i < n; i++) { - h_fp[i + first] = buf[i]; + h_fp[i + first] = static_cast(buf[i]); } k_fp.modify_host(); @@ -520,7 +528,7 @@ int PairEAMKokkos::pack_reverse_comm(int n, int first, double *buf) m = 0; last = first + n; - for (i = first; i < last; i++) buf[m++] = h_rho[i]; + for (i = first; i < last; i++) buf[m++] = static_cast(h_rho[i]); return m; } @@ -536,7 +544,7 @@ void PairEAMKokkos::unpack_reverse_comm(int n, int *list, double *bu m = 0; for (i = 0; i < n; i++) { j = list[i]; - h_rho[j] += buf[m++]; + h_rho[j] += static_cast(buf[m++]); } 
k_rho.modify_host(); @@ -547,7 +555,7 @@ void PairEAMKokkos::unpack_reverse_comm(int n, int *list, double *bu template KOKKOS_INLINE_FUNCTION void PairEAMKokkos::operator()(TagPairEAMInitialize, const int &i) const { - d_rho[i] = 0.0; + d_rho[i] = 0; } /* ---------------------------------------------------------------------- */ @@ -585,15 +593,15 @@ void PairEAMKokkos::operator()(TagPairEAMKernelA(1.0); int m = static_cast (p); m = MIN(m,nr-1); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); const int d_type2rhor_ji = d_type2rhor(jtype,itype); - rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + - d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + rhotmp += static_cast(((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6)); if (NEWTON_PAIR || j < nlocal) { const int d_type2rhor_ij = d_type2rhor(itype,jtype); a_rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p + @@ -602,7 +610,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelA(rhotmp); } /* ---------------------------------------------------------------------- */ @@ -621,19 +629,19 @@ void PairEAMKokkos::operator()(TagPairEAMKernelB, const int & const int i = d_ilist[ii]; const int itype = type(i); - KK_FLOAT p = d_rho[i]*rdrho + 1.0; + KK_FLOAT p = d_rho[i]*rdrho_kk + static_cast(1.0); int m = static_cast (p); m = MAX(1,MIN(m,nrho-1)); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); const int d_type2frho_i = d_type2frho[itype]; d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); if (EFLAG) { KK_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); - if (d_rho[i] > rhomax) phi 
+= d_fp[i] * (d_rho[i]-rhomax); - if (eflag_global) ev.evdwl += phi; - if (eflag_atom) d_eatom[i] += phi; + if (d_rho[i] > rhomax_kk) phi += d_fp[i] * (d_rho[i]-rhomax_kk); + if (eflag_global) ev.evdwl += static_cast(phi); + if (eflag_atom) d_eatom[i] += static_cast(phi); } } @@ -664,7 +672,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int const int jnum = d_numneigh[i]; - KK_ACC_FLOAT rhotmp = 0.0; + KK_ACC_FLOAT rhotmp = 0; for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); @@ -676,38 +684,38 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int const int jtype = type(j); const KK_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutforcesq) { - KK_FLOAT p = sqrt(rsq)*rdr + 1.0; + if (rsq < cutforcesq_kk) { + KK_FLOAT p = sqrt(rsq)*rdr_kk + static_cast(1.0); int m = static_cast (p); m = MIN(m,nr-1); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); const int d_type2rhor_ji = d_type2rhor(jtype,itype); - rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + - d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + rhotmp += static_cast(((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6)); } } - d_rho[i] += rhotmp; + d_rho[i] += static_cast(rhotmp); // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom // if rho > rhomax (e.g. 
due to close approach of two atoms), // will exceed table, so add linear term to conserve energy - KK_FLOAT p = d_rho[i]*rdrho + 1.0; + KK_FLOAT p = d_rho[i]*rdrho_kk + static_cast(1.0); int m = static_cast (p); m = MAX(1,MIN(m,nrho-1)); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); const int d_type2frho_i = d_type2frho[itype]; d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); if (EFLAG) { KK_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); - if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); - if (eflag_global) ev.evdwl += phi; - if (eflag_atom) d_eatom[i] += phi; + if (d_rho[i] > rhomax_kk) phi += d_fp[i] * (d_rho[i]-rhomax_kk); + if (eflag_global) ev.evdwl += static_cast(phi); + if (eflag_atom) d_eatom[i] += static_cast(phi); } } @@ -741,9 +749,9 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC::operator()(TagPairEAMKernelC(1.0); int m = static_cast (p); m = MIN(m,nr-1); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); // rhoip = derivative of (density at atom j due to atom i) // rhojp = derivative of (density at atom i due to atom j) @@ -785,30 +793,30 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC(3.0)*rdr_kk*z2r_spline_3*p + static_cast(2.0)*rdr_kk*z2r_spline_4)*p + + rdr_kk*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function const KK_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p + z2r_spline_5)*p + z2r_spline_6; - const KK_FLOAT recip = 1.0/r; + const KK_FLOAT recip = static_cast(1.0)/r; const KK_FLOAT phi = z2*recip; const KK_FLOAT phip = z2p*recip - phi*recip; const KK_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; const KK_FLOAT fpair = -psip*recip; - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; + fxtmp += 
static_cast(delx*fpair); + fytmp += static_cast(dely*fpair); + fztmp += static_cast(delz*fpair); if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { - a_f(j,0) -= delx*fpair; - a_f(j,1) -= dely*fpair; - a_f(j,2) -= delz*fpair; + a_f(j,0) -= static_cast(delx*fpair); + a_f(j,1) -= static_cast(dely*fpair); + a_f(j,2) -= static_cast(delz*fpair); } if (EVFLAG) { if (eflag) { - ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j((((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j(1.0):static_cast(0.5))*phi); } if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,phi,fpair,delx,dely,delz); @@ -865,7 +873,7 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int jnum = d_numneigh[i]; - KK_ACC_FLOAT rhotmp = 0.0; + KK_ACC_FLOAT rhotmp = 0; for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); @@ -877,42 +885,42 @@ void PairEAMKokkos::operator()(TagPairEAMKernelAB, const int jtype = type(j); const KK_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutforcesq) { - KK_FLOAT p = sqrt(rsq)*rdr + 1.0; + if (rsq < cutforcesq_kk) { + KK_FLOAT p = sqrt(rsq)*rdr_kk + static_cast(1.0); int m = static_cast (p); m = MIN(m,nr-1); p -= m; - p = MIN(p,1.0); + p = MIN(p,static_cast(1.0)); const int d_type2rhor_ji = d_type2rhor(jtype,itype); if (d_type2rhor_ji == 0 && d_rhor_spline_cached == 1) { - rhotmp += ((A(m,3)*p + A(m,4))*p + - A(m,5))*p + A(m,6); + rhotmp += static_cast(((A(m,3)*p + A(m,4))*p + + A(m,5))*p + A(m,6)); } else - rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + - d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + rhotmp += static_cast(((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6)); } } - d_rho[i] += rhotmp; + d_rho[i] += static_cast(rhotmp); // fp = derivative of embedding energy at each atom // phi = 
embedding energy at each atom // if rho > rhomax (e.g. due to close approach of two atoms), // will exceed table, so add linear term to conserve energy - KK_FLOAT p = d_rho[i]*rdrho + 1.0; + KK_FLOAT p = d_rho[i]*rdrho_kk + static_cast(1.0); int m = static_cast (p); m = MAX(1,MIN(m,nrho-1)); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); const int d_type2frho_i = d_type2frho[itype]; d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); if (EFLAG) { KK_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); - if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); - if (eflag_global) ev.evdwl += phi; - if (eflag_atom) d_eatom[i] += phi; + if (d_rho[i] > rhomax_kk) phi += d_fp[i] * (d_rho[i]-rhomax_kk); + if (eflag_global) ev.evdwl += static_cast(phi); + if (eflag_atom) d_eatom[i] += static_cast(phi); } } } @@ -966,9 +974,9 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC::operator()(TagPairEAMKernelC(1.0); int m = static_cast (p); m = MIN(m,nr-1); - p -= m; - p = MIN(p,1.0); + p -= static_cast(m); + p = MIN(p,static_cast(1.0)); // rhoip = derivative of (density at atom j due to atom i) // rhojp = derivative of (density at atom i due to atom j) @@ -1011,30 +1019,30 @@ void PairEAMKokkos::operator()(TagPairEAMKernelC(3.0)*rdr_kk*z2r_spline_3*p + static_cast(2.0)*rdr_kk*z2r_spline_4)*p + + rdr_kk*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function const KK_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p + z2r_spline_5)*p + z2r_spline_6; - const KK_FLOAT recip = 1.0/r; + const KK_FLOAT recip = static_cast(1.0)/r; const KK_FLOAT phi = z2*recip; const KK_FLOAT phip = z2p*recip - phi*recip; const KK_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; const KK_FLOAT fpair = -psip*recip; - fxtmp += delx*fpair; - fytmp += 
dely*fpair; - fztmp += delz*fpair; + fxtmp += static_cast(delx*fpair); + fytmp += static_cast(dely*fpair); + fztmp += static_cast(delz*fpair); if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { - a_f(j,0) -= delx*fpair; - a_f(j,1) -= dely*fpair; - a_f(j,2) -= delz*fpair; + a_f(j,0) -= static_cast(delx*fpair); + a_f(j,1) -= static_cast(dely*fpair); + a_f(j,2) -= static_cast(delz*fpair); } if (EVFLAG) { if (eflag) { - ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j((((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j(1.0):static_cast(0.5))*phi); } if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,phi,fpair,delx,dely,delz); @@ -1081,7 +1089,7 @@ void PairEAMKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & if (EFLAG) { if (eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * epair; + const KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * epair); if (NEIGHFLAG!=FULL) { if (NEWTON_PAIR || i < nlocal) a_eatom[i] += epairhalf; if (NEWTON_PAIR || j < nlocal) a_eatom[j] += epairhalf; @@ -1092,66 +1100,43 @@ void PairEAMKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & } if (VFLAG) { - const KK_FLOAT v0 = delx*delx*fpair; - const KK_FLOAT v1 = dely*dely*fpair; - const KK_FLOAT v2 = delz*delz*fpair; - const KK_FLOAT v3 = delx*dely*fpair; - const KK_FLOAT v4 = delx*delz*fpair; - const KK_FLOAT v5 = dely*delz*fpair; + const KK_ACC_FLOAT v_half_acc[6] = + { static_cast(static_cast(0.5)*delx*delx*fpair), + static_cast(static_cast(0.5)*dely*dely*fpair), + static_cast(static_cast(0.5)*delz*delz*fpair), + static_cast(static_cast(0.5)*delx*dely*fpair), + static_cast(static_cast(0.5)*delx*delz*fpair), + static_cast(static_cast(0.5)*dely*delz*fpair) }; if (vflag_global) { if (NEIGHFLAG!=FULL) { if (NEWTON_PAIR || i < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] 
+= v_half_acc[n]; } if (NEWTON_PAIR || j < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v_half_acc[n]; } } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v_half_acc[n]; } } if (vflag_atom) { if (NEIGHFLAG!=FULL) { if (NEWTON_PAIR || i < nlocal) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += v_half_acc[n]; } if (NEWTON_PAIR || j < nlocal) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(j,n) += v_half_acc[n]; } } else { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += v_half_acc[n]; } } } diff --git a/src/KOKKOS/pair_eam_kokkos.h b/src/KOKKOS/pair_eam_kokkos.h index 37117572ddc..87010831dba 100644 --- a/src/KOKKOS/pair_eam_kokkos.h +++ b/src/KOKKOS/pair_eam_kokkos.h @@ -186,6 +186,10 @@ class PairEAMKokkos : public PairEAM, public KokkosBase { typename AT::t_int_1d d_ilist; typename AT::t_int_1d d_numneigh; + KK_FLOAT rhomax_kk; + KK_FLOAT rdr_kk; + KK_FLOAT rdrho_kk; + KK_FLOAT cutforcesq_kk; int first; typename AT::t_int_1d d_sendlist; typename AT::t_double_1d_um v_buf; From 8896aa8edd566169b24285884383094fb6d04341 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Mon, 27 Oct 2025 14:21:07 -0700 Subject: [PATCH 096/604] Add an example for PIMD group functionality --- .../langevin_reduced_units_group/data.lj01 | 221 ++++++++++++++++++ 
.../pimd/langevin_reduced_units_group/in.lmp | 30 +++ .../pimd/langevin_reduced_units_group/run.sh | 1 + 3 files changed, 252 insertions(+) create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/data.lj01 create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/run.sh diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/data.lj01 b/examples/PACKAGES/pimd/langevin_reduced_units_group/data.lj01 new file mode 100644 index 00000000000..08f5573a559 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/data.lj01 @@ -0,0 +1,221 @@ +LAMMPS data file via write_data, version 8 Feb 2023, timestep = 2000 + +200 atoms +2 atom types + +-3.4945130603740377 3.4945130603740377 xlo xhi +-3.4945130603740377 3.4945130603740377 ylo yhi +-3.4945130603740377 3.4945130603740377 zlo zhi + +Masses + +1 1 +2 1 + +Pair Coeffs # lj/cut + +1 1 1 +2 1 1 + +Atoms # atomic + +1 1 1.9750704226304612 -1.2226584851722493 -0.5050391499153623 0 -1 0 +2 1 -1.340442106022601 2.2849149851767008 2.069093746173476 1 -2 -1 +3 1 1.426791637331859 1.1994824011018168 -0.7391487398561316 0 -1 1 +4 1 -3.1142920073981517 -0.7023062901592412 1.8555906719306698 0 0 -1 +5 1 0.14805884420723833 -2.060999311749248 2.194803632818035 0 -1 -1 +6 1 0.2994229743009 -1.5755502164022743 -1.1025451212735877 2 0 0 +7 1 -0.6422036076119758 -2.1822637849150257 -1.309964136714573 -1 0 1 +8 1 0.9896949675708329 -0.5825805499366115 0.8390036708294969 0 0 0 +9 1 -0.23886293275872486 -1.0612344376875333 -3.025432389188406 0 0 1 +10 1 1.3885697256222325 -2.5811721069430047 1.960299612344295 0 0 1 +11 1 3.1992117767992068 -1.191883736793153 -3.4723419170834338 -1 2 1 +12 1 -2.996784469690298 -1.494057160171672 -0.09378065633433766 0 0 1 +13 1 -0.12240867021598209 3.2141258686958913 2.833411182871058 -1 -1 1 +14 1 -0.8080539703460301 -3.3138674801902015 -1.5513416052298858 -1 0 1 +15 1 
2.5264882009659457 -0.10271458568953422 1.9429509270634897 -1 0 -1 +16 1 2.5088486436667465 3.2867062332187165 0.5352876678637931 0 -2 1 +17 1 -1.2519028473371163 -1.9834151764617522 -0.4751925051419198 -2 0 0 +18 1 0.8738459806171345 -0.7301638515378504 3.451151027750579 0 -1 -2 +19 1 -2.219481001384856 -2.718720936926987 -0.17152653954078959 1 1 -1 +20 1 -0.3345634991984191 0.4474749361055883 2.021125197093307 1 0 -1 +21 1 1.5799209373811796 0.5165927484280916 2.197344395644513 1 0 -1 +22 1 0.11282413745771128 -0.029722367543426716 1.1299864513218787 -1 0 -1 +23 1 -1.0720302561208543 -0.8796307974060223 2.9652586368172633 0 0 -2 +24 1 1.0798151658945965 1.8088284522753606 -1.611839691529319 1 -1 0 +25 1 0.3195487317939559 -2.5915964162672633 -1.6847661018972104 -1 0 1 +26 1 0.3607641531328818 1.9915770180932266 -2.4322620664253045 0 -1 1 +27 1 0.9216706073777299 -2.620009754961919 2.980059759810019 0 0 1 +28 1 0.9556568956317052 0.9070586383550882 -3.051580171737043 0 -1 -1 +29 1 -3.4441516076858747 3.186682881361427 -3.164290558791177 1 -1 0 +30 1 -2.572197786362113 -0.5211520888332514 0.06478473226429171 1 -1 0 +31 1 0.1434804396367878 -0.5352374975985325 0.00065681711961239 0 -1 0 +32 1 1.9176581701385829 -1.0963885598161285 0.612637989357651 1 0 -1 +33 1 -0.42833987516917704 0.2127920276630183 3.0504271041557827 0 -1 -1 +34 1 -2.319980614869211 0.615010700172925 2.607891496520952 1 0 -1 +35 1 -1.9040946957475922 2.4850230251463294 -1.8238074088896814 1 -1 0 +36 1 1.117403260546369 -0.6258611972204157 -0.45746732579129296 1 -1 0 +37 1 1.9446560065460998 0.32690249887078415 -1.557848909197346 1 0 1 +38 1 -2.6469918571054945 -2.446265188540552 0.8006326688775777 1 0 1 +39 1 -1.9288855291902436 2.399541640622866 1.1856795461879515 0 -2 0 +40 1 2.1784673001300465 0.9493086808156442 1.37690657576216 0 0 0 +41 1 0.6373758855225722 3.1576967379383136 -2.2076729513672273 0 0 1 +42 1 3.3099578488331143 2.7636832208891664 -0.11775790598994411 -1 1 -1 +43 1 
1.2651581412723922 -1.7350310325188918 1.2119129817343088 -1 -1 1 +44 1 1.8875283020177747 2.01954806585405 0.15097670327664356 -1 -1 1 +45 1 -2.3851170089162586 2.7278135027281576 3.2923683043438063 1 0 -1 +46 1 -0.22701759376730096 2.672935498985858 -3.2296828094464414 1 -1 -1 +47 1 -2.417816843075144 1.911767591397239 2.070179319067279 0 1 0 +48 1 1.212990075236676 2.7637861839630933 1.668138216427735 0 0 -1 +49 1 -2.3658136681990007 -3.088812044820056 3.4760124445890623 0 1 -1 +50 1 -0.9774458776093428 -3.080336575399165 2.647294574069851 0 1 0 +51 1 -1.9626772134484154 -1.4010881760394245 3.033958867461655 0 0 -1 +52 1 -3.4931268842957652 2.50155605132379 2.127865658713496 0 -1 0 +53 1 -2.2768986954542476 1.9520208666344268 -2.905899876141582 -1 -1 1 +54 1 2.4907565610148112 1.0443458588262453 2.659572052524602 0 -1 -1 +55 1 0.6250447384071113 -2.6446349401275255 -2.959767417439857 -1 0 -1 +56 1 0.1655081354187105 0.488612868229853 -0.6685586469118172 0 0 1 +57 1 3.2402592062064213 -1.9899576048001384 2.7907002699370937 1 0 0 +58 1 -1.8161128905276918 3.295453805825648 0.4419286058239596 0 -1 0 +59 1 3.120666407088228 0.49773318717092796 1.2390797462592613 0 -1 1 +60 1 -2.3748565234893153 3.1415519659534046 -2.6903226937570888 1 1 0 +61 1 0.7799049336891462 2.500510040165389 2.633457440959517 -1 -2 0 +62 1 1.2599715479139464 2.4637352645229535 -2.996655642771261 0 0 1 +63 1 1.6783992808137906 -3.22203513406057 -3.1652676844989256 -1 1 1 +64 1 2.934967635633404 -1.92620475221537 -0.7077313528493354 0 1 0 +65 1 -2.68682939849047 -0.8114540186440395 -3.126687818057689 0 -1 0 +66 1 0.0812156007450339 -1.0967406204476347 2.677696992399577 -1 -1 -1 +67 1 -1.8758084270870021 1.7856292006021222 2.9794597615239873 1 0 0 +68 1 1.2305537223759786 3.452417391198064 2.5060768204562396 0 0 1 +69 1 2.6376153053668867 -0.2845436411244765 -2.788279844473203 0 -2 0 +70 1 -1.8654941392471802 -0.3319695813423535 3.329224742630753 0 1 1 +71 1 -2.704068160468349 3.198799130983575 
1.5420861670955985 0 -1 -1 +72 1 -1.091863924309645 2.651695772194485 -1.1525574140711645 1 -1 0 +73 1 -1.2083083753201984 0.16913042396249212 1.31830897028215 0 -2 0 +74 1 -0.028453987680887038 0.3739399392930446 -2.9891764409930484 1 -1 1 +75 1 -3.264195451754593 -0.2758623474229462 0.8501703715118789 1 -1 0 +76 1 0.002896678458885543 -2.2620445897063184 -0.2330303144789453 0 0 0 +77 1 1.7035877859699526 -0.8133688575022073 1.6318409740615665 0 -1 1 +78 1 -0.6388640381972747 -1.8358994538797384 3.0793625374288474 1 -1 1 +79 1 -2.8111804533557017 -0.5176533778232942 -2.0486672107634094 0 1 0 +80 1 -2.079807139196039 -1.2506968999173758 -0.6878784167189951 0 0 1 +81 1 2.2043144165939133 0.7734976358785483 -0.1443989261496442 -1 0 1 +82 1 3.450078342208421 0.43347862445962865 2.3965330453749276 -1 -1 -1 +83 1 0.7440931705195399 0.28485269404070884 2.8824261695219513 0 -2 0 +84 1 2.4045875426768606 -1.8297724699275564 -2.7874627893944544 0 0 0 +85 1 3.492386221258323 -0.022918569575539983 -0.18415289757087 1 0 1 +86 1 -3.3982593364606077 -1.9911707046121028 1.4129412590921573 -1 0 0 +87 1 -1.4786937501974577 -0.24572242955022147 -1.0411298155891553 0 -1 1 +88 1 -1.6158710173914035 -2.3369020600614756 3.233517474511907 0 1 -1 +89 1 -0.622943103836138 0.15695545887374523 0.03344907944561345 0 0 -1 +90 1 -0.6407155002729499 2.891205873820353 -2.184576201275668 0 -1 1 +91 1 2.930212504963826 -0.8900603772487206 0.31326990070574257 0 -1 -1 +92 1 3.2579931163558924 -3.0623298587917547 1.8252693484803535 0 2 -1 +93 1 -0.6871612783719626 1.4782545941619265 1.7130745398216136 0 -1 0 +94 1 -1.209899480419889 1.1419050120740204 0.768566708093127 0 0 0 +95 1 1.3579478870419064 -2.684729254303277 -2.052496056604152 0 2 -1 +96 1 2.599921016729558 -2.6427367710738228 3.4145177684858803 -2 2 -1 +97 1 1.4995074975439164 1.4827775589636611 2.8582397165142885 0 -1 -1 +98 1 0.14422647245316755 -0.4562748967814887 -1.1288823927075846 -1 0 0 +99 1 -1.376188772302252 2.1367308970905112 
-0.22414000562735564 0 -1 0 +100 1 2.8360458941172935 1.5280995172170748 0.62642849190607 0 1 0 +101 1 -2.7232595674410254 -2.5310321070553594 2.520173028347837 1 3 1 +102 1 -2.609481280743025 -1.8912619121268304 -2.399753092988466 1 -1 -1 +103 1 -2.8441279458234914 0.5800051690180812 1.512417106043963 -1 1 0 +104 1 -0.9864189902172598 -3.128510029811542 -0.3946109581791233 0 0 -1 +105 1 1.4496281787006915 -1.9673030959679096 -3.332658945278261 0 0 0 +106 1 1.1039107347748043 -0.9982945968846851 -1.4358613477546331 -1 0 0 +107 1 0.27101548210140264 1.6697625775553313 3.256398876364515 -1 1 0 +108 1 -2.7795690566068996 -2.0367677788080942 3.4238258338844894 2 1 -1 +109 1 0.7422148102651713 1.9772692916114134 0.8946623229549697 -1 1 0 +110 1 3.092811420136352 1.457598231264991 1.8333488137634408 0 0 -2 +111 1 -3.223221512681343 0.8515794477136406 0.5304444380887284 1 1 1 +112 1 -1.7203801180145006 -0.8038481618523634 -2.0163240721427838 0 0 0 +113 1 -0.5058112539671865 -2.175265580488635 -2.895948573481031 -1 0 0 +114 1 0.7095094175128512 3.3529225376529834 -3.446569928809617 0 -1 1 +115 1 -1.3188418056778684 -3.426962627878111 1.3071043811339145 1 1 0 +116 1 -3.3782151164102614 3.3939134790192074 0.6418244028431277 1 0 0 +117 1 -2.5082381499858095 -1.6201356456498082 1.916360867966056 0 0 0 +118 1 -1.5219571471797697 -0.6469152337096267 0.07790607124041445 0 1 0 +119 1 -1.8952925629684365 -1.6742391291961565 0.3056526633582653 0 -1 0 +120 1 -2.7699397214807666 -3.0679149510930377 -1.4586405778286127 2 1 1 +121 1 -0.1371595891679715 -2.734037361783574 3.2547541081110136 -1 1 -1 +122 1 0.9437732996524034 -3.076308154774796 -1.0301951566376644 1 0 -1 +123 1 0.42149982032625755 -2.3318080821151144 1.1400194349387576 -1 1 -1 +124 1 -0.41265942682553436 2.1334074442091007 2.592421577863454 1 -1 -1 +125 1 0.6384933993980926 1.4087406696008793 -0.04513978672724809 0 1 0 +126 1 0.2617829860592282 -3.185635897887874 2.046978007125428 0 1 -1 +127 1 -1.832485514575906 
-3.475177695698571 -1.0143919033965547 0 0 0 +128 1 2.8514020342376654 2.399755896897204 3.251183881976271 0 0 -2 +129 1 1.1355758059362249 0.3630461551808931 1.290797674214222 -1 0 0 +130 1 -0.41366866285633624 -2.851509016203123 0.6773665297002652 -1 1 0 +131 1 -1.616105025949218 -1.97873290984862 2.2250110182725185 1 0 0 +132 1 2.7791570748849397 -0.1854473725861777 3.06331164881256 0 0 -1 +133 1 0.859517194013173 -1.4579095853876511 0.2223121798831755 1 1 1 +134 1 -0.9769774773292339 -1.023866537972646 1.8133872200496586 -1 0 0 +135 1 -1.8704119131145511 1.4151617424236407 -2.0299493395071555 1 0 0 +136 1 1.795137560517328 -0.5623222017058748 2.761148573575212 0 -1 -2 +137 1 1.0403444543974962 0.29392106264506496 -1.157047853549803 -1 1 -1 +138 1 -2.102339643808083 0.3205080511040257 0.6397061187151873 0 1 -1 +139 1 2.2034635745403706 -0.09403988556633318 0.8645554281632429 -1 2 0 +140 1 0.5573788374911133 -0.5990036144148324 -2.402963062521378 -1 0 0 +141 1 -3.140749645549514 -2.8903241627869427 -2.7071762462261537 2 0 0 +142 1 1.7676090653529768 -2.7694388971884765 1.0411093428968425 1 1 0 +143 1 -3.3367462911096637 -2.646710289319255 -0.08639804580378332 1 1 0 +144 1 -2.152212571536467 -2.8119708535854224 1.6778364082976118 0 1 0 +145 1 -2.5978280185456044 0.23771261564216897 -0.6973478660383656 0 -2 -1 +146 1 -0.9802936024515303 -0.3953988402073487 -2.8364522494017708 2 1 2 +147 1 -0.22721902013037146 -3.2095321965856187 -2.5444416457809185 0 1 0 +148 1 -3.011516485433896 -1.1469037793442038 2.7610753028279933 0 0 0 +149 1 -0.8196954968551895 -2.4620140380344058 1.6681749543249536 1 1 0 +150 1 2.339993315563226 2.225227215494749 1.201791196292386 0 -1 1 +151 1 -1.5217298498042864 -2.5933593936403763 0.7963122225368328 0 1 2 +152 1 3.4379847727225346 -1.3055938268830742 -2.4905074589410936 -2 -1 0 +153 1 -0.6179778569135823 -0.6812551828045249 0.8822634033092613 1 1 0 +154 1 1.617010887740887 -2.199305270305938 0.15712874770371849 -1 2 0 +155 1 
2.209613988042176 -1.7162524781709796 2.7271827572979834 0 0 -1 +156 1 2.796603060966876 -1.975811297437484 0.5269881248554414 0 -1 0 +157 1 2.2693311785439514 1.4234672203444076 -1.3950756204319474 -1 -1 1 +158 1 1.4062820374169525 1.1517166598840343 0.6269036353613398 0 0 0 +159 1 -1.638440405488927 1.040992844593558 1.6945368306541542 0 0 0 +160 1 -1.337826167757788 0.19507650189296638 2.4559469377176044 0 0 0 +161 1 -1.6469671018177567 0.9360204597700993 -3.3491365145788827 1 0 2 +162 1 -0.9152119867762214 1.8538003710645874 -2.0787458665529313 0 0 1 +163 1 2.861161516036147 -0.9653887253660198 2.3742914745631665 -1 -1 0 +164 1 -2.8409776961511124 1.1599509265598786 -2.2464830240939375 0 0 1 +165 1 0.20472499231506947 0.41157279287578097 -1.7804952487691292 0 0 0 +166 1 -2.9639598123456348 -1.8230569851379879 -1.1336421298957986 0 1 0 +167 1 1.1687036142366616 -1.4517802174625138 2.5014340704062015 0 1 -1 +168 1 3.477326382335708 -3.4068959852573992 2.821891114170504 -1 1 -2 +169 1 2.1185477137097575 2.27540363851787 2.3296903855745934 0 0 0 +170 1 1.781383760665039 0.4589516263468447 3.2606047972968697 -1 1 -1 +171 1 0.39730078560778914 -1.7846492070113664 -2.443607407553737 0 2 -1 +172 1 -1.3831407623603917 2.6439383680022495 -2.988114399273657 0 0 2 +173 1 -1.967038944001471 0.06098718735032446 -2.58940519609616 1 0 1 +174 1 -1.4968933339977661 -1.3290620346554367 -3.064010040861048 -1 0 -1 +175 1 -3.1260125076640937 -0.1261845495956751 3.231607373653429 0 0 0 +176 1 0.8667263057428289 -0.39855552371320657 2.157386135628261 0 0 0 +177 1 -0.4209642163497379 1.4081511815239465 0.048786606404428355 0 1 -1 +178 1 2.26419751848874 -1.7585846281228215 1.7123805014943867 1 1 0 +179 1 -0.8263021362457004 -1.7214596406733806 0.6812305480127513 -1 3 1 +180 1 -1.9509177290095516 3.309670529089689 2.349688492929276 1 -1 1 +181 1 0.8338139283938067 0.2955625473112081 0.1708175991674893 -1 1 -1 +182 1 0.08831527814343061 0.9709201913937692 0.9217855950625892 0 1 -1 +183 1 
-0.7452468270399182 1.6958511415691964 -3.2646018242495516 1 0 0 +184 1 -2.4512811422970944 1.4407376078485292 1.024840909070544 0 0 1 +185 1 0.3910180046792306 -1.7476201774885758 3.460179487049301 -2 1 -1 +186 1 0.4501325900005473 1.1817617584793225 2.171005382838285 1 1 0 +187 1 -3.3809787804159894 2.339193447924602 0.9253127201661663 1 1 0 +188 1 1.3149323047475332 -2.0202232851439175 -0.9332517483120624 -1 0 0 +189 1 -0.6249709402997541 -1.252931712435372 -1.9882716349469658 0 0 2 +190 1 -2.8711350478349926 0.9129034739862979 -3.477848194666021 0 0 0 +191 2 -0.9433598322448645 1.0765639185970945 2.7978676989244713 1 -1 0 +192 2 -1.2960228223928079 -3.237522585981233 -3.1506262911155765 1 1 2 +193 2 -2.800913203549158 3.3913795040038752 -0.3002902442199323 1 0 0 +194 2 -0.802106175542123 1.5380363524561962 -1.0014646333327863 0 1 -1 +195 2 2.9662277270739343 -0.9097471611972223 1.378968815531931 1 0 2 +196 2 2.4252816482051065 -2.9991133261256717 -0.2915609413585816 0 2 1 +197 2 -2.0048159662329326 -0.6749211621392989 2.344679442661877 0 -1 -1 +198 2 0.20957042666083733 -1.2300305579788615 1.1505092305231928 0 0 1 +199 2 -1.8606891200301348 -0.81109530705794 1.234068196576104 1 -1 1 +200 2 -2.796037525463158 -1.2242406753887405 0.9147806710233006 -1 0 0 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp b/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp new file mode 100644 index 00000000000..59660226b94 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp @@ -0,0 +1,30 @@ +variable ibead uloop 4 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj01 + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 +mass 2 1.0 + +group real_atom type 1 +group virtual_atom type 2 + +timestep 0.00044905847 + +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat 
PILE_L ${ibead} + +thermo_style custom step temp f_1[*] vol press +thermo 10 +thermo_modify norm no + +dump dcd all custom 1 traj${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump_modify dcd sort id #format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 100 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/run.sh b/examples/PACKAGES/pimd/langevin_reduced_units_group/run.sh new file mode 100644 index 00000000000..2580ef1a415 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/run.sh @@ -0,0 +1 @@ +mpirun -np 4 $LMP -in in.lmp -p 4x1 -log log -screen screen From eb15d21c0b3cf789bfe0167eb79040c6a8ee5acb Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:23:58 -0700 Subject: [PATCH 097/604] Removed silent conversions from pair_tersoff_kokkos.*, as relevant for Tersoff. --- src/KOKKOS/pair_tersoff_kokkos.cpp | 328 ++++++++++++++--------------- src/KOKKOS/pair_tersoff_kokkos.h | 131 ++++++++---- 2 files changed, 249 insertions(+), 210 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 7434ca4d55e..aa9adadce10 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -216,6 +216,8 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } + cutmax_sq = static_cast(cutmax * cutmax); + copymode = 1; EV_FLOAT ev; @@ -263,14 +265,14 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) if (need_dup) Kokkos::Experimental::contribute(f, dup_f); - if (eflag_global) eng_vdwl += ev_all.evdwl; + if (eflag_global) eng_vdwl += static_cast(ev_all.evdwl); if (vflag_global) { - virial[0] += ev_all.v[0]; - virial[1] += ev_all.v[1]; - virial[2] += ev_all.v[2]; - virial[3] += ev_all.v[3]; - virial[4] += ev_all.v[4]; - virial[5] += ev_all.v[5]; + virial[0] += static_cast(ev_all.v[0]); + virial[1] += 
static_cast(ev_all.v[1]); + virial[2] += static_cast(ev_all.v[2]); + virial[3] += static_cast(ev_all.v[3]); + virial[4] += static_cast(ev_all.v[4]); + virial[5] += static_cast(ev_all.v[5]); } if (eflag_atom) { @@ -308,7 +310,7 @@ void PairTersoffKokkos::operator()(TagPairTersoffComputeShortNeigh, const KK_FLOAT xtmp = x(i,0); const KK_FLOAT ytmp = x(i,1); const KK_FLOAT ztmp = x(i,2); - const KK_FLOAT cutmax_sq = cutmax*cutmax; + //const KK_FLOAT cutmax_sq = cutmax*cutmax; const int jnum = d_numneigh[i]; int inside = 0; @@ -356,9 +358,9 @@ void PairTersoffKokkos::tersoff_compute(const int &ii, EV_FLOAT& ev) // repulsive - KK_ACC_FLOAT f_x = 0.0; - KK_ACC_FLOAT f_y = 0.0; - KK_ACC_FLOAT f_z = 0.0; + KK_ACC_FLOAT f_x = 0; + KK_ACC_FLOAT f_y = 0; + KK_ACC_FLOAT f_z = 0; // attractive: bond order @@ -373,7 +375,7 @@ void PairTersoffKokkos::tersoff_compute(const int &ii, EV_FLOAT& ev) const int iparam_ij = d_elem3param(itype,jtype,jtype); const KK_FLOAT cutsq1 = d_params(iparam_ij).cutsq; - KK_FLOAT bo_ij = 0.0; + KK_FLOAT bo_ij = 0; if (rsq1 > cutsq1) continue; const KK_FLOAT rij = sqrt(rsq1); @@ -399,19 +401,19 @@ void PairTersoffKokkos::tersoff_compute(const int &ii, EV_FLOAT& ev) KK_FLOAT fa, dfa, bij, prefactor; ters_fa_k_and_ters_dfa(d_params(iparam_ij),rij,fa,dfa); ters_bij_k_and_ters_dbij(d_params(iparam_ij),bo_ij,bij,prefactor); - const KK_FLOAT fatt = -0.5*bij * dfa / rij; - prefactor = 0.5*fa * prefactor; + const KK_FLOAT fatt = -static_cast(0.5) * bij * dfa / rij; + prefactor = static_cast(0.5) * fa * prefactor; - f_x += delx1*fatt; - f_y += dely1*fatt; - f_z += delz1*fatt; - KK_ACC_FLOAT fj_x = -delx1*fatt; - KK_ACC_FLOAT fj_y = -dely1*fatt; - KK_ACC_FLOAT fj_z = -delz1*fatt; + f_x += static_cast(delx1*fatt); + f_y += static_cast(dely1*fatt); + f_z += static_cast(delz1*fatt); + KK_ACC_FLOAT fj_x = -static_cast(delx1*fatt); + KK_ACC_FLOAT fj_y = -static_cast(dely1*fatt); + KK_ACC_FLOAT fj_z = -static_cast(delz1*fatt); if (EVFLAG) { - const KK_FLOAT eng = 
0.5*bij * fa; - if (eflag) ev.evdwl += eng; + const KK_FLOAT eng = static_cast(0.5) * bij * fa; + if (eflag) ev.evdwl += static_cast(eng); if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); } @@ -476,17 +478,17 @@ void PairTersoffKokkos::tersoff_compute(const int &ii, EV_FLOAT& ev) (tmp_fcd - tmp_fce*d_params[iparam_ij].lam1) / rij; const KK_FLOAT eng = tmp_fce * d_params[iparam_ij].biga * tmp_exp; - f_x += delx1*frep; - fj_x -= delx1*frep; + f_x += static_cast(delx1*frep); + fj_x -= static_cast(delx1*frep); - f_y += dely1*frep; - fj_y -= dely1*frep; + f_y += static_cast(dely1*frep); + fj_y -= static_cast(dely1*frep); - f_z += delz1*frep; - fj_z -= delz1*frep; + f_z += static_cast(delz1*frep); + fj_z -= static_cast(delz1*frep); if (EVFLAG) { - if (eflag) ev.evdwl += eng; + if (eflag) ev.evdwl += static_cast(eng); if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,frep,delx1,dely1,delz1); } } @@ -559,63 +561,63 @@ void PairTersoffKokkos::operator()(TagPairTersoffCompute KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_fc_k(const Param& param, const KK_FLOAT &r) const +KK_FLOAT PairTersoffKokkos::ters_fc_k(const ParamKokkos& param, const KK_FLOAT &r) const { const KK_FLOAT ters_R = param.bigr; const KK_FLOAT ters_D = param.bigd; - if (r < ters_R-ters_D) return 1.0; - if (r > ters_R+ters_D) return 0.0; - return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D)); + if (r < ters_R-ters_D) return static_cast(1.0); + if (r > ters_R+ters_D) return static_cast(0.0); + return static_cast(0.5)*(static_cast(1.0) - sin(static_cast(MY_PI2)*(r - ters_R)/ters_D)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_dfc(const Param& param, const KK_FLOAT &r) const +KK_FLOAT PairTersoffKokkos::ters_dfc(const ParamKokkos& param, const KK_FLOAT &r) const { const KK_FLOAT ters_R = param.bigr; const KK_FLOAT ters_D = param.bigd; - if (r < 
ters_R-ters_D) return 0.0; - if (r > ters_R+ters_D) return 0.0; - return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D); + if (r < ters_R-ters_D) return static_cast(0.0); + if (r > ters_R+ters_D) return static_cast(0.0); + return -(static_cast(MY_PI4)/ters_D) * cos(static_cast(MY_PI2)*(r - ters_R)/ters_D); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void PairTersoffKokkos::ters_fc_k_and_ters_dfc(const Param& param, const KK_FLOAT &r, KK_FLOAT& fc, KK_FLOAT& dfc) const +void PairTersoffKokkos::ters_fc_k_and_ters_dfc(const ParamKokkos& param, const KK_FLOAT &r, KK_FLOAT& fc, KK_FLOAT& dfc) const { const KK_FLOAT ters_R = param.bigr; const KK_FLOAT ters_D = param.bigd; if (r < ters_R-ters_D) { - fc = 1.0; - dfc = 0.0; + fc = static_cast(1.0); + dfc = static_cast(0.0); return; } if (r > ters_R+ters_D) { - fc = 0.0; - dfc = 0.0; + fc = static_cast(0.0); + dfc = static_cast(0.0); return; } - const KK_FLOAT arg = MY_PI2*(r - ters_R)/ters_D; + const KK_FLOAT arg = static_cast(MY_PI2)*(r - ters_R)/ters_D; //KK_FLOAT sn, cn; //sincos(arg, &sn, &cn); - fc = 0.5*(1.0 - sin(arg)); - dfc = -(MY_PI4/ters_D) * cos(arg); + fc = static_cast(0.5)*(static_cast(1.0) - sin(arg)); + dfc = -(static_cast(MY_PI4)/ters_D) * cos(arg); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::bondorder(const Param& param, +KK_FLOAT PairTersoffKokkos::bondorder(const ParamKokkos& param, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2) const { @@ -627,8 +629,8 @@ KK_FLOAT PairTersoffKokkos::bondorder(const Param& param, if (int(param.powerm) == 3) arg = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); else arg = paramtmp; - if (arg > 69.0776) ex_delr = 1.e30; - else if (arg < -69.0776) ex_delr = 0.0; + if (arg > 
static_cast(69.0776)) ex_delr = static_cast(1.e30); + else if (arg < static_cast(-69.0776)) ex_delr = static_cast(0.0); else ex_delr = exp(arg); return ters_fc_k(param,rik) * ters_gijk(param,costheta) * ex_delr; @@ -639,13 +641,13 @@ KK_FLOAT PairTersoffKokkos::bondorder(const Param& param, template KOKKOS_INLINE_FUNCTION KK_FLOAT PairTersoffKokkos:: - ters_gijk(const Param& param, const KK_FLOAT &cos) const + ters_gijk(const ParamKokkos& param, const KK_FLOAT &cos) const { const KK_FLOAT ters_c = param.c * param.c; const KK_FLOAT ters_d = param.d * param.d; const KK_FLOAT hcth = param.h - cos; - return param.gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); + return param.gamma*(static_cast(1.0) + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); } /* ---------------------------------------------------------------------- */ @@ -653,13 +655,13 @@ KK_FLOAT PairTersoffKokkos:: template KOKKOS_INLINE_FUNCTION KK_FLOAT PairTersoffKokkos:: - ters_dgijk(const Param& param, const KK_FLOAT &cos) const + ters_dgijk(const ParamKokkos& param, const KK_FLOAT &cos) const { const KK_FLOAT ters_c = param.c * param.c; const KK_FLOAT ters_d = param.d * param.d; const KK_FLOAT hcth = param.h - cos; - const KK_FLOAT numerator = -2.0 * ters_c * hcth; - const KK_FLOAT denominator = 1.0/(ters_d + hcth*hcth); + const KK_FLOAT numerator = static_cast(-2.0) * ters_c * hcth; + const KK_FLOAT denominator = static_cast(1.0)/(ters_d + hcth*hcth); return param.gamma * numerator * denominator * denominator; } @@ -668,16 +670,16 @@ KK_FLOAT PairTersoffKokkos:: template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos:: - ters_gijk_and_ters_dgijk(const Param& param, const KK_FLOAT &cos, KK_FLOAT &gijk, KK_FLOAT &dgijk) const + ters_gijk_and_ters_dgijk(const ParamKokkos& param, const KK_FLOAT &cos, KK_FLOAT &gijk, KK_FLOAT &dgijk) const { const KK_FLOAT ters_c = param.c * param.c; const KK_FLOAT ters_d = param.d * param.d; const KK_FLOAT hcth = param.h - cos; - const KK_FLOAT numerator = -2.0 * 
ters_c * hcth; - const KK_FLOAT denominator = 1.0/(ters_d + hcth*hcth); + const KK_FLOAT numerator = -static_cast(2.0) * ters_c * hcth; + const KK_FLOAT denominator = static_cast(1.0)/(ters_d + hcth*hcth); - gijk = param.gamma*(1.0 + ters_c/ters_d - ters_c*denominator); + gijk = param.gamma*(static_cast(1.0) + ters_c/ters_d - ters_c*denominator); dgijk = param.gamma * numerator * denominator * denominator; } @@ -685,9 +687,9 @@ void PairTersoffKokkos:: template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_fa_k(const Param& param, const KK_FLOAT &r) const +KK_FLOAT PairTersoffKokkos::ters_fa_k(const ParamKokkos& param, const KK_FLOAT &r) const { - if (r > param.bigr + param.bigd) return 0.0; + if (r > param.bigr + param.bigd) return static_cast(0.0); return -param.bigb * exp(-param.lam2 * r) * ters_fc_k(param,r); } @@ -696,9 +698,9 @@ KK_FLOAT PairTersoffKokkos::ters_fa_k(const Param& param, const KK_F template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_dfa(const Param& param, const KK_FLOAT &r) const +KK_FLOAT PairTersoffKokkos::ters_dfa(const ParamKokkos& param, const KK_FLOAT &r) const { - if (r > param.bigr + param.bigd) return 0.0; + if (r > param.bigr + param.bigd) return static_cast(0.0); return param.bigb * exp(-param.lam2 * r) * (param.lam2 * ters_fc_k(param,r) - ters_dfc(param,r)); } @@ -707,11 +709,11 @@ KK_FLOAT PairTersoffKokkos::ters_dfa(const Param& param, const KK_FL template KOKKOS_INLINE_FUNCTION -void PairTersoffKokkos::ters_fa_k_and_ters_dfa(const Param& param, const KK_FLOAT &r, KK_FLOAT &fa, KK_FLOAT &dfa) const +void PairTersoffKokkos::ters_fa_k_and_ters_dfa(const ParamKokkos& param, const KK_FLOAT &r, KK_FLOAT &fa, KK_FLOAT &dfa) const { if (r > param.bigr + param.bigd) { - fa = 0.0; - dfa = 0.0; + fa = static_cast(0.0); + dfa = static_cast(0.0); } else { KK_FLOAT tmp1 = param.bigb * exp(-param.lam2 * r); KK_FLOAT fc_k, dfc; @@ -725,51 +727,51 @@ void PairTersoffKokkos::ters_fa_k_and_ters_dfa(const Param& param, c 
template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_bij_k(const Param& param, const KK_FLOAT &bo) const +KK_FLOAT PairTersoffKokkos::ters_bij_k(const ParamKokkos& param, const KK_FLOAT &bo) const { const KK_FLOAT tmp = param.beta * bo; - if (tmp > param.c1) return 1.0/sqrt(tmp); + if (tmp > param.c1) return static_cast(1.0)/sqrt(tmp); if (tmp > param.c2) - return (1.0 - pow(tmp,-param.powern) / (2.0*param.powern))/sqrt(tmp); - if (tmp < param.c4) return 1.0; + return (static_cast(1.0) - pow(tmp,-param.powern) / (static_cast(2.0)*param.powern))/sqrt(tmp); + if (tmp < param.c4) return static_cast(1.0); if (tmp < param.c3) - return 1.0 - pow(tmp,param.powern)/(2.0*param.powern); - return pow(1.0 + pow(tmp,param.powern), -1.0/(2.0*param.powern)); + return static_cast(1.0) - pow(tmp,param.powern)/(static_cast(2.0)*param.powern); + return pow(static_cast(1.0) + pow(tmp,param.powern), static_cast(-1.0)/(static_cast(2.0)*param.powern)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -KK_FLOAT PairTersoffKokkos::ters_dbij(const Param& param, const KK_FLOAT &bo) const +KK_FLOAT PairTersoffKokkos::ters_dbij(const ParamKokkos& param, const KK_FLOAT &bo) const { const KK_FLOAT tmp = param.beta * bo; - const KK_FLOAT factor = -0.5/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) + const KK_FLOAT factor = static_cast(-0.5)/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) if (tmp > param.c1) return param.beta * factor; if (tmp > param.c2) return param.beta * (factor * // error in negligible 2nd term fixed 2/21/2022 // (1.0 - 0.5*(1.0 + 1.0/(2.0*param.powern)) * - (1.0 - (1.0 + 1.0/(2.0*param.powern)) * + (static_cast(1.0) - (static_cast(1.0) + static_cast(0.5)/(param.powern)) * pow(tmp,-param.powern))); - if (tmp < param.c4) return 0.0; + if (tmp < param.c4) return static_cast(0.0); if (tmp < param.c3) - return -0.5*param.beta * pow(tmp,param.powern-1.0); + return -static_cast(0.5)*param.beta * 
pow(tmp,param.powern-static_cast(1.0)); const KK_FLOAT tmp_n = pow(tmp,param.powern); - return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*param.powern)))*tmp_n / bo; + return static_cast(-0.5) * pow(static_cast(1.0)+tmp_n, static_cast(-1.0)-(static_cast(0.5)/(param.powern)))*tmp_n / bo; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void PairTersoffKokkos::ters_bij_k_and_ters_dbij(const Param& param, const KK_FLOAT &bo, KK_FLOAT& bij, KK_FLOAT& prefactor) const +void PairTersoffKokkos::ters_bij_k_and_ters_dbij(const ParamKokkos& param, const KK_FLOAT &bo, KK_FLOAT& bij, KK_FLOAT& prefactor) const { const KK_FLOAT tmp = param.beta * bo; - const KK_FLOAT factor = -0.5/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) + const KK_FLOAT factor = static_cast(-0.5)/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) if (tmp > param.c1) { - bij = 1.0/sqrt(tmp); + bij = static_cast(1.0)/sqrt(tmp); prefactor = param.beta * factor; return; } @@ -778,28 +780,28 @@ void PairTersoffKokkos::ters_bij_k_and_ters_dbij(const Param& param, if (tmp > param.c2) { auto tmp_pow_neg_prm_ijk_pn = pow(tmp,-prm_ijk_pn); - bij = (1.0 - tmp_pow_neg_prm_ijk_pn / (2.0*prm_ijk_pn))/sqrt(tmp); + bij = (static_cast(1.0) - tmp_pow_neg_prm_ijk_pn / (static_cast(2.0)*prm_ijk_pn))/sqrt(tmp); prefactor = param.beta * (factor * - (1.0 - 0.5*(1.0 + 1.0/(2.0*prm_ijk_pn)) * + (static_cast(1.0) - static_cast(0.5)*(static_cast(1.0) + static_cast(0.5)/(prm_ijk_pn)) * tmp_pow_neg_prm_ijk_pn)); return; } if (tmp < param.c4) { - bij = 1.0; - prefactor = 0.0; + bij = static_cast(1.0); + prefactor = static_cast(0.0); return; } if (tmp < param.c3) { - auto tmp_pow_prm_ijk_pn_less_one = pow(tmp,prm_ijk_pn-1.0); - bij = 1.0 - tmp_pow_prm_ijk_pn_less_one*tmp/(2.0*prm_ijk_pn); - prefactor = -0.5*param.beta * tmp_pow_prm_ijk_pn_less_one; + auto tmp_pow_prm_ijk_pn_less_one = pow(tmp,prm_ijk_pn-static_cast(1.0)); + bij = static_cast(1.0) - 
tmp_pow_prm_ijk_pn_less_one*tmp/(static_cast(2.0)*prm_ijk_pn); + prefactor = static_cast(-0.5)*param.beta * tmp_pow_prm_ijk_pn_less_one; return; } const KK_FLOAT tmp_n = pow(tmp,param.powern); - bij = pow(1.0 + tmp_n, -1.0/(2.0*prm_ijk_pn)); - prefactor = -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*prm_ijk_pn)))*tmp_n / bo; + bij = pow(static_cast(1.0) + tmp_n, static_cast(-0.5)/(prm_ijk_pn)); + prefactor = static_cast(-0.5) * pow(static_cast(1.0)+tmp_n, static_cast(-1.0)-(static_cast(0.5)/(prm_ijk_pn)))*tmp_n / bo; } /* ---------------------------------------------------------------------- */ @@ -807,7 +809,7 @@ void PairTersoffKokkos::ters_bij_k_and_ters_dbij(const Param& param, template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::ters_dthb( - const Param& param, const KK_FLOAT &prefactor, + const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fi, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk) const @@ -821,11 +823,11 @@ void PairTersoffKokkos::ters_dthb( delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; //rij = sqrt(rsq1); - rijinv = 1.0/rij; + rijinv = static_cast(1.0)/rij; vec3_scale(rijinv,delrij,rij_hat); //rik = sqrt(rsq2); - rikinv = 1.0/rik; + rikinv = static_cast(1.0)/rik; vec3_scale(rikinv,delrik,rik_hat); // from PairTersoff::ters_zetaterm_d @@ -838,12 +840,12 @@ void PairTersoffKokkos::ters_dthb( if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); else tmp = paramtmp; - if (tmp > 69.0776) ex_delr = 1.e30; - else if (tmp < -69.0776) ex_delr = 0.0; + if (tmp > static_cast(69.0776)) ex_delr = static_cast(1.e30); + else if (tmp < static_cast(-69.0776)) ex_delr = static_cast(0.0); else ex_delr = exp(tmp); if (int(param.powerm) == 3) - dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr; + dex_delr = 
static_cast(3.0)*paramtmp*paramtmp*param.lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr; else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); @@ -856,7 +858,7 @@ void PairTersoffKokkos::ters_dthb( vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); vec3_scale(rikinv,dcosfk,dcosfk); vec3_add(dcosfj,dcosfk,dcosfi); - vec3_scale(-1.0,dcosfi,dcosfi); + vec3_scale(static_cast(-1.0),dcosfi,dcosfi); vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi); vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi); @@ -880,7 +882,7 @@ void PairTersoffKokkos::ters_dthb( template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::ters_dthbj( - const Param& param, const KK_FLOAT &prefactor, + const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk) const @@ -892,10 +894,10 @@ void PairTersoffKokkos::ters_dthbj( delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; - rijinv = 1.0/rij; + rijinv = static_cast(1.0)/rij; vec3_scale(rijinv,delrij,rij_hat); - rikinv = 1.0/rik; + rikinv = static_cast(1.0)/rik; vec3_scale(rikinv,delrik,rik_hat); KK_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; @@ -907,12 +909,12 @@ void PairTersoffKokkos::ters_dthbj( if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); else tmp = paramtmp; - if (tmp > 69.0776) ex_delr = 1.e30; - else if (tmp < -69.0776) ex_delr = 0.0; + if (tmp > static_cast(69.0776)) ex_delr = static_cast(1.e30); + else if (tmp < static_cast(-69.0776)) ex_delr = static_cast(0.0); else ex_delr = exp(tmp); if (int(param.powerm) == 3) - dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + dex_delr = static_cast(3.0)*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; else dex_delr = 
param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); @@ -924,7 +926,7 @@ void PairTersoffKokkos::ters_dthbj( vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); vec3_scale(rikinv,dcosfk,dcosfk); vec3_add(dcosfj,dcosfk,dcosfi); - vec3_scale(-1.0,dcosfi,dcosfi); + vec3_scale(static_cast(-1.0),dcosfi,dcosfi); vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); @@ -942,7 +944,7 @@ void PairTersoffKokkos::ters_dthbj( template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::ters_dthbk( - const Param& param, const KK_FLOAT &prefactor, + const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fk) const @@ -954,10 +956,10 @@ void PairTersoffKokkos::ters_dthbk( delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; - rijinv = 1.0/rij; + rijinv = static_cast(1.0)/rij; vec3_scale(rijinv,delrij,rij_hat); - rikinv = 1.0/rik; + rikinv = static_cast(1.0)/rik; vec3_scale(rikinv,delrik,rik_hat); KK_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; @@ -969,12 +971,12 @@ void PairTersoffKokkos::ters_dthbk( if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); else tmp = paramtmp; - if (tmp > 69.0776) ex_delr = 1.e30; - else if (tmp < -69.0776) ex_delr = 0.0; + if (tmp > static_cast(69.0776)) ex_delr = static_cast(1.e30); + else if (tmp < static_cast(-69.0776)) ex_delr = static_cast(0.0); else ex_delr = exp(tmp); if (int(param.powerm) == 3) - dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + dex_delr = static_cast(3.0)*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); @@ -986,7 +988,7 @@ void PairTersoffKokkos::ters_dthbk( 
vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); vec3_scale(rikinv,dcosfk,dcosfk); vec3_add(dcosfj,dcosfk,dcosfi); - vec3_scale(-1.0,dcosfi,dcosfi); + vec3_scale(static_cast(-1.0),dcosfi,dcosfi); vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); @@ -1012,7 +1014,7 @@ void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i auto a_vatom = v_vatom.template access>(); if (eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * epair; + const KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * epair); a_eatom[i] += epairhalf; a_eatom[j] += epairhalf; } @@ -1026,28 +1028,28 @@ void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i const KK_FLOAT v5 = dely*delz*fpair; if (vflag_global) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; + ev.v[0] += static_cast(v0); + ev.v[1] += static_cast(v1); + ev.v[2] += static_cast(v2); + ev.v[3] += static_cast(v3); + ev.v[4] += static_cast(v4); + ev.v[5] += static_cast(v5); } if (vflag_atom) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; - - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + a_vatom(i,0) += static_cast(static_cast(0.5) * v0); + a_vatom(i,1) += static_cast(static_cast(0.5) * v1); + a_vatom(i,2) += static_cast(static_cast(0.5) * v2); + a_vatom(i,3) += static_cast(static_cast(0.5) * v3); + a_vatom(i,4) += static_cast(static_cast(0.5) * v4); + a_vatom(i,5) += static_cast(static_cast(0.5) * v5); + + a_vatom(j,0) += static_cast(static_cast(0.5) * v0); + a_vatom(j,1) += static_cast(static_cast(0.5) * v1); + a_vatom(j,2) += static_cast(static_cast(0.5) * v2); + a_vatom(j,3) += static_cast(static_cast(0.5) * v3); + a_vatom(j,4) += static_cast(static_cast(0.5) * v4); + a_vatom(j,5) += 
static_cast(static_cast(0.5) * v5); } } } @@ -1066,40 +1068,32 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; - v[0] = (drij[0]*fj[0] + drik[0]*fk[0]); - v[1] = (drij[1]*fj[1] + drik[1]*fk[1]); - v[2] = (drij[2]*fj[2] + drik[2]*fk[2]); - v[3] = (drij[0]*fj[1] + drik[0]*fk[1]); - v[4] = (drij[0]*fj[2] + drik[0]*fk[2]); - v[5] = (drij[1]*fj[2] + drik[1]*fk[2]); + v[0] = (static_cast(drij[0])*fj[0] + static_cast(drik[0])*fk[0]); + v[1] = (static_cast(drij[1])*fj[1] + static_cast(drik[1])*fk[1]); + v[2] = (static_cast(drij[2])*fj[2] + static_cast(drik[2])*fk[2]); + v[3] = (static_cast(drij[0])*fj[1] + static_cast(drik[0])*fk[1]); + v[4] = (static_cast(drij[0])*fj[2] + static_cast(drik[0])*fk[2]); + v[5] = (static_cast(drij[1])*fj[2] + static_cast(drik[1])*fk[2]); if (vflag_global) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v[n]; } if (vflag_atom) { - v[0] *= THIRD; - v[1] *= THIRD; - v[2] *= THIRD; - v[3] *= THIRD; - v[4] *= THIRD; - v[5] *= THIRD; + for (int n = 0; n < 6; n++) + v[n] *= static_cast(THIRD); - a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2]; - a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5]; + for (int n = 0; n < 6; n++) + a_vatom(i,n) += v[n]; - a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2]; - a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5]; + for (int n = 0; n < 6; n++) + a_vatom(j,n) += v[n]; - a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2]; - a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; + for (int n = 0; n < 6; n++) + a_vatom(k,n) += v[n]; } } @@ -1111,27 +1105,23 @@ void PairTersoffKokkos::v_tally3_atom(EV_FLOAT &ev, const int &i, co const int 
& /*k*/, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk, KK_FLOAT *drji, KK_FLOAT *drjk) const { - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; - v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]); - v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]); - v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]); - v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]); - v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]); - v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]); + v[0] = static_cast(THIRD) * (static_cast(drji[0])*fj[0] + static_cast(drjk[0])*fk[0]); + v[1] = static_cast(THIRD) * (static_cast(drji[1])*fj[1] + static_cast(drjk[1])*fk[1]); + v[2] = static_cast(THIRD) * (static_cast(drji[2])*fj[2] + static_cast(drjk[2])*fk[2]); + v[3] = static_cast(THIRD) * (static_cast(drji[0])*fj[1] + static_cast(drjk[0])*fk[1]); + v[4] = static_cast(THIRD) * (static_cast(drji[0])*fj[2] + static_cast(drjk[0])*fk[2]); + v[5] = static_cast(THIRD) * (static_cast(drji[1])*fj[2] + static_cast(drjk[1])*fk[2]); if (vflag_global) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v[n]; } if (vflag_atom) { - d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2]; - d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i,n) += v[n]; } } diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index 3d914794774..46e9c6c3e23 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -29,6 +29,62 @@ PairStyle(tersoff/kk/host,PairTersoffKokkos); namespace LAMMPS_NS { +struct ParamKokkos { + KK_FLOAT lam1, lam2, lam3; + KK_FLOAT c, d, h; + KK_FLOAT gamma, powerm; + KK_FLOAT powern, beta; + KK_FLOAT biga, bigb, bigd, bigr; + KK_FLOAT cut, cutsq; + KK_FLOAT c1, c2, c3, c4; + int ielement, jelement, kelement; + int powermint; + KK_FLOAT Z_i, Z_j; // added for TersoffZBL + KK_FLOAT ZBLcut, ZBLexpscale; + KK_FLOAT c5, ca1, ca4; 
// added for TersoffMOD + KK_FLOAT powern_del; + KK_FLOAT c0; // added for TersoffMODC + + // convenient = operator + ParamKokkos& operator=(const PairTersoff::Param& other) { + lam1 = static_cast(other.lam1); + lam2 = static_cast(other.lam2); + lam3 = static_cast(other.lam3); + c = static_cast(other.c); + d = static_cast(other.d); + h = static_cast(other.h); + gamma = static_cast(other.gamma); + powerm = static_cast(other.powerm); + powern = static_cast(other.powern); + beta = static_cast(other.beta); + biga = static_cast(other.biga); + bigb = static_cast(other.bigb); + bigd = static_cast(other.bigd); + bigr = static_cast(other.bigr); + cut = static_cast(other.cut); + cutsq = static_cast(other.cutsq); + c1 = static_cast(other.c1); + c2 = static_cast(other.c2); + c3 = static_cast(other.c3); + c4 = static_cast(other.c4); + ielement = other.ielement; + jelement = other.jelement; + kelement = other.kelement; + powermint = other.powermint; + Z_i = static_cast(other.Z_i); + Z_j = static_cast(other.Z_j); + ZBLcut = static_cast(other.ZBLcut); + ZBLexpscale = static_cast(other.ZBLexpscale); + c5 = static_cast(other.c5); + ca1 = static_cast(other.ca1); + ca4 = static_cast(other.ca4); + powern_del = static_cast(other.powern_del); + c0 = static_cast(other.c0); + return *this; + } + +}; + template struct TagPairTersoffCompute{}; @@ -90,100 +146,91 @@ class PairTersoffKokkos : public PairTersoff { void tersoff_compute(const int&, EV_FLOAT&) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_fc_k(const Param& param, const KK_FLOAT &r) const; + KK_FLOAT ters_fc_k(const ParamKokkos& param, const KK_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_dfc(const Param& param, const KK_FLOAT &r) const; + KK_FLOAT ters_dfc(const ParamKokkos& param, const KK_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - void ters_fc_k_and_ters_dfc(const Param& param, const KK_FLOAT &r, KK_FLOAT &fc, KK_FLOAT &dfc) const; + void ters_fc_k_and_ters_dfc(const ParamKokkos& param, const KK_FLOAT &r, KK_FLOAT &fc, 
KK_FLOAT &dfc) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_fa_k(const Param& param, const KK_FLOAT &r) const; + KK_FLOAT ters_fa_k(const ParamKokkos& param, const KK_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_dfa(const Param& param, const KK_FLOAT &r) const; + KK_FLOAT ters_dfa(const ParamKokkos& param, const KK_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - void ters_fa_k_and_ters_dfa(const Param& param, const KK_FLOAT &r, KK_FLOAT &fa, KK_FLOAT &dfa) const; + void ters_fa_k_and_ters_dfa(const ParamKokkos& param, const KK_FLOAT &r, KK_FLOAT &fa, KK_FLOAT &dfa) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_bij_k(const Param& param, const KK_FLOAT &bo) const; + KK_FLOAT ters_bij_k(const ParamKokkos& param, const KK_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_dbij(const Param& param, const KK_FLOAT &bo) const; + KK_FLOAT ters_dbij(const ParamKokkos& param, const KK_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - void ters_bij_k_and_ters_dbij(const Param& param, const KK_FLOAT &bo, KK_FLOAT &bij, KK_FLOAT &prefactor) const; + void ters_bij_k_and_ters_dbij(const ParamKokkos& param, const KK_FLOAT &bo, KK_FLOAT &bij, KK_FLOAT &prefactor) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT bondorder(const Param& param, + KK_FLOAT bondorder(const ParamKokkos& param, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_gijk(const Param& param, const KK_FLOAT &cos) const; + KK_FLOAT ters_gijk(const ParamKokkos& param, const KK_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - KK_FLOAT ters_dgijk(const Param& param, const KK_FLOAT &cos) const; + KK_FLOAT ters_dgijk(const ParamKokkos& param, const KK_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - void ters_gijk_and_ters_dgijk(const Param& param, const KK_FLOAT &cos, KK_FLOAT& gijk, KK_FLOAT& dgijk) const; + void ters_gijk_and_ters_dgijk(const ParamKokkos& param, const 
KK_FLOAT &cos, KK_FLOAT& gijk, KK_FLOAT& dgijk) const; KOKKOS_INLINE_FUNCTION - void ters_dthb(const Param& param, const KK_FLOAT &prefactor, + void ters_dthb(const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fi, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION - void ters_dthbj(const Param& param, const KK_FLOAT &prefactor, + void ters_dthbj(const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION - void ters_dthbk(const Param& param, const KK_FLOAT &prefactor, + void ters_dthbk(const ParamKokkos& param, const KK_FLOAT &prefactor, const KK_FLOAT &rij, const KK_FLOAT &dx1, const KK_FLOAT &dy1, const KK_FLOAT &dz1, const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION KK_FLOAT vec3_dot(const KK_FLOAT x[3], const KK_FLOAT y[3]) const { - return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + KK_FLOAT dot = 0; + for (int i = 0; i < 3; i++) + dot += x[i]*y[i]; + return dot; } KOKKOS_INLINE_FUNCTION void vec3_add(const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_FLOAT x[3], KK_FLOAT y[3]) const { - y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; - } - -#ifdef LMP_KOKKOS_SINGLE_DOUBLE - KOKKOS_INLINE_FUNCTION - void vec3_scale(const 
KK_FLOAT k, const KK_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; + for (int i = 0; i < 3; i++) + z[i] = x[i]+y[i]; } + template KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_ACC_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; + void vec3_scale(const k_type k, const x_type x[3], y_type y[3]) const { + for (int i = 0; i < 3; i++) + y[i] = static_cast(static_cast(k)*x[i]); } - + + template KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_ACC_FLOAT y[3], KK_ACC_FLOAT * const z) const { - z[0] = (KK_ACC_FLOAT)k*x[0]+y[0]; z[1] = (KK_ACC_FLOAT)k*x[1]+y[1]; z[2] = (KK_ACC_FLOAT)k*x[2]+y[2]; + void vec3_scaleadd(const kx_type k, const kx_type x[3], const yz_type y[3], yz_type z[3]) const { + for (int i = 0; i < 3; i++) + z[i] = static_cast(k*x[i])+y[i]; } -#endif KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const; @@ -209,12 +256,14 @@ class PairTersoffKokkos : public PairTersoff { typename AT::t_int_3d_randomread d_elem3param; typename AT::t_int_1d_randomread d_map; - typedef Kokkos::DualView tdual_param_1d; + typedef Kokkos::DualView tdual_param_1d; typedef typename tdual_param_1d::t_dev t_param_1d; typedef typename tdual_param_1d::t_host t_host_param_1d; t_param_1d d_params; + KK_FLOAT cutmax_sq; + int inum; typename AT::t_kkfloat_1d_3_lr_randomread x; typename AT::t_kkacc_1d_3 f; From 98985bbf60fc2475afdeb2bdc6df75e3484de32c Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:31:06 -0700 Subject: [PATCH 098/604] Minor code clean-up in the Kokkos Tersoff headers, replacing multiple overloads of utility functions for combinations of float/double with templated versions. 
--- src/KOKKOS/pair_tersoff_kokkos.h | 2 +- src/KOKKOS/pair_tersoff_mod_kokkos.h | 39 +++++++++++----------------- src/KOKKOS/pair_tersoff_zbl_kokkos.h | 38 ++++++++++----------------- 3 files changed, 30 insertions(+), 49 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h index 46e9c6c3e23..eb6b54f2754 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.h +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -224,7 +224,7 @@ class PairTersoffKokkos : public PairTersoff { for (int i = 0; i < 3; i++) y[i] = static_cast(static_cast(k)*x[i]); } - + template KOKKOS_INLINE_FUNCTION void vec3_scaleadd(const kx_type k, const kx_type x[3], const yz_type y[3], yz_type z[3]) const { diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.h b/src/KOKKOS/pair_tersoff_mod_kokkos.h index b58a0a4e7db..27aeca670e8 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.h +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h @@ -136,43 +136,34 @@ class PairTersoffMODKokkos : public PairTersoffMOD { const KK_FLOAT &rik, const KK_FLOAT &dx2, const KK_FLOAT &dy2, const KK_FLOAT &dz2, KK_ACC_FLOAT *fk) const; + KOKKOS_INLINE_FUNCTION KK_FLOAT vec3_dot(const KK_FLOAT x[3], const KK_FLOAT y[3]) const { - return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + KK_FLOAT dot = 0; + for (int i = 0; i < 3; i++) + dot += x[i]*y[i]; + return dot; } KOKKOS_INLINE_FUNCTION void vec3_add(const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_FLOAT x[3], KK_FLOAT y[3]) const { - y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; - } - -#ifdef LMP_KOKKOS_SINGLE_DOUBLE - KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - 
y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; + for (int i = 0; i < 3; i++) + z[i] = x[i]+y[i]; } + template KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_ACC_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; + void vec3_scale(const k_type k, const x_type x[3], y_type y[3]) const { + for (int i = 0; i < 3; i++) + y[i] = static_cast(static_cast(k)*x[i]); } + template KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_ACC_FLOAT y[3], KK_ACC_FLOAT * const z) const { - z[0] = (KK_ACC_FLOAT)k*x[0]+y[0]; z[1] = (KK_ACC_FLOAT)k*x[1]+y[1]; z[2] = (KK_ACC_FLOAT)k*x[2]+y[2]; + void vec3_scaleadd(const kx_type k, const kx_type x[3], const yz_type y[3], yz_type z[3]) const { + for (int i = 0; i < 3; i++) + z[i] = static_cast(k*x[i])+y[i]; } -#endif - KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const; diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.h b/src/KOKKOS/pair_tersoff_zbl_kokkos.h index 6f44eca42f4..723ca7adff1 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.h +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h @@ -138,42 +138,32 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { KOKKOS_INLINE_FUNCTION KK_FLOAT vec3_dot(const KK_FLOAT x[3], const KK_FLOAT y[3]) const { - return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + KK_FLOAT dot = 0; + for (int i = 0; i < 3; i++) + dot += x[i]*y[i]; + return dot; } KOKKOS_INLINE_FUNCTION void vec3_add(const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; + for (int i = 0; i < 3; i++) + z[i] = x[i]+y[i]; } + template KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_FLOAT x[3], KK_FLOAT y[3]) const { - y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; + void vec3_scale(const k_type k, const x_type x[3], y_type y[3]) const { + for (int i = 0; i < 3; i++) + y[i] = 
static_cast(static_cast(k)*x[i]); } + template KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_FLOAT y[3], KK_FLOAT * const z) const { - z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; + void vec3_scaleadd(const kx_type k, const kx_type x[3], const yz_type y[3], yz_type z[3]) const { + for (int i = 0; i < 3; i++) + z[i] = static_cast(k*x[i])+y[i]; } -#ifdef LMP_KOKKOS_SINGLE_DOUBLE - KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scale(const KK_FLOAT k, const KK_ACC_FLOAT x[3], KK_ACC_FLOAT y[3]) const { - y[0] = (KK_ACC_FLOAT)k*x[0]; y[1] = (KK_ACC_FLOAT)k*x[1]; y[2] = (KK_ACC_FLOAT)k*x[2]; - } - - KOKKOS_INLINE_FUNCTION - void vec3_scaleadd(const KK_FLOAT k, const KK_FLOAT x[3], const KK_ACC_FLOAT y[3], KK_ACC_FLOAT * const z) const { - z[0] = (KK_ACC_FLOAT)k*x[0]+y[0]; z[1] = (KK_ACC_FLOAT)k*x[1]+y[1]; z[2] = (KK_ACC_FLOAT)k*x[2]+y[2]; - } -#endif - - KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const; From e056cd959670d19e8390e907dd30fda5a94f00f2 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:50:35 -0700 Subject: [PATCH 099/604] Removed silent conversions from pppm_kokkos*, as relevant for Rhodopsin, SPC/E, and other electrostatics workflows. I also changed the initialization-only preprocessing kernels to intentionally always run in full double, which can be reverted. 
--- src/KOKKOS/pppm_kokkos.cpp | 457 +++++++++++++++++++++---------------- src/KOKKOS/pppm_kokkos.h | 28 ++- 2 files changed, 274 insertions(+), 211 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 663d99ccb24..457fd0634b5 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -65,7 +65,7 @@ PPPMKokkos::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp) // see JCP 109, pg 7698 for derivation of coefficients // higher order coefficients may be computed if needed - acons = typename Kokkos::DualView::t_host("pppm:acons"); + acons = typename Kokkos::DualView::t_host("pppm:acons"); acons(1,0) = 2.0 / 3.0; acons(2,0) = 1.0 / 50.0; acons(2,1) = 5.0 / 294.0; @@ -194,6 +194,8 @@ void PPPMKokkos::init() scale = 1.0; qqrd2e = force->qqrd2e; qsum_qsq(); + qscale = qqrd2e * scale; + qscale_kk = static_cast(qscale); natoms_original = atom->natoms; // set accuracy (force units) from accuracy_relative or accuracy_absolute @@ -221,6 +223,10 @@ void PPPMKokkos::init() set_grid_global(); set_grid_local(); + // set appropriately cast shifts + shift_kk = static_cast(shift); + shiftone_kk = static_cast(shiftone); + if (overlap_allowed) break; gc = new Grid3dKokkos(lmp,world,nx_pppm,ny_pppm,nz_pppm); @@ -249,6 +255,8 @@ void PPPMKokkos::init() // adjust g_ewald if (!gewaldflag) adjust_gewald(); + g_ewald_kk = static_cast(g_ewald); + g_ewald_inv_kk = static_cast(1.0 / g_ewald); // calculate the final accuracy @@ -332,10 +340,19 @@ void PPPMKokkos::setup() delvolinv = delxinv*delyinv*delzinv; + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + delvolinv_kk = static_cast(delvolinv); + unitkx = (MY_2PI/xprd); unitky = (MY_2PI/yprd); unitkz = (MY_2PI/zprd_slab); + unitkx_kk = static_cast(unitkx); + unitky_kk = static_cast(unitky); + unitkz_kk = static_cast(unitkz); + // d_fkx,d_fky,d_fkz for my FFT grid pts copymode = 1; @@ -368,24 +385,27 @@ template KOKKOS_INLINE_FUNCTION void 
PPPMKokkos::operator()(TagPPPM_setup1, const int &i) const { - KK_FLOAT per = i - nx_pppm*(2*i/nx_pppm); - d_fkx[i-nxlo_fft] = unitkx*per; + // keep this is double because it's run in setup + double per = static_cast(i - nx_pppm*(2*i/nx_pppm)); + d_fkx[i-nxlo_fft] = static_cast(unitkx*per); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_setup2, const int &i) const { - KK_FLOAT per = i - ny_pppm*(2*i/ny_pppm); - d_fky[i-nylo_fft] = unitky*per; + // keep this is double because it's run in setup + double per = static_cast(i - ny_pppm*(2*i/ny_pppm)); + d_fky[i-nylo_fft] = static_cast(unitky*per); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_setup3, const int &i) const { - KK_FLOAT per = i - nz_pppm*(2*i/nz_pppm); - d_fkz[i-nzlo_fft] = unitkz*per; + // keep this is double because it's run in setup + double per = static_cast(i - nz_pppm*(2*i/nz_pppm)); + d_fkz[i-nzlo_fft] = static_cast(unitkz*per); } template @@ -396,18 +416,18 @@ void PPPMKokkos::operator()(TagPPPM_setup4, const int &n) const const int j = (n - k*numy_fft*numx_fft) / numx_fft; const int i = n - k*numy_fft*numx_fft - j*numx_fft; const KK_FLOAT sqk = d_fkx[i]*d_fkx[i] + d_fky[j]*d_fky[j] + d_fkz[k]*d_fkz[k]; - if (sqk == 0.0) { - d_vg(n,0) = 0.0; - d_vg(n,1) = 0.0; - d_vg(n,2) = 0.0; - d_vg(n,3) = 0.0; - d_vg(n,4) = 0.0; - d_vg(n,5) = 0.0; + if (sqk == 0) { + d_vg(n,0) = 0; + d_vg(n,1) = 0; + d_vg(n,2) = 0; + d_vg(n,3) = 0; + d_vg(n,4) = 0; + d_vg(n,5) = 0; } else { - const KK_FLOAT vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - d_vg(n,0) = 1.0 + vterm*d_fkx[i]*d_fkx[i]; - d_vg(n,1) = 1.0 + vterm*d_fky[j]*d_fky[j]; - d_vg(n,2) = 1.0 + vterm*d_fkz[k]*d_fkz[k]; + const KK_FLOAT vterm = static_cast(-2.0) * (static_cast(1.0) / sqk + static_cast(0.25) * g_ewald_inv_kk * g_ewald_inv_kk); + d_vg(n,0) = static_cast(1.0) + vterm*d_fkx[i]*d_fkx[i]; + d_vg(n,1) = static_cast(1.0) + vterm*d_fky[j]*d_fky[j]; + d_vg(n,2) = static_cast(1.0) + vterm*d_fkz[k]*d_fkz[k]; 
d_vg(n,3) = vterm*d_fkx[i]*d_fky[j]; d_vg(n,4) = vterm*d_fkx[i]*d_fkz[k]; d_vg(n,5) = vterm*d_fky[j]*d_fkz[k]; @@ -446,6 +466,11 @@ void PPPMKokkos::setup_triclinic() delzinv = nz_pppm; delvolinv = delxinv*delyinv*delzinv/volume; + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + delvolinv_kk = static_cast(delvolinv); + numz_fft = nzhi_fft-nzlo_fft + 1; numy_fft = nyhi_fft-nylo_fft + 1; numx_fft = nxhi_fft-nxlo_fft + 1; @@ -475,18 +500,18 @@ void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &n) j += nylo_fft; i += nxlo_fft; - KK_FLOAT per_k = k - nz_pppm*(2*k/nz_pppm); - KK_FLOAT per_j = j - ny_pppm*(2*j/ny_pppm); - KK_FLOAT per_i = i - nx_pppm*(2*i/nx_pppm); + double per_k = static_cast(k - nz_pppm*(2*k/nz_pppm)); + double per_j = static_cast(j - ny_pppm*(2*j/ny_pppm)); + double per_i = static_cast(i - nx_pppm*(2*i/nx_pppm)); - KK_FLOAT unitk_lamda[3]; + double unitk_lamda[3]; unitk_lamda[0] = 2.0*MY_PI*per_i; unitk_lamda[1] = 2.0*MY_PI*per_j; unitk_lamda[2] = 2.0*MY_PI*per_k; x2lamdaT_kokkos(&unitk_lamda[0],&unitk_lamda[0]); - d_fkx[n] = unitk_lamda[0]; - d_fky[n] = unitk_lamda[1]; - d_fkz[n] = unitk_lamda[2]; + d_fkx[n] = static_cast(unitk_lamda[0]); + d_fky[n] = static_cast(unitk_lamda[1]); + d_fkz[n] = static_cast(unitk_lamda[2]); } template @@ -494,18 +519,18 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_setup_triclinic2, const int &n) const { const KK_FLOAT sqk = d_fkx[n]*d_fkx[n] + d_fky[n]*d_fky[n] + d_fkz[n]*d_fkz[n]; - if (sqk == 0.0) { - d_vg(n,0) = 0.0; - d_vg(n,1) = 0.0; - d_vg(n,2) = 0.0; - d_vg(n,3) = 0.0; - d_vg(n,4) = 0.0; - d_vg(n,5) = 0.0; + if (sqk == 0) { + d_vg(n,0) = 0; + d_vg(n,1) = 0; + d_vg(n,2) = 0; + d_vg(n,3) = 0; + d_vg(n,4) = 0; + d_vg(n,5) = 0; } else { - const KK_FLOAT vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - d_vg(n,0) = 1.0 + vterm*d_fkx[n]*d_fkx[n]; - d_vg(n,1) = 1.0 + vterm*d_fky[n]*d_fky[n]; - d_vg(n,2) = 1.0 + 
vterm*d_fkz[n]*d_fkz[n]; + const KK_FLOAT vterm = static_cast(-2.0) * (static_cast(1.0) / sqk + static_cast(0.25) * g_ewald_inv_kk * g_ewald_inv_kk); + d_vg(n,0) = static_cast(1.0) + vterm*d_fkx[n]*d_fkx[n]; + d_vg(n,1) = static_cast(1.0) + vterm*d_fky[n]*d_fky[n]; + d_vg(n,2) = static_cast(1.0) + vterm*d_fkz[n]*d_fkz[n]; d_vg(n,3) = vterm*d_fkx[n]*d_fky[n]; d_vg(n,4) = vterm*d_fkx[n]*d_fkz[n]; d_vg(n,5) = vterm*d_fky[n]*d_fkz[n]; @@ -571,9 +596,14 @@ void PPPMKokkos::compute(int eflag, int vflag) boxlo[0] = domain->boxlo_lamda[0]; boxlo[1] = domain->boxlo_lamda[1]; boxlo[2] = domain->boxlo_lamda[2]; + domain->x2lamda(atomKK->nlocal); } + boxlo_kk[0] = static_cast(domain->boxlo[0]); + boxlo_kk[1] = static_cast(domain->boxlo[1]); + boxlo_kk[2] = static_cast(domain->boxlo[2]); + // extend size of per-atom arrays if necessary if (atom->nmax > nmax) { @@ -690,17 +720,19 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_self1, const int &i) const { - d_eatom[i] *= 0.5; - d_eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - d_eatom[i] *= qscale; + KK_ACC_FLOAT e_self = d_eatom[i]; + e_self *= static_cast(0.5); + e_self -= static_cast(g_ewald_kk*q[i]*q[i]/static_cast(MY_PIS) + + static_cast(MY_PI2)*q[i]*static_cast(qsum) / (g_ewald_kk*g_ewald_kk*static_cast(volume))); + e_self *= static_cast(qscale); + d_eatom[i] = e_self; } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_self2, const int &i) const { - for (int j = 0; j < 6; j++) d_vatom(i,j) *= 0.5*qscale; + for (int j = 0; j < 6; j++) d_vatom(i,j) *= static_cast(0.5*qscale); } /* ---------------------------------------------------------------------- @@ -779,7 +811,7 @@ void PPPMKokkos::allocate() // summation coeffs order_allocated = order; - k_gf_b = typename DAT::tdual_kkfloat_1d("pppm:gf_b",order); + k_gf_b = typename DAT::tdual_double_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); d_rho1d = typename 
FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); @@ -881,9 +913,9 @@ double PPPMKokkos::estimate_ik_error(double h, double prd, bigint na { double sum = 0.0; for (int m = 0; m < order; m++) - sum += acons(order,m) * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); + sum += acons(order,m) * pow(h*g_ewald,2.0*static_cast(m)); + double value = q2 * pow(h*g_ewald,static_cast(order)) * + sqrt(g_ewald*prd*sqrt(MY_2PI)*sum / static_cast(natoms)) / (prd*prd); return value; } @@ -903,6 +935,9 @@ void PPPMKokkos::set_grid_local() boxlo[0] = domain->boxlo[0]; boxlo[1] = domain->boxlo[1]; boxlo[2] = domain->boxlo[2]; + boxlo_kk[0] = static_cast(domain->boxlo[0]); + boxlo_kk[1] = static_cast(domain->boxlo[1]); + boxlo_kk[2] = static_cast(domain->boxlo[2]); } /* ---------------------------------------------------------------------- @@ -914,18 +949,23 @@ void PPPMKokkos::compute_gf_denom() { int k,l,m; - for (l = 1; l < order; l++) k_gf_b.view_host()[l] = 0.0; + // keep the calculation in double + for (l = 1; l < order; l++) k_gf_b.view_host()[l] = 0; k_gf_b.view_host()[0] = 1.0; for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - k_gf_b.view_host()[l] = 4.0 * (k_gf_b.view_host()[l]*(l-m)*(l-m-0.5)-k_gf_b.view_host()[l-1]*(l-m-1)*(l-m-1)); - k_gf_b.view_host()[0] = 4.0 * (k_gf_b.view_host()[0]*(l-m)*(l-m-0.5)); + double m_double = static_cast(m); + for (l = m; l > 0; l--) { + double l_double = static_cast(l); + k_gf_b.view_host()[l] = 4.0 * (k_gf_b.view_host()[l]*(l_double-m_double)*(l_double-m_double-0.5)-k_gf_b.view_host()[l-1]*(l_double-m_double-1.0)*(l_double-m_double-1.0)); + } + double l_double = static_cast(l); + k_gf_b.view_host()[0] = 4.0 * (k_gf_b.view_host()[0]*(l_double-m_double)*(l_double-m_double-0.5)); } bigint ifact = 1; for (k = 1; k < 2*order; k++) ifact *= k; - double 
gaminv = 1.0/ifact; + double gaminv = 1.0/static_cast(ifact); for (l = 0; l < order; l++) k_gf_b.view_host()[l] *= gaminv; k_gf_b.modify_host(); @@ -949,6 +989,10 @@ void PPPMKokkos::compute_gf_ik() unitky = (MY_2PI/yprd); unitkz = (MY_2PI/zprd_slab); + unitkx_kk = static_cast(unitkx); + unitky_kk = static_cast(unitky); + unitkz_kk = static_cast(unitkz); + nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25)); nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * @@ -980,48 +1024,50 @@ void PPPMKokkos::operator()(TagPPPM_compute_gf_ik, const int &n) con l += nylo_fft; k += nxlo_fft; - const int mper = m - nz_pppm*(2*m/nz_pppm); - const KK_FLOAT snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + // we compute this in pure double to preserve the precision of Fourier angles + // in theory this will get called infrequently anyway + const double mper = static_cast(m - nz_pppm*(2*m/nz_pppm)); + const double snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); - const int lper = l - ny_pppm*(2*l/ny_pppm); - const KK_FLOAT sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + const double lper = static_cast(l - ny_pppm*(2*l/ny_pppm)); + const double sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); - const int kper = k - nx_pppm*(2*k/nx_pppm); - const KK_FLOAT snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + const double kper = static_cast(k - nx_pppm*(2*k/nx_pppm)); + const double snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); - const KK_FLOAT sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + const double sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - if (sqk != 0.0) { - const KK_FLOAT numerator = 12.5663706/sqk; - const KK_FLOAT denominator = gf_denom(snx,sny,snz); - KK_ACC_FLOAT sum1 = 0.0; + if (sqk != 0) { + const double numerator = 12.5663706/sqk; + const double denominator = gf_denom(snx,sny,snz); + double sum1 = 0; for (int nx = -nbx; nx <= nbx; nx++) { - const KK_FLOAT qx = 
unitkx*(kper+nx_pppm*nx); - const KK_FLOAT sx = exp(-0.25*square(qx/g_ewald)); - const KK_FLOAT argx = 0.5*qx*xprd/nx_pppm; - const KK_FLOAT wx = powsinxx(argx,twoorder); + const double qx = unitkx*(kper+nx_pppm*nx); + const double sx = exp(-0.25*square(qx/g_ewald)); + const double argx = 0.5*qx*xprd/nx_pppm; + const double wx = powsinxx(argx,twoorder); for (int ny = -nby; ny <= nby; ny++) { - const KK_FLOAT qy = unitky*(lper+ny_pppm*ny); - const KK_FLOAT sy = exp(-0.25*square(qy/g_ewald)); - const KK_FLOAT argy = 0.5*qy*yprd/ny_pppm; - const KK_FLOAT wy = powsinxx(argy,twoorder); + const double qy = unitky*(lper+ny_pppm*ny); + const double sy = exp(-0.25*square(qy/g_ewald)); + const double argy = 0.5*qy*yprd/ny_pppm; + const double wy = powsinxx(argy,twoorder); for (int nz = -nbz; nz <= nbz; nz++) { - const KK_FLOAT qz = unitkz*(mper+nz_pppm*nz); - const KK_FLOAT sz = exp(-0.25*square(qz/g_ewald)); - const KK_FLOAT argz = 0.5*qz*zprd_slab/nz_pppm; - const KK_FLOAT wz = powsinxx(argz,twoorder); + const double qz = unitkz*(mper+nz_pppm*nz); + const double sz = exp(-0.25*square(qz/g_ewald)); + const double argz = 0.5*qz*zprd_slab/nz_pppm; + const double wz = powsinxx(argz,twoorder); - const KK_FLOAT dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - const KK_FLOAT dot2 = qx*qx+qy*qy+qz*qz; + const double dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + const double dot2 = qx*qx+qy*qy+qz*qz; sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; } } } - d_greensfn[n] = numerator*sum1/denominator; - } else d_greensfn[n] = 0.0; + d_greensfn[n] = static_cast(numerator * sum1 / denominator); + } else d_greensfn[n] = 0; } /* ---------------------------------------------------------------------- @@ -1058,65 +1104,67 @@ void PPPMKokkos::operator()(TagPPPM_compute_gf_ik_triclinic, const i { int n = (m - nzlo_fft)*(nyhi_fft+1 - nylo_fft)*(nxhi_fft+1 - nxlo_fft); - const int mper = m - nz_pppm*(2*m/nz_pppm); - const KK_FLOAT snz = square(sin(MY_PI*mper/nz_pppm)); + // we 
compute this in pure double to preserve the precision of Fourier angles + // in theory this will get called infrequently anyway + const double mper = static_cast(m - nz_pppm*(2*m/nz_pppm)); + const double snz = square(sin(MY_PI*mper/nz_pppm)); for (int l = nylo_fft; l <= nyhi_fft; l++) { - const int lper = l - ny_pppm*(2*l/ny_pppm); - const KK_FLOAT sny = square(sin(MY_PI*lper/ny_pppm)); + const double lper = static_cast(l - ny_pppm*(2*l/ny_pppm)); + const double sny = square(sin(MY_PI*lper/ny_pppm)); for (int k = nxlo_fft; k <= nxhi_fft; k++) { - const int kper = k - nx_pppm*(2*k/nx_pppm); - const KK_FLOAT snx = square(sin(MY_PI*kper/nx_pppm)); + const double kper = static_cast(k - nx_pppm*(2*k/nx_pppm)); + const double snx = square(sin(MY_PI*kper/nx_pppm)); - KK_FLOAT unitk_lamda[3]; + double unitk_lamda[3]; unitk_lamda[0] = 2.0*MY_PI*kper; unitk_lamda[1] = 2.0*MY_PI*lper; unitk_lamda[2] = 2.0*MY_PI*mper; x2lamdaT_kokkos(&unitk_lamda[0],&unitk_lamda[0]); - const KK_FLOAT sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); + const double sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - if (sqk != 0.0) { - const KK_FLOAT numerator = 12.5663706/sqk; - const KK_FLOAT denominator = gf_denom(snx,sny,snz); - KK_ACC_FLOAT sum1 = 0.0; + if (sqk != 0) { + const double numerator = 12.5663706/sqk; + const double denominator = gf_denom(snx,sny,snz); + double sum1 = 0; for (int nx = -nbx; nx <= nbx; nx++) { - const KK_FLOAT argx = MY_PI*kper/nx_pppm + MY_PI*nx; - const KK_FLOAT wx = powsinxx(argx,twoorder); + const double argx = MY_PI*kper/nx_pppm + MY_PI*nx; + const double wx = powsinxx(argx,twoorder); for (int ny = -nby; ny <= nby; ny++) { - const KK_FLOAT argy = MY_PI*lper/ny_pppm + MY_PI*ny; - const KK_FLOAT wy = powsinxx(argy,twoorder); + const double argy = MY_PI*lper/ny_pppm + MY_PI*ny; + const double wy = powsinxx(argy,twoorder); for (int nz = -nbz; nz <= nbz; nz++) { - const KK_FLOAT argz = 
MY_PI*mper/nz_pppm + MY_PI*nz; - const KK_FLOAT wz = powsinxx(argz,twoorder); + const double argz = MY_PI*mper/nz_pppm + MY_PI*nz; + const double wz = powsinxx(argz,twoorder); - KK_FLOAT b[3]; + double b[3]; b[0] = 2.0*MY_PI*nx_pppm*nx; b[1] = 2.0*MY_PI*ny_pppm*ny; b[2] = 2.0*MY_PI*nz_pppm*nz; x2lamdaT_kokkos(&b[0],&b[0]); - const KK_FLOAT qx = unitk_lamda[0]+b[0]; - const KK_FLOAT sx = exp(-0.25*square(qx/g_ewald)); + const double qx = unitk_lamda[0]+b[0]; + const double sx = exp(-0.25*square(qx / g_ewald)); - const KK_FLOAT qy = unitk_lamda[1]+b[1]; - const KK_FLOAT sy = exp(-0.25*square(qy/g_ewald)); + const double qy = unitk_lamda[1]+b[1]; + const double sy = exp(-0.25*square(qy / g_ewald)); - const KK_FLOAT qz = unitk_lamda[2]+b[2]; - const KK_FLOAT sz = exp(-0.25*square(qz/g_ewald)); + const double qz = unitk_lamda[2]+b[2]; + const double sz = exp(-0.25*square(qz / g_ewald)); - const KK_FLOAT dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; - const KK_FLOAT dot2 = qx*qx+qy*qy+qz*qz; + const double dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; + const double dot2 = qx*qx+qy*qy+qz*qz; sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; } } } - d_greensfn[n++] = numerator*sum1/denominator; - } else d_greensfn[n++] = 0.0; + d_greensfn[n++] = static_cast(numerator * sum1 / denominator); + } else d_greensfn[n++] = 0; } } } @@ -1131,6 +1179,7 @@ template void PPPMKokkos::particle_map() { int nlocal = atomKK->nlocal; + shift_kk = static_cast(shift); k_flag.view_host()() = 0; k_flag.modify_host(); @@ -1157,9 +1206,9 @@ void PPPMKokkos::operator()(TagPPPM_particle_map, const int &i) cons // current particle coord can be outside global and local box // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - const int nx = static_cast ((x(i,0)-boxlo[0])*delxinv+shift) - OFFSET; - const int ny = static_cast ((x(i,1)-boxlo[1])*delyinv+shift) - OFFSET; - const int nz = static_cast ((x(i,2)-boxlo[2])*delzinv+shift) - OFFSET; + const 
int nx = static_cast ((x(i,0)-boxlo_kk[0])*delxinv_kk+shift_kk) - OFFSET; + const int ny = static_cast ((x(i,1)-boxlo_kk[1])*delyinv_kk+shift_kk) - OFFSET; + const int nz = static_cast ((x(i,2)-boxlo_kk[2])*delzinv_kk+shift_kk) - OFFSET; d_part2grid(i,0) = nx; d_part2grid(i,1) = ny; @@ -1223,7 +1272,7 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_zero, const int &ii) co int iz = ii/(numy_out*numx_out); int iy = (ii - iz*numy_out*numx_out) / numx_out; int ix = ii - iz*numy_out*numx_out - iy*numx_out; - d_density_brick(iz,iy,ix) = 0.0; + d_density_brick(iz,iy,ix) = 0; } template @@ -1236,10 +1285,9 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c int nx = d_part2grid(i,0); int ny = d_part2grid(i,1); int nz = d_part2grid(i,2); - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; - + const FFT_SCALAR dx = static_cast(static_cast(nx)+shiftone_kk - (x(i,0)-boxlo_kk[0])*delxinv_kk); + const FFT_SCALAR dy = static_cast(static_cast(ny)+shiftone_kk - (x(i,1)-boxlo_kk[1])*delyinv_kk); + const FFT_SCALAR dz = static_cast(static_cast(nz)+shiftone_kk - (x(i,2)-boxlo_kk[2])*delzinv_kk); nz -= nzlo_out; ny -= nylo_out; @@ -1247,7 +1295,7 @@ void PPPMKokkos::operator()(TagPPPM_make_rho_atomic, const int &i) c compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = static_cast(delvolinv_kk * q[i]); for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const FFT_SCALAR y0 = z0*d_rho1d(i,n+order/2,2); @@ -1291,9 +1339,9 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team if ( ((nz+nlower-nzlo_out)*ix*iy >= ito) || ((nz+nupper-nzlo_out+1)*ix*iy < ifrom) ) continue; - const FFT_SCALAR dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - const FFT_SCALAR dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - const FFT_SCALAR dz = nz+shiftone - (x(i,2)-boxlo[2])*delzinv; + 
const FFT_SCALAR dx = static_cast(static_cast(nx)+shiftone_kk - (x(i,0)-boxlo_kk[0])*delxinv_kk); + const FFT_SCALAR dy = static_cast(static_cast(ny)+shiftone_kk - (x(i,1)-boxlo_kk[1])*delyinv_kk); + const FFT_SCALAR dz = static_cast(static_cast(nz)+shiftone_kk - (x(i,2)-boxlo_kk[2])*delzinv_kk); nz -= nzlo_out; ny -= nylo_out; @@ -1301,7 +1349,7 @@ void PPPMKokkos::operator() (TagPPPM_make_rho, typename Kokkos::Team compute_rho1d(i,dx,dy,dz); - const FFT_SCALAR z0 = delvolinv * q[i]; + const FFT_SCALAR z0 = static_cast(delvolinv_kk * q[i]); for (int n = nlower; n <= nupper; n++) { const int mz = n+nz; const int in = mz*ix*iy; @@ -1379,8 +1427,10 @@ void PPPMKokkos::poisson_ik() // global energy and virial contribution bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm; - scaleinv = 1.0/ngridtotal; + scaleinv = 1.0/static_cast(ngridtotal); + scaleinv_kk = static_cast(scaleinv); s2 = scaleinv*scaleinv; + s2_kk = static_cast(s2); if (eflag_global || vflag_global) { EV_FLOAT ev; @@ -1388,13 +1438,13 @@ void PPPMKokkos::poisson_ik() copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nfft),*this,ev); copymode = 0; - for (int j = 0; j < 6; j++) virial[j] += ev.v[j]; - energy += ev.ecoul; + for (int j = 0; j < 6; j++) virial[j] += static_cast(ev.v[j]); + energy += static_cast(ev.ecoul); } else { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nfft),*this,ev); copymode = 0; - energy += ev.ecoul; + energy += static_cast(ev.ecoul); } } @@ -1477,16 +1527,16 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik1, const int &i) const { d_work1[2*i] = d_density_fft[i]; - d_work1[2*i+1] = ZEROF; + d_work1[2*i+1] = 0; } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik2, const int &i, EV_FLOAT& ev) const { - const KK_FLOAT eng = s2 * d_greensfn[i] * (d_work1[2*i]*d_work1[2*i] + d_work1[2*i+1]*d_work1[2*i+1]); - for (int j = 0; j < 6; j++) ev.v[j] += eng*d_vg(i,j); - if (eflag_global) ev.ecoul += eng; + 
const KK_FLOAT eng = s2_kk * d_greensfn[i] * static_cast(d_work1[2*i]*d_work1[2*i] + d_work1[2*i+1]*d_work1[2*i+1]); + for (int j = 0; j < 6; j++) ev.v[j] += static_cast(eng*d_vg(i,j)); + if (eflag_global) ev.ecoul += static_cast(eng); } template @@ -1494,15 +1544,15 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik3, const int &i, EV_FLOAT& ev) const { ev.ecoul += - s2 * d_greensfn[i] * (d_work1[2*i]*d_work1[2*i] + d_work1[2*i+1]*d_work1[2*i+1]); + static_cast(s2_kk * d_greensfn[i] * static_cast(d_work1[2*i]*d_work1[2*i] + d_work1[2*i+1]*d_work1[2*i+1])); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik4, const int &i) const { - d_work1[2*i] *= scaleinv * d_greensfn[i]; - d_work1[2*i+1] *= scaleinv * d_greensfn[i]; + d_work1[2*i] *= static_cast(scaleinv_kk * d_greensfn[i]); + d_work1[2*i+1] *= static_cast(scaleinv_kk * d_greensfn[i]); } template @@ -1513,8 +1563,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik5, const int &ii) cons const int k = ii/(numy_fft*numx_fft); const int j = (ii - k*numy_fft*numx_fft) / numx_fft; const int i = ii - k*numy_fft*numx_fft - j*numx_fft; - d_work2[n] = -d_fkx[i]*d_work1[n+1]; - d_work2[n+1] = d_fkx[i]*d_work1[n]; + d_work2[n] = -static_cast(d_fkx[i])*d_work1[n+1]; + d_work2[n+1] = static_cast(d_fkx[i])*d_work1[n]; } template @@ -1538,8 +1588,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik7, const int &ii) cons const int n = ii*2; const int k = ii/(numy_fft*numx_fft); const int j = (ii - k*numy_fft*numx_fft) / numx_fft; - d_work2[n] = -d_fky[j]*d_work1[n+1]; - d_work2[n+1] = d_fky[j]*d_work1[n]; + d_work2[n] = -static_cast(d_fky[j])*d_work1[n+1]; + d_work2[n+1] = static_cast(d_fky[j])*d_work1[n]; } template @@ -1562,8 +1612,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik9, const int &ii) cons { const int n = ii*2; const int k = ii/(numy_fft*numx_fft); - d_work2[n] = -d_fkz[k]*d_work1[n+1]; - d_work2[n+1] = d_fkz[k]*d_work1[n]; + d_work2[n] = 
-static_cast(d_fkz[k])*d_work1[n+1]; + d_work2[n+1] = static_cast(d_fkz[k])*d_work1[n]; } template @@ -1644,8 +1694,8 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic1, const int &ii) const { - d_work2[2*ii] = -d_fkx[ii]*d_work1[2*ii+1]; - d_work2[2*ii+1] = d_fkx[ii]*d_work1[2*ii]; + d_work2[2*ii] = -static_cast(d_fkx[ii])*d_work1[2*ii+1]; + d_work2[2*ii+1] = static_cast(d_fkx[ii])*d_work1[2*ii]; } template @@ -1667,8 +1717,8 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic3, const int &ii) const { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; - d_work2[2*ii] = -d_fky[ii]*d_work1[2*ii+1]; - d_work2[2*ii+1] = d_fky[ii]*d_work1[2*ii]; + d_work2[2*ii] = -static_cast(d_fky[ii])*d_work1[2*ii+1]; + d_work2[2*ii+1] = static_cast(d_fky[ii])*d_work1[2*ii]; } @@ -1695,8 +1745,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic5, const int { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; // - d_work2[2*ii] = -d_fkz[ii]*d_work1[2*ii+1]; - d_work2[2*ii+1] = d_fkz[ii]*d_work1[2*ii]; + d_work2[2*ii] = -static_cast(d_fkz[ii])*d_work1[2*ii+1]; + d_work2[2*ii+1] = static_cast(d_fkz[ii])*d_work1[2*ii]; } template @@ -1857,8 +1907,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_peratom3, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,0); - d_work2[n+1] = d_work1[n+1]*d_vg(i,0); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,0)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,0)); n += 2; } @@ -1882,8 +1932,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_peratom5, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,1); - d_work2[n+1] = d_work1[n+1]*d_vg(i,1); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,1)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,1)); n += 2; } @@ -1907,8 +1957,8 @@ void 
PPPMKokkos::operator()(TagPPPM_poisson_peratom7, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,2); - d_work2[n+1] = d_work1[n+1]*d_vg(i,2); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,2)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,2)); n += 2; } @@ -1932,8 +1982,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_peratom9, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,3); - d_work2[n+1] = d_work1[n+1]*d_vg(i,3); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,3)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,3)); n += 2; } @@ -1957,8 +2007,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_peratom11, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,4); - d_work2[n+1] = d_work1[n+1]*d_vg(i,4); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,4)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,4)); n += 2; } @@ -1982,8 +2032,8 @@ void PPPMKokkos::operator()(TagPPPM_poisson_peratom13, const int &i) { int n = 2*i; - d_work2[n] = d_work1[n]*d_vg(i,5); - d_work2[n+1] = d_work1[n+1]*d_vg(i,5); + d_work2[n] = d_work1[n]*static_cast(d_vg(i,5)); + d_work2[n+1] = d_work1[n+1]*static_cast(d_vg(i,5)); n += 2; } @@ -2066,10 +2116,10 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_ik, const int &i) con // convert E-field to force - const KK_FLOAT qfactor = qqrd2e * scale * q[i]; - f(i,0) += qfactor*ekx; - f(i,1) += qfactor*eky; - if (slabflag != 2) f(i,2) += qfactor*ekz; + const KK_FLOAT qfactor = qscale_kk * q[i]; + f(i,0) += static_cast(qfactor*static_cast(ekx)); + f(i,1) += static_cast(qfactor*static_cast(eky)); + if (slabflag != 2) f(i,2) += static_cast(qfactor*static_cast(ekz)); } /* ---------------------------------------------------------------------- @@ -2102,9 +2152,9 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i nx = d_part2grid(i,0); ny = d_part2grid(i,1); nz = d_part2grid(i,2); - dx = nx+shiftone - (x(i,0)-boxlo[0])*delxinv; - dy = ny+shiftone - (x(i,1)-boxlo[1])*delyinv; - dz = nz+shiftone - 
(x(i,2)-boxlo[2])*delzinv; + dx = static_cast(static_cast(nx)+shiftone_kk - (x(i,0)-boxlo_kk[0])*delxinv_kk); + dy = static_cast(static_cast(ny)+shiftone_kk - (x(i,1)-boxlo_kk[1])*delyinv_kk); + dz = static_cast(static_cast(nz)+shiftone_kk - (x(i,2)-boxlo_kk[2])*delzinv_kk); nz -= nzlo_out; ny -= nylo_out; @@ -2112,7 +2162,7 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i compute_rho1d(i,dx,dy,dz); - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + u = v0 = v1 = v2 = v3 = v4 = v5 = 0; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = d_rho1d(i,n+order/2,2); @@ -2135,14 +2185,14 @@ void PPPMKokkos::operator()(TagPPPM_fieldforce_peratom, const int &i } } - if (eflag_atom) d_eatom[i] += q[i]*u; + if (eflag_atom) d_eatom[i] += static_cast(q[i])*static_cast(u); if (vflag_atom) { - d_vatom(i,0) += q[i]*v0; - d_vatom(i,1) += q[i]*v1; - d_vatom(i,2) += q[i]*v2; - d_vatom(i,3) += q[i]*v3; - d_vatom(i,4) += q[i]*v4; - d_vatom(i,5) += q[i]*v5; + d_vatom(i,0) += static_cast(q[i])*static_cast(v0); + d_vatom(i,1) += static_cast(q[i])*static_cast(v1); + d_vatom(i,2) += static_cast(q[i])*static_cast(v2); + d_vatom(i,3) += static_cast(q[i])*static_cast(v3); + d_vatom(i,4) += static_cast(q[i])*static_cast(v4); + d_vatom(i,5) += static_cast(q[i])*static_cast(v5); } } @@ -2175,9 +2225,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_pack_forward1, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; d_buf[3*i] = d_vdx_brick(iz,iy,ix); d_buf[3*i+1] = d_vdy_brick(iz,iy,ix); @@ -2188,9 +2238,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_pack_forward2, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) 
d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; if (eflag_atom) d_buf[7*i] = d_u_brick(iz,iy,ix); if (vflag_atom) { @@ -2232,9 +2282,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_unpack_forward1, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; d_vdx_brick(iz,iy,ix) = d_buf[3*i + unpack_offset]; d_vdy_brick(iz,iy,ix) = d_buf[3*i+1 + unpack_offset]; @@ -2245,9 +2295,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_unpack_forward2, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; if (eflag_atom) d_u_brick(iz,iy,ix) = d_buf[7*i]; if (vflag_atom) { @@ -2283,9 +2333,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_pack_reverse, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; d_buf[i] = d_density_brick(iz,iy,ix); 
} @@ -2314,9 +2364,9 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_unpack_reverse, const int &i) const { - const KK_FLOAT dlist = (KK_FLOAT) d_list_index[i]; - const int iz = (int) (dlist/(nx*ny)); - const int iy = (int) ((dlist - iz*nx*ny)/nx); + const double dlist = static_cast(d_list_index[i]); + const int iz = static_cast(dlist/(nx*ny)); + const int iy = static_cast((dlist - iz*nx*ny)/nx); const int ix = d_list_index[i] - iz*nx*ny - iy*nx; d_density_brick(iz,iy,ix) += d_buf[i + unpack_offset]; } @@ -2335,7 +2385,7 @@ void PPPMKokkos::compute_rho1d(const int i, const FFT_SCALAR &dx, co FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; + r1 = r2 = r3 = 0; for (l = order-1; l >= 0; l--) { r1 = d_rho_coeff(l,k-(1-order)/2) + r1*dx; @@ -2378,20 +2428,20 @@ void PPPMKokkos::compute_rho_coeff() for (k = 0; k <= 2*order; k++) for (l = 0; l < order; l++) - a[l][k] = 0.0; + a[l][k] = 0; - a[0][order] = 1.0; + a[0][order] = static_cast(1.0); for (j = 1; j < order; j++) { for (k = -j; k <= j; k += 2) { - s = 0.0; + s = 0; for (l = 0; l < j; l++) { - a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / (l+1); + a[l+1][k+order] = (a[l][k+1+order]-a[l][k-1+order]) / static_cast(l+1); #ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1+order] + powf(-1.0,(float) l) * a[l][k+1+order]) / (l+1); + s += powf(0.5f,static_cast(l+1)) * + (a[l][k-1+order] + powf(-1.0,static_cast(l)) * a[l][k+1+order]) / static_cast(l+1); #else - s += pow(0.5,(double) l+1) * - (a[l][k-1+order] + pow(-1.0,(double) l) * a[l][k+1+order]) / (l+1); + s += pow(0.5,static_cast(l+1)) * + (a[l][k-1+order] + pow(-1.0,static_cast(l)) * a[l][k+1+order]) / static_cast(l+1); #endif } a[0][k+order] = s; @@ -2455,6 +2505,7 @@ void PPPMKokkos::slabcorr() const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - qsum*dipole_r2 - qsum*qsum*zprd_slab*zprd_slab/12.0)/volume; qscale = qqrd2e * scale; + qscale_kk = static_cast(qscale); if (eflag_global) 
energy += qscale * e_slabcorr; @@ -2480,29 +2531,33 @@ template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_slabcorr1, const int &i, double &dipole) const { - dipole += q[i]*x(i,2); + dipole += static_cast(q[i]*x(i,2)); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_slabcorr2, const int &i, double &dipole_r2) const { - dipole_r2 += q[i]*x(i,2)*x(i,2); + dipole_r2 += static_cast(q[i]*x(i,2)*x(i,2)); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_slabcorr3, const int &i) const { - d_eatom[i] += efact * q[i]*(x(i,2)*dipole_all - 0.5*(dipole_r2 + - qsum*x(i,2)*x(i,2)) - qsum*zprd_slab*zprd_slab/12.0); + double z_i = static_cast(x(i,2)); + double q_i = static_cast(q[i]); + d_eatom[i] += static_cast(efact * q_i*(z_i*dipole_all - 0.5*(dipole_r2 + + qsum*z_i*z_i) - qsum*zprd_slab*zprd_slab/12.0)); } template KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_slabcorr4, const int &i) const { - f(i,2) += ffact * q[i]*(dipole_all - qsum*x(i,2)); + double z_i = static_cast(x(i,2)); + double q_i = static_cast(q[i]); + f(i,2) += static_cast(ffact * q_i*(dipole_all - qsum*z_i)); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index e598ec890aa..27c78ac68bf 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -295,6 +295,12 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { double scaleinv,s2; double qscale,efact,ffact,dipole_all,dipole_r2,zprd; double xprd,yprd,zprd_slab; + KK_FLOAT qscale_kk; + KK_FLOAT g_ewald_kk, g_ewald_inv_kk; + KK_FLOAT unitkx_kk,unitky_kk,unitkz_kk; + KK_FLOAT scaleinv_kk, s2_kk; + KK_FLOAT shift_kk, shiftone_kk; + KK_FLOAT delxinv_kk, delyinv_kk, delzinv_kk, delvolinv_kk; int nbx,nby,nbz,twoorder; int numx_fft,numy_fft,numz_fft; int numx_inout,numy_inout,numz_inout; @@ -304,10 +310,11 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { // Local copies of the domain 
box tilt etc. Few h, h_inv; + template KOKKOS_INLINE_FUNCTION - void x2lamdaT_kokkos(KK_FLOAT* v, KK_FLOAT* lamda) const + void x2lamdaT_kokkos(T* v, T* lamda) const { - KK_FLOAT lamda_tmp[3]; + T lamda_tmp[3]; lamda_tmp[0] = h_inv[0]*v[0]; lamda_tmp[1] = h_inv[5]*v[0] + h_inv[1]*v[1]; @@ -351,8 +358,8 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename FFT_AT::t_FFT_SCALAR_1d d_work1; typename FFT_AT::t_FFT_SCALAR_1d d_work2; - DAT::tdual_kkfloat_1d k_gf_b; - typename AT::t_kkfloat_1d d_gf_b; + DAT::tdual_double_1d k_gf_b; + typename AT::t_double_1d d_gf_b; //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; @@ -360,7 +367,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; //double **acons; - typename Kokkos::DualView::t_host acons; + typename Kokkos::DualView::t_host acons; // FFTs and grid communication @@ -375,6 +382,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { typename AT::t_int_1d_3 d_part2grid; double boxlo[3]; + KK_FLOAT boxlo_kk[3]; void set_grid_local() override; @@ -430,16 +438,16 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { ------------------------------------------------------------------------- */ KOKKOS_INLINE_FUNCTION - KK_FLOAT gf_denom(const KK_FLOAT &x, const KK_FLOAT &y, - const KK_FLOAT &z) const { - KK_FLOAT sx,sy,sz; - sz = sy = sx = 0.0; + double gf_denom(const double &x, const double &y, + const double &z) const { + double sx,sy,sz; + sz = sy = sx = 0; for (int l = order-1; l >= 0; l--) { sx = d_gf_b[l] + sx*x; sy = d_gf_b[l] + sy*y; sz = d_gf_b[l] + sz*z; } - KK_FLOAT s = sx*sy*sz; + double s = sx*sy*sz; return s*s; }; }; From bed9a8737d7ee37e2fee56166f1ad9c9b73e66e7 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 27 Oct 2025 15:32:50 -0600 Subject: [PATCH 100/604] More accurate contribution note --- src/neighbor.cpp | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 3f3c8b7d6a5..780f7b4657a 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -13,8 +13,9 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author (triclinic and multi-neigh) : Pieter in 't Veld (SNL) - Contributing author (improved multi-neigh) : Joel Clemmer (SNL) + Contributing author (triclinic and original multi-neigh) : Pieter in 't Veld (SNL) + Contributing author (improved multi-neigh) : Joel Clemmer (SNL), Kevin Hanley (Edinburgh), + Kevin Stratford (Edinburgh), Tom Shire (Edinburgh) ------------------------------------------------------------------------- */ #include "neighbor.h" From 69b4acb58f1365715c7400ab748b4f25523d3a8c Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 14:58:38 -0700 Subject: [PATCH 101/604] Removed silent conversions from angle, bond, dihedral, and improper interactions as relevant for Rhodopsin and SPC/E. 
--- src/KOKKOS/angle_charmm_kokkos.cpp | 146 +++++------ src/KOKKOS/angle_harmonic_kokkos.cpp | 140 ++++------- src/KOKKOS/bond_harmonic_kokkos.cpp | 112 ++++----- src/KOKKOS/dihedral_charmm_kokkos.cpp | 311 ++++++++++-------------- src/KOKKOS/dihedral_charmm_kokkos.h | 14 +- src/KOKKOS/improper_harmonic_kokkos.cpp | 175 ++++++------- 6 files changed, 355 insertions(+), 543 deletions(-) diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp index 51ba1b28d53..11e39d5043d 100644 --- a/src/KOKKOS/angle_charmm_kokkos.cpp +++ b/src/KOKKOS/angle_charmm_kokkos.cpp @@ -114,14 +114,14 @@ void AngleCharmmKokkos::compute(int eflag_in, int vflag_in) } } - if (eflag_global) energy += ev.evdwl; + if (eflag_global) energy += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -179,10 +179,10 @@ void AngleCharmmKokkos::operator()(TagAngleCharmmCompute 0.0) forceUB = -2.0*rk/rUB; + KK_FLOAT forceUB = 0; + if (rUB > 0) forceUB = -static_cast(2.0) * rk / rUB; - KK_FLOAT eangle = 0.0; + KK_FLOAT eangle = 0; if (eflag) eangle = rk*dr; // angle (cos and sin) @@ -190,12 +190,12 @@ void AngleCharmmKokkos::operator()(TagAngleCharmmCompute 1.0) c = 1.0; - if (c < -1.0) c = -1.0; + if (c > static_cast(1.0)) c = static_cast(1.0); + if (c < static_cast(-1.0)) c = static_cast(-1.0); - KK_FLOAT s = sqrt(1.0 - c*c); - if (s < SMALL) s = SMALL; - s = 1.0/s; + KK_FLOAT s = sqrt(static_cast(1.0) - c*c); + if (s < static_cast(SMALL)) s = static_cast(SMALL); + s = static_cast(1.0) / s; // harmonic force & energy @@ -204,7 +204,7 @@ void AngleCharmmKokkos::operator()(TagAngleCharmmCompute(2.0) * tk 
* s; const KK_FLOAT a11 = a*c / rsq1; const KK_FLOAT a12 = -a / (r1*r2); const KK_FLOAT a22 = a*c / rsq2; @@ -221,21 +221,21 @@ void AngleCharmmKokkos::operator()(TagAngleCharmmCompute(f1[0]); + f(i1,1) += static_cast(f1[1]); + f(i1,2) += static_cast(f1[2]); } if (NEWTON_BOND || i2 < nlocal) { - f(i2,0) -= f1[0] + f3[0]; - f(i2,1) -= f1[1] + f3[1]; - f(i2,2) -= f1[2] + f3[2]; + f(i2,0) -= static_cast(f1[0] + f3[0]); + f(i2,1) -= static_cast(f1[1] + f3[1]); + f(i2,2) -= static_cast(f1[2] + f3[2]); } if (NEWTON_BOND || i3 < nlocal) { - f(i3,0) += f3[0]; - f(i3,1) += f3[1]; - f(i3,2) += f3[2]; + f(i3,0) += static_cast(f3[0]); + f(i3,1) += static_cast(f3[1]); + f(i3,2) += static_cast(f3[2]); } if (EVFLAG) ev_tally(ev,i1,i2,i3,eangle,f1,f3, @@ -279,10 +279,10 @@ void AngleCharmmKokkos::coeff(int narg, char **arg) d_r_ub = k_r_ub.template view(); for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_theta0.view_host()[i] = theta0[i]; - k_k_ub.view_host()[i] = k_ub[i]; - k_r_ub.view_host()[i] = r_ub[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_theta0.view_host()[i] = static_cast(theta0[i]); + k_k_ub.view_host()[i] = static_cast(k_ub[i]); + k_r_ub.view_host()[i] = static_cast(r_ub[i]); } k_k.modify_host(); @@ -317,10 +317,10 @@ void AngleCharmmKokkos::read_restart(FILE *fp) d_r_ub = k_r_ub.template view(); for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_theta0.view_host()[i] = theta0[i]; - k_k_ub.view_host()[i] = k_ub[i]; - k_r_ub.view_host()[i] = r_ub[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_theta0.view_host()[i] = static_cast(theta0[i]); + k_k_ub.view_host()[i] = static_cast(k_ub[i]); + k_r_ub.view_host()[i] = static_cast(r_ub[i]); } k_k.modify_host(); @@ -347,23 +347,18 @@ void AngleCharmmKokkos::ev_tally(EV_FLOAT &ev, const int i, const in const KK_FLOAT &delx1, const KK_FLOAT &dely1, const KK_FLOAT &delz1, const KK_FLOAT &delx2, const KK_FLOAT &dely2, const KK_FLOAT &delz2) const { - KK_FLOAT eanglethird; - KK_FLOAT v[6]; if 
(eflag_either) { + KK_ACC_FLOAT eanglethird = static_cast(static_cast(THIRD)*eangle); if (eflag_global) { - if (newton_bond) ev.evdwl += eangle; + if (newton_bond) ev.evdwl += static_cast(3.0)*eanglethird; else { - eanglethird = THIRD*eangle; - if (i < nlocal) ev.evdwl += eanglethird; if (j < nlocal) ev.evdwl += eanglethird; if (k < nlocal) ev.evdwl += eanglethird; } } if (eflag_atom) { - eanglethird = THIRD*eangle; - if (newton_bond || i < nlocal) d_eatom[i] += eanglethird; if (newton_bond || j < nlocal) d_eatom[j] += eanglethird; if (newton_bond || k < nlocal) d_eatom[k] += eanglethird; @@ -371,75 +366,46 @@ void AngleCharmmKokkos::ev_tally(EV_FLOAT &ev, const int i, const in } if (vflag_either) { - v[0] = delx1*f1[0] + delx2*f3[0]; - v[1] = dely1*f1[1] + dely2*f3[1]; - v[2] = delz1*f1[2] + delz2*f3[2]; - v[3] = delx1*f1[1] + delx2*f3[1]; - v[4] = delx1*f1[2] + delx2*f3[2]; - v[5] = dely1*f1[2] + dely2*f3[2]; + KK_ACC_FLOAT v_third_acc[6]; + v_third_acc[0] = static_cast(static_cast(THIRD)*(delx1*f1[0] + delx2*f3[0])); + v_third_acc[1] = static_cast(static_cast(THIRD)*(dely1*f1[1] + dely2*f3[1])); + v_third_acc[2] = static_cast(static_cast(THIRD)*(delz1*f1[2] + delz2*f3[2])); + v_third_acc[3] = static_cast(static_cast(THIRD)*(delx1*f1[1] + delx2*f3[1])); + v_third_acc[4] = static_cast(static_cast(THIRD)*(delx1*f1[2] + delx2*f3[2])); + v_third_acc[5] = static_cast(static_cast(THIRD)*(dely1*f1[2] + dely2*f3[2])); if (vflag_global) { if (newton_bond) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(3.0)*v_third_acc[n]; } else { if (i < nlocal) { - ev.v[0] += THIRD*v[0]; - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } if (j < nlocal) { - ev.v[0] += THIRD*v[0]; - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - 
ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } if (k < nlocal) { - ev.v[0] += THIRD*v[0]; - - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { - d_vatom(i,0) += THIRD*v[0]; - d_vatom(i,1) += THIRD*v[1]; - d_vatom(i,2) += THIRD*v[2]; - d_vatom(i,3) += THIRD*v[3]; - d_vatom(i,4) += THIRD*v[4]; - d_vatom(i,5) += THIRD*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i,n) += v_third_acc[n]; } if (newton_bond || j < nlocal) { - d_vatom(j,0) += THIRD*v[0]; - d_vatom(j,1) += THIRD*v[1]; - d_vatom(j,2) += THIRD*v[2]; - d_vatom(j,3) += THIRD*v[3]; - d_vatom(j,4) += THIRD*v[4]; - d_vatom(j,5) += THIRD*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(j,n) += v_third_acc[n]; } if (newton_bond || k < nlocal) { - d_vatom(k,0) += THIRD*v[0]; - d_vatom(k,1) += THIRD*v[1]; - d_vatom(k,2) += THIRD*v[2]; - d_vatom(k,3) += THIRD*v[3]; - d_vatom(k,4) += THIRD*v[4]; - d_vatom(k,5) += THIRD*v[5]; - + for (int n = 0; n < 6; n++) + d_vatom(k,n) += v_third_acc[n]; } } } diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp index e9ef7c9371b..1176ae2290f 100644 --- a/src/KOKKOS/angle_harmonic_kokkos.cpp +++ b/src/KOKKOS/angle_harmonic_kokkos.cpp @@ -120,14 +120,14 @@ void AngleHarmonicKokkos::compute(int eflag_in, int vflag_in) } } - if (eflag_global) energy += ev.evdwl; + if (eflag_global) energy += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += 
static_cast(ev.v[5]); } if (eflag_atom) { @@ -179,22 +179,22 @@ void AngleHarmonicKokkos::operator()(TagAngleHarmonicCompute 1.0) c = 1.0; - if (c < -1.0) c = -1.0; + if (c > static_cast(1.0)) c = static_cast(1.0); + if (c < static_cast(-1.0)) c = static_cast(-1.0); - KK_FLOAT s = sqrt(1.0 - c*c); - if (s < SMALL) s = SMALL; - s = 1.0/s; + KK_FLOAT s = sqrt(static_cast(1.0) - c*c); + if (s < static_cast(SMALL)) s = static_cast(SMALL); + s = static_cast(1.0)/s; // force & energy const KK_FLOAT dtheta = acos(c) - d_theta0[type]; const KK_FLOAT tk = d_k[type] * dtheta; - KK_FLOAT eangle = 0.0; + KK_FLOAT eangle = 0; if (eflag) eangle = tk*dtheta; - const KK_FLOAT a = -2.0 * tk * s; + const KK_FLOAT a = static_cast(-2.0) * tk * s; const KK_FLOAT a11 = a*c / rsq1; const KK_FLOAT a12 = -a / (r1*r2); const KK_FLOAT a22 = a*c / rsq2; @@ -210,21 +210,21 @@ void AngleHarmonicKokkos::operator()(TagAngleHarmonicCompute(f1[0]); + a_f(i1,1) += static_cast(f1[1]); + a_f(i1,2) += static_cast(f1[2]); } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) -= f1[0] + f3[0]; - a_f(i2,1) -= f1[1] + f3[1]; - a_f(i2,2) -= f1[2] + f3[2]; + a_f(i2,0) -= static_cast(f1[0] + f3[0]); + a_f(i2,1) -= static_cast(f1[1] + f3[1]); + a_f(i2,2) -= static_cast(f1[2] + f3[2]); } if (NEWTON_BOND || i3 < nlocal) { - a_f(i3,0) += f3[0]; - a_f(i3,1) += f3[1]; - a_f(i3,2) += f3[2]; + a_f(i3,0) += static_cast(f3[0]); + a_f(i3,1) += static_cast(f3[1]); + a_f(i3,2) += static_cast(f3[2]); } if (EVFLAG) ev_tally(ev,i1,i2,i3,eangle,f1,f3, @@ -265,8 +265,8 @@ void AngleHarmonicKokkos::coeff(int narg, char **arg) int n = atom->nangletypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_theta0.view_host()[i] = theta0[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_theta0.view_host()[i] = static_cast(theta0[i]); } k_k.modify_host(); @@ -284,8 +284,8 @@ void AngleHarmonicKokkos::read_restart(FILE *fp) int n = atom->nangletypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - 
k_theta0.view_host()[i] = theta0[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_theta0.view_host()[i] = static_cast(theta0[i]); } k_k.modify_host(); @@ -305,18 +305,15 @@ void AngleHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i, const const KK_FLOAT &delx1, const KK_FLOAT &dely1, const KK_FLOAT &delz1, const KK_FLOAT &delx2, const KK_FLOAT &dely2, const KK_FLOAT &delz2) const { - KK_FLOAT eanglethird; - KK_FLOAT v[6]; - // The eatom and vatom arrays are atomic Kokkos::View::value,Kokkos::MemoryTraits > v_eatom = d_eatom; Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = d_vatom; if (eflag_either) { if (eflag_global) { - if (newton_bond) ev.evdwl += eangle; + if (newton_bond) ev.evdwl += static_cast(eangle); else { - eanglethird = THIRD*eangle; + KK_ACC_FLOAT eanglethird = static_cast(static_cast(THIRD)*eangle); if (i < nlocal) ev.evdwl += eanglethird; if (j < nlocal) ev.evdwl += eanglethird; @@ -324,84 +321,55 @@ void AngleHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i, const } } if (eflag_atom) { - eanglethird = THIRD*eangle; + KK_ACC_FLOAT eanglethird = static_cast(static_cast(THIRD)*eangle); - if (newton_bond || i < nlocal) v_eatom[i] += eanglethird; - if (newton_bond || j < nlocal) v_eatom[j] += eanglethird; - if (newton_bond || k < nlocal) v_eatom[k] += eanglethird; + if (newton_bond || i < nlocal) v_eatom[i] += static_cast(eanglethird); + if (newton_bond || j < nlocal) v_eatom[j] += static_cast(eanglethird); + if (newton_bond || k < nlocal) v_eatom[k] += static_cast(eanglethird); } } if (vflag_either) { - v[0] = delx1*f1[0] + delx2*f3[0]; - v[1] = dely1*f1[1] + dely2*f3[1]; - v[2] = delz1*f1[2] + delz2*f3[2]; - v[3] = delx1*f1[1] + delx2*f3[1]; - v[4] = delx1*f1[2] + delx2*f3[2]; - v[5] = dely1*f1[2] + dely2*f3[2]; + KK_ACC_FLOAT v_third_acc[6]; + v_third_acc[0] = static_cast(static_cast(THIRD)*(delx1*f1[0] + delx2*f3[0])); + v_third_acc[1] = static_cast(static_cast(THIRD)*(dely1*f1[1] + dely2*f3[1])); + v_third_acc[2] = 
static_cast(static_cast(THIRD)*(delz1*f1[2] + delz2*f3[2])); + v_third_acc[3] = static_cast(static_cast(THIRD)*(delx1*f1[1] + delx2*f3[1])); + v_third_acc[4] = static_cast(static_cast(THIRD)*(delx1*f1[2] + delx2*f3[2])); + v_third_acc[5] = static_cast(static_cast(THIRD)*(dely1*f1[2] + dely2*f3[2])); if (vflag_global) { if (newton_bond) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(static_cast(3.0)*v_third_acc[n]); } else { if (i < nlocal) { - ev.v[0] += THIRD*v[0]; - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } if (j < nlocal) { - ev.v[0] += THIRD*v[0]; - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } if (k < nlocal) { - ev.v[0] += THIRD*v[0]; - - ev.v[1] += THIRD*v[1]; - ev.v[2] += THIRD*v[2]; - ev.v[3] += THIRD*v[3]; - ev.v[4] += THIRD*v[4]; - ev.v[5] += THIRD*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_third_acc[n]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { - v_vatom(i,0) += THIRD*v[0]; - v_vatom(i,1) += THIRD*v[1]; - v_vatom(i,2) += THIRD*v[2]; - v_vatom(i,3) += THIRD*v[3]; - v_vatom(i,4) += THIRD*v[4]; - v_vatom(i,5) += THIRD*v[5]; + for (int n = 0; n < 6; n++) + v_vatom(i,n) += v_third_acc[n]; } if (newton_bond || j < nlocal) { - v_vatom(j,0) += THIRD*v[0]; - v_vatom(j,1) += THIRD*v[1]; - v_vatom(j,2) += THIRD*v[2]; - v_vatom(j,3) += THIRD*v[3]; - v_vatom(j,4) += THIRD*v[4]; - v_vatom(j,5) += THIRD*v[5]; + for (int n = 0; n < 6; n++) + v_vatom(j,n) += v_third_acc[n]; } if (newton_bond || k < nlocal) { - v_vatom(k,0) += THIRD*v[0]; - v_vatom(k,1) += THIRD*v[1]; - v_vatom(k,2) += THIRD*v[2]; - v_vatom(k,3) += THIRD*v[3]; - v_vatom(k,4) += 
THIRD*v[4]; - v_vatom(k,5) += THIRD*v[5]; - + for (int n = 0; n < 6; n++) + v_vatom(k,n) += v_third_acc[n]; } } } diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index 4d605753ee5..85b34d3ff97 100644 --- a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -109,14 +109,14 @@ void BondHarmonicKokkos::compute(int eflag_in, int vflag_in) } } - if (eflag_global) energy += ev.evdwl; + if (eflag_global) energy += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -152,25 +152,25 @@ void BondHarmonicKokkos::operator()(TagBondHarmonicCompute 0.0) fbond = -2.0*rk/r; + KK_FLOAT fbond = 0; + if (r > 0) fbond = -static_cast(2.0) * rk / r; - KK_FLOAT ebond = 0.0; + KK_FLOAT ebond = 0; if (eflag) - ebond = rk*dr; + ebond = static_cast(rk*dr); // apply force to each of 2 atoms if (NEWTON_BOND || i1 < nlocal) { - f(i1,0) += delx*fbond; - f(i1,1) += dely*fbond; - f(i1,2) += delz*fbond; + f(i1,0) += static_cast(delx*fbond); + f(i1,1) += static_cast(dely*fbond); + f(i1,2) += static_cast(delz*fbond); } if (NEWTON_BOND || i2 < nlocal) { - f(i2,0) -= delx*fbond; - f(i2,1) -= dely*fbond; - f(i2,2) -= delz*fbond; + f(i2,0) -= static_cast(delx*fbond); + f(i2,1) -= static_cast(dely*fbond); + f(i2,2) -= static_cast(delz*fbond); } if (EVFLAG) ev_tally(ev,i1,i2,ebond,fbond,delx,dely,delz); @@ -209,8 +209,8 @@ void BondHarmonicKokkos::coeff(int narg, char **arg) d_r0 = k_r0.template view(); for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_r0.view_host()[i] = r0[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_r0.view_host()[i] = 
static_cast(r0[i]); } k_k.modify_host(); @@ -236,8 +236,8 @@ void BondHarmonicKokkos::read_restart(FILE *fp) d_r0 = k_r0.template view(); for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_r0.view_host()[i] = r0[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_r0.view_host()[i] = static_cast(r0[i]); } k_k.modify_host(); @@ -257,77 +257,55 @@ void BondHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int &i, const const KK_FLOAT &ebond, const KK_FLOAT &fbond, const KK_FLOAT &delx, const KK_FLOAT &dely, const KK_FLOAT &delz) const { - KK_FLOAT ebondhalf; - KK_FLOAT v[6]; - if (eflag_either) { if (eflag_global) { - if (newton_bond) ev.evdwl += ebond; + if (newton_bond) ev.evdwl += static_cast(ebond); else { - ebondhalf = 0.5*ebond; - if (i < nlocal) ev.evdwl += ebondhalf; - if (j < nlocal) ev.evdwl += ebondhalf; + KK_ACC_FLOAT ebondhalf = static_cast(static_cast(0.5)*ebond); + if (i < nlocal) ev.evdwl += static_cast(ebondhalf); + if (j < nlocal) ev.evdwl += static_cast(ebondhalf); } } if (eflag_atom) { - ebondhalf = 0.5*ebond; - if (newton_bond || i < nlocal) d_eatom[i] += ebondhalf; - if (newton_bond || j < nlocal) d_eatom[j] += ebondhalf; + KK_ACC_FLOAT ebondhalf = static_cast(static_cast(0.5)*ebond); + if (newton_bond || i < nlocal) d_eatom[i] += static_cast(ebondhalf); + if (newton_bond || j < nlocal) d_eatom[j] += static_cast(ebondhalf); } } if (vflag_either) { - v[0] = delx*delx*fbond; - v[1] = dely*dely*fbond; - v[2] = delz*delz*fbond; - v[3] = delx*dely*fbond; - v[4] = delx*delz*fbond; - v[5] = dely*delz*fbond; + KK_ACC_FLOAT v_half_acc[6]; + v_half_acc[0] = static_cast(static_cast(0.5)*delx*delx*fbond); + v_half_acc[1] = static_cast(static_cast(0.5)*dely*dely*fbond); + v_half_acc[2] = static_cast(static_cast(0.5)*delz*delz*fbond); + v_half_acc[3] = static_cast(static_cast(0.5)*delx*dely*fbond); + v_half_acc[4] = static_cast(static_cast(0.5)*delx*delz*fbond); + v_half_acc[5] = static_cast(static_cast(0.5)*dely*delz*fbond); if (vflag_global) { if 
(newton_bond) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(2.0)*v_half_acc[n]; } else { if (i < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_half_acc[n]; } if (j < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_half_acc[n]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { - d_vatom(i,0) += 0.5*v[0]; - d_vatom(i,1) += 0.5*v[1]; - d_vatom(i,2) += 0.5*v[2]; - d_vatom(i,3) += 0.5*v[3]; - d_vatom(i,4) += 0.5*v[4]; - d_vatom(i,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i,n) += v_half_acc[n]; } if (newton_bond || j < nlocal) { - d_vatom(j,0) += 0.5*v[0]; - d_vatom(j,1) += 0.5*v[1]; - d_vatom(j,2) += 0.5*v[2]; - d_vatom(j,3) += 0.5*v[3]; - d_vatom(j,4) += 0.5*v[4]; - d_vatom(j,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(j,n) += v_half_acc[n]; } } } diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index efab0417a75..984d8af649f 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -113,7 +113,7 @@ void DihedralCharmmKokkos::compute(int eflag_in, int vflag_in) int ndihedrallist = neighborKK->ndihedrallist; nlocal = atom->nlocal; newton_bond = force->newton_bond; - qqrd2e = force->qqrd2e; + qqrd2e = static_cast(force->qqrd2e); h_warning_flag() = 0; k_warning_flag.modify_host(); @@ -147,24 +147,24 @@ void DihedralCharmmKokkos::compute(int eflag_in, int vflag_in) error->warning(FLERR,"Dihedral problem"); if (eflag_global) { - energy += evm.emol; - force->pair->eng_vdwl += evm.evdwl; - force->pair->eng_coul += evm.ecoul; + energy += 
static_cast(evm.emol); + force->pair->eng_vdwl += static_cast(evm.evdwl); + force->pair->eng_coul += static_cast(evm.ecoul); } if (vflag_global) { - virial[0] += evm.v[0]; - virial[1] += evm.v[1]; - virial[2] += evm.v[2]; - virial[3] += evm.v[3]; - virial[4] += evm.v[4]; - virial[5] += evm.v[5]; - - force->pair->virial[0] += evm.vp[0]; - force->pair->virial[1] += evm.vp[1]; - force->pair->virial[2] += evm.vp[2]; - force->pair->virial[3] += evm.vp[3]; - force->pair->virial[4] += evm.vp[4]; - force->pair->virial[5] += evm.vp[5]; + virial[0] += static_cast(evm.v[0]); + virial[1] += static_cast(evm.v[1]); + virial[2] += static_cast(evm.v[2]); + virial[3] += static_cast(evm.v[3]); + virial[4] += static_cast(evm.v[4]); + virial[5] += static_cast(evm.v[5]); + + force->pair->virial[0] += static_cast(evm.vp[0]); + force->pair->virial[1] += static_cast(evm.vp[1]); + force->pair->virial[2] += static_cast(evm.vp[2]); + force->pair->virial[3] += static_cast(evm.vp[3]); + force->pair->virial[4] += static_cast(evm.vp[4]); + force->pair->virial[5] += static_cast(evm.vp[5]); } // don't yet have dualviews for eatom and vatom in pair_kokkos, @@ -190,12 +190,12 @@ void DihedralCharmmKokkos::compute(int eflag_in, int vflag_in) k_vatom_pair.template modify(); k_vatom_pair.sync_host(); for (int i = 0; i < n; i++) { - force->pair->vatom[i][0] += k_vatom_pair.view_host()(i,0); - force->pair->vatom[i][1] += k_vatom_pair.view_host()(i,1); - force->pair->vatom[i][2] += k_vatom_pair.view_host()(i,2); - force->pair->vatom[i][3] += k_vatom_pair.view_host()(i,3); - force->pair->vatom[i][4] += k_vatom_pair.view_host()(i,4); - force->pair->vatom[i][5] += k_vatom_pair.view_host()(i,5); + force->pair->vatom[i][0] += static_cast(k_vatom_pair.view_host()(i,0)); + force->pair->vatom[i][1] += static_cast(k_vatom_pair.view_host()(i,1)); + force->pair->vatom[i][2] += static_cast(k_vatom_pair.view_host()(i,2)); + force->pair->vatom[i][3] += static_cast(k_vatom_pair.view_host()(i,3)); + 
force->pair->vatom[i][4] += static_cast(k_vatom_pair.view_host()(i,4)); + force->pair->vatom[i][5] += static_cast(k_vatom_pair.view_host()(i,5)); } } @@ -251,10 +251,10 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute 0) rginv = 1.0/rg; - if (rasq > 0) ra2inv = 1.0/rasq; - if (rbsq > 0) rb2inv = 1.0/rbsq; + rginv = ra2inv = rb2inv = 0; + if (rg > 0) rginv = static_cast(1.0)/rg; + if (rasq > 0) ra2inv = static_cast(1.0)/rasq; + if (rbsq > 0) rb2inv = static_cast(1.0)/rbsq; const KK_FLOAT rabinv = sqrt(ra2inv*rb2inv); KK_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv; @@ -262,16 +262,16 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + if ((c > static_cast(1.0) + static_cast(TOLERANCE) || c < static_cast(-1.0) - static_cast(TOLERANCE)) && !d_warning_flag()) d_warning_flag() = 1; - if (c > 1.0) c = 1.0; - if (c < -1.0) c = -1.0; + if (c > static_cast(1.0)) c = static_cast(1.0); + if (c < static_cast(-1.0)) c = static_cast(-1.0); const int m = d_multiplicity[type]; - KK_FLOAT p = 1.0; + KK_FLOAT p = static_cast(1.0); KK_FLOAT ddf1,df1; - ddf1 = df1 = 0.0; + ddf1 = df1 = 0; for (int i = 0; i < m; i++) { ddf1 = p*c - df1*s; @@ -281,15 +281,15 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute(-m); + p += static_cast(1.0); if (m == 0) { - p = 1.0 + d_cos_shift[type]; - df1 = 0.0; + p = static_cast(1.0) + d_cos_shift[type]; + df1 = 0; } - KK_FLOAT edihedral = 0.0; + KK_FLOAT edihedral = 0; if (eflag) edihedral = d_k[type] * p; const KK_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; @@ -335,27 +335,27 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute(f1[0]); + a_f(i1,1) += static_cast(f1[1]); + a_f(i1,2) += static_cast(f1[2]); } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) += f2[0]; - a_f(i2,1) += f2[1]; - a_f(i2,2) += f2[2]; + a_f(i2,0) += static_cast(f2[0]); + a_f(i2,1) += static_cast(f2[1]); + a_f(i2,2) += static_cast(f2[2]); } if (NEWTON_BOND 
|| i3 < nlocal) { - a_f(i3,0) += f3[0]; - a_f(i3,1) += f3[1]; - a_f(i3,2) += f3[2]; + a_f(i3,0) += static_cast(f3[0]); + a_f(i3,1) += static_cast(f3[1]); + a_f(i3,2) += static_cast(f3[2]); } if (NEWTON_BOND || i4 < nlocal) { - a_f(i4,0) += f4[0]; - a_f(i4,1) += f4[1]; - a_f(i4,2) += f4[2]; + a_f(i4,0) += static_cast(f4[0]); + a_f(i4,1) += static_cast(f4[1]); + a_f(i4,2) += static_cast(f4[2]); } if (EVFLAG) @@ -365,7 +365,7 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute 0.0) { + if (d_weight[type] > 0) { const int itype = atomtype[i1]; const int jtype = atomtype[i4]; @@ -373,7 +373,7 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute(1.0)/rsq; const KK_FLOAT r6inv = r2inv*r2inv*r2inv; KK_FLOAT forcecoul; @@ -382,8 +382,8 @@ void DihedralCharmmKokkos::operator()(TagDihedralCharmmCompute::operator()(TagDihedralCharmmCompute(delx*fpair); + a_f(i1,1) += static_cast(dely*fpair); + a_f(i1,2) += static_cast(delz*fpair); } if (newton_bond || i4 < nlocal) { - a_f(i4,0) -= delx*fpair; - a_f(i4,1) -= dely*fpair; - a_f(i4,2) -= delz*fpair; + a_f(i4,0) -= static_cast(delx*fpair); + a_f(i4,1) -= static_cast(dely*fpair); + a_f(i4,2) -= static_cast(delz*fpair); } if (EVFLAG) ev_tally(evm,i1,i4,evdwl,ecoul,fpair,delx,dely,delz); @@ -432,8 +432,8 @@ void DihedralCharmmKokkos::coeff(int narg, char **arg) int nd = atom->ndihedraltypes; DAT::tdual_kkfloat_1d k_k("DihedralCharmm::k",nd+1); - DAT::tdual_kkfloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - DAT::tdual_kkfloat_1d k_shift("DihedralCharmm::shift",nd+1); + DAT::tdual_int_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + DAT::tdual_int_1d k_shift("DihedralCharmm::shift",nd+1); DAT::tdual_kkfloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); DAT::tdual_kkfloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); DAT::tdual_kkfloat_1d k_weight("DihedralCharmm::weight",nd+1); @@ -447,12 +447,12 @@ void DihedralCharmmKokkos::coeff(int narg, char **arg) int n = 
atom->ndihedraltypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; + k_k.view_host()[i] = static_cast(k[i]); k_multiplicity.view_host()[i] = multiplicity[i]; k_shift.view_host()[i] = shift[i]; - k_cos_shift.view_host()[i] = cos_shift[i]; - k_sin_shift.view_host()[i] = sin_shift[i]; - k_weight.view_host()[i] = weight[i]; + k_cos_shift.view_host()[i] = static_cast(cos_shift[i]); + k_sin_shift.view_host()[i] = static_cast(sin_shift[i]); + k_weight.view_host()[i] = static_cast(weight[i]); } k_k.modify_host(); @@ -495,10 +495,10 @@ void DihedralCharmmKokkos::init_style() int n = atom->ntypes; for (int i = 1; i <= n; i++) { for (int j = 1; j <= n; j++) { - k_lj14_1.view_host()(i,j) = lj14_1[i][j]; - k_lj14_2.view_host()(i,j) = lj14_2[i][j]; - k_lj14_3.view_host()(i,j) = lj14_3[i][j]; - k_lj14_4.view_host()(i,j) = lj14_4[i][j]; + k_lj14_1.view_host()(i,j) = static_cast(lj14_1[i][j]); + k_lj14_2.view_host()(i,j) = static_cast(lj14_2[i][j]); + k_lj14_3.view_host()(i,j) = static_cast(lj14_3[i][j]); + k_lj14_4.view_host()(i,j) = static_cast(lj14_4[i][j]); } } } @@ -525,8 +525,8 @@ void DihedralCharmmKokkos::read_restart(FILE *fp) int nd = atom->ndihedraltypes; DAT::tdual_kkfloat_1d k_k("DihedralCharmm::k",nd+1); - DAT::tdual_kkfloat_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); - DAT::tdual_kkfloat_1d k_shift("DihedralCharmm::shift",nd+1); + DAT::tdual_int_1d k_multiplicity("DihedralCharmm::multiplicity",nd+1); + DAT::tdual_int_1d k_shift("DihedralCharmm::shift",nd+1); DAT::tdual_kkfloat_1d k_cos_shift("DihedralCharmm::cos_shift",nd+1); DAT::tdual_kkfloat_1d k_sin_shift("DihedralCharmm::sin_shift",nd+1); DAT::tdual_kkfloat_1d k_weight("DihedralCharmm::weight",nd+1); @@ -540,12 +540,12 @@ void DihedralCharmmKokkos::read_restart(FILE *fp) int n = atom->ndihedraltypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; + k_k.view_host()[i] = static_cast(k[i]); k_multiplicity.view_host()[i] = multiplicity[i]; k_shift.view_host()[i] = shift[i]; - 
k_cos_shift.view_host()[i] = cos_shift[i]; - k_sin_shift.view_host()[i] = sin_shift[i]; - k_weight.view_host()[i] = weight[i]; + k_cos_shift.view_host()[i] = static_cast(cos_shift[i]); + k_sin_shift.view_host()[i] = static_cast(sin_shift[i]); + k_weight.view_host()[i] = static_cast(weight[i]); } k_k.modify_host(); @@ -579,14 +579,11 @@ void DihedralCharmmKokkos::ev_tally(EVM_FLOAT &evm, const int i1, co const KK_FLOAT &vb2x, const KK_FLOAT &vb2y, const KK_FLOAT &vb2z, const KK_FLOAT &vb3x, const KK_FLOAT &vb3y, const KK_FLOAT &vb3z) const { - KK_FLOAT edihedralquarter; - KK_FLOAT v[6]; - if (eflag_either) { if (eflag_global) { - if (newton_bond) evm.emol += edihedral; + if (newton_bond) evm.emol += static_cast(edihedral); else { - edihedralquarter = 0.25*edihedral; + KK_ACC_FLOAT edihedralquarter = static_cast(static_cast(0.25)*edihedral); if (i1 < nlocal) evm.emol += edihedralquarter; if (i2 < nlocal) evm.emol += edihedralquarter; if (i3 < nlocal) evm.emol += edihedralquarter; @@ -594,7 +591,7 @@ void DihedralCharmmKokkos::ev_tally(EVM_FLOAT &evm, const int i1, co } } if (eflag_atom) { - edihedralquarter = 0.25*edihedral; + KK_ACC_FLOAT edihedralquarter = static_cast(static_cast(0.25)*edihedral); if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter; if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter; if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter; @@ -603,89 +600,54 @@ void DihedralCharmmKokkos::ev_tally(EVM_FLOAT &evm, const int i1, co } if (vflag_either) { - v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; - v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; - v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; - v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; - v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; - v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + KK_ACC_FLOAT v_quarter_acc[6]; + v_quarter_acc[0] = static_cast(static_cast(0.25)*(vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0])); + v_quarter_acc[1] 
= static_cast(static_cast(0.25)*(vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1])); + v_quarter_acc[2] = static_cast(static_cast(0.25)*(vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2])); + v_quarter_acc[3] = static_cast(static_cast(0.25)*(vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1])); + v_quarter_acc[4] = static_cast(static_cast(0.25)*(vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2])); + v_quarter_acc[5] = static_cast(static_cast(0.25)*(vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2])); if (vflag_global) { if (newton_bond) { - evm.v[0] += v[0]; - evm.v[1] += v[1]; - evm.v[2] += v[2]; - evm.v[3] += v[3]; - evm.v[4] += v[4]; - evm.v[5] += v[5]; + for (int n = 0; n < 6; n++) + evm.v[n] += static_cast(4.0)*v_quarter_acc[n]; } else { if (i1 < nlocal) { - evm.v[0] += 0.25*v[0]; - evm.v[1] += 0.25*v[1]; - evm.v[2] += 0.25*v[2]; - evm.v[3] += 0.25*v[3]; - evm.v[4] += 0.25*v[4]; - evm.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + evm.v[n] += v_quarter_acc[n]; } if (i2 < nlocal) { - evm.v[0] += 0.25*v[0]; - evm.v[1] += 0.25*v[1]; - evm.v[2] += 0.25*v[2]; - evm.v[3] += 0.25*v[3]; - evm.v[4] += 0.25*v[4]; - evm.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + evm.v[n] += v_quarter_acc[n]; } if (i3 < nlocal) { - evm.v[0] += 0.25*v[0]; - evm.v[1] += 0.25*v[1]; - evm.v[2] += 0.25*v[2]; - evm.v[3] += 0.25*v[3]; - evm.v[4] += 0.25*v[4]; - evm.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + evm.v[n] += v_quarter_acc[n]; } if (i4 < nlocal) { - evm.v[0] += 0.25*v[0]; - evm.v[1] += 0.25*v[1]; - evm.v[2] += 0.25*v[2]; - evm.v[3] += 0.25*v[3]; - evm.v[4] += 0.25*v[4]; - evm.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + evm.v[n] += v_quarter_acc[n]; } } } if (vflag_atom) { if (newton_bond || i1 < nlocal) { - d_vatom(i1,0) += 0.25*v[0]; - d_vatom(i1,1) += 0.25*v[1]; - d_vatom(i1,2) += 0.25*v[2]; - d_vatom(i1,3) += 0.25*v[3]; - d_vatom(i1,4) += 0.25*v[4]; - d_vatom(i1,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i1,n) += v_quarter_acc[n]; } if (newton_bond || i2 < nlocal) { - 
d_vatom(i2,0) += 0.25*v[0]; - d_vatom(i2,1) += 0.25*v[1]; - d_vatom(i2,2) += 0.25*v[2]; - d_vatom(i2,3) += 0.25*v[3]; - d_vatom(i2,4) += 0.25*v[4]; - d_vatom(i2,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i2,n) += v_quarter_acc[n]; } if (newton_bond || i3 < nlocal) { - d_vatom(i3,0) += 0.25*v[0]; - d_vatom(i3,1) += 0.25*v[1]; - d_vatom(i3,2) += 0.25*v[2]; - d_vatom(i3,3) += 0.25*v[3]; - d_vatom(i3,4) += 0.25*v[4]; - d_vatom(i3,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i3,n) += v_quarter_acc[n]; } if (newton_bond || i4 < nlocal) { - d_vatom(i4,0) += 0.25*v[0]; - d_vatom(i4,1) += 0.25*v[1]; - d_vatom(i4,2) += 0.25*v[2]; - d_vatom(i4,3) += 0.25*v[3]; - d_vatom(i4,4) += 0.25*v[4]; - d_vatom(i4,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i4,n) += v_quarter_acc[n]; } } } @@ -702,18 +664,14 @@ void DihedralCharmmKokkos::ev_tally(EVM_FLOAT &evm, const int i, con const KK_FLOAT &evdwl, const KK_FLOAT &ecoul, const KK_FLOAT &fpair, const KK_FLOAT &delx, const KK_FLOAT &dely, const KK_FLOAT &delz) const { - KK_FLOAT evdwlhalf,ecoulhalf,epairhalf; - KK_FLOAT v[6]; - - if (eflag_either) { if (eflag_global) { if (newton_bond) { - evm.evdwl += evdwl; - evm.ecoul += ecoul; + evm.evdwl += static_cast(evdwl); + evm.ecoul += static_cast(ecoul); } else { - evdwlhalf = 0.5*evdwl; - ecoulhalf = 0.5*ecoul; + KK_ACC_FLOAT evdwlhalf = static_cast(static_cast(0.5)*evdwl); + KK_ACC_FLOAT ecoulhalf = static_cast(static_cast(0.5)*ecoul); if (i < nlocal) { evm.evdwl += evdwlhalf; evm.ecoul += ecoulhalf; @@ -725,64 +683,45 @@ void DihedralCharmmKokkos::ev_tally(EVM_FLOAT &evm, const int i, con } } if (eflag_atom) { - epairhalf = 0.5 * (evdwl + ecoul); + KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5)*(evdwl + ecoul)); if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf; if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf; } } if (vflag_either) { - v[0] = delx*delx*fpair; - v[1] = dely*dely*fpair; - v[2] = delz*delz*fpair; - v[3] = 
delx*dely*fpair; - v[4] = delx*delz*fpair; - v[5] = dely*delz*fpair; + KK_ACC_FLOAT v_half_acc[6]; + v_half_acc[0] = static_cast(static_cast(0.5)*delx*delx*fpair); + v_half_acc[1] = static_cast(static_cast(0.5)*dely*dely*fpair); + v_half_acc[2] = static_cast(static_cast(0.5)*delz*delz*fpair); + v_half_acc[3] = static_cast(static_cast(0.5)*delx*dely*fpair); + v_half_acc[4] = static_cast(static_cast(0.5)*delx*delz*fpair); + v_half_acc[5] = static_cast(static_cast(0.5)*dely*delz*fpair); if (vflag_global) { if (newton_bond) { - evm.vp[0] += v[0]; - evm.vp[1] += v[1]; - evm.vp[2] += v[2]; - evm.vp[3] += v[3]; - evm.vp[4] += v[4]; - evm.vp[5] += v[5]; + for (int n = 0; n < 6; n++) + evm.vp[n] += static_cast(2.0)*v_half_acc[n]; } else { if (i < nlocal) { - evm.vp[0] += 0.5*v[0]; - evm.vp[1] += 0.5*v[1]; - evm.vp[2] += 0.5*v[2]; - evm.vp[3] += 0.5*v[3]; - evm.vp[4] += 0.5*v[4]; - evm.vp[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + evm.vp[n] += v_half_acc[n]; } if (j < nlocal) { - evm.vp[0] += 0.5*v[0]; - evm.vp[1] += 0.5*v[1]; - evm.vp[2] += 0.5*v[2]; - evm.vp[3] += 0.5*v[3]; - evm.vp[4] += 0.5*v[4]; - evm.vp[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + evm.vp[n] += v_half_acc[n]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { - d_vatom_pair(i,0) += 0.5*v[0]; - d_vatom_pair(i,1) += 0.5*v[1]; - d_vatom_pair(i,2) += 0.5*v[2]; - d_vatom_pair(i,3) += 0.5*v[3]; - d_vatom_pair(i,4) += 0.5*v[4]; - d_vatom_pair(i,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + d_vatom_pair(i,n) += v_half_acc[n]; } if (newton_bond || j < nlocal) { - d_vatom_pair(j,0) += 0.5*v[0]; - d_vatom_pair(j,1) += 0.5*v[1]; - d_vatom_pair(j,2) += 0.5*v[2]; - d_vatom_pair(j,3) += 0.5*v[3]; - d_vatom_pair(j,4) += 0.5*v[4]; - d_vatom_pair(j,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + d_vatom_pair(j,n) += v_half_acc[n]; } } } diff --git a/src/KOKKOS/dihedral_charmm_kokkos.h b/src/KOKKOS/dihedral_charmm_kokkos.h index dfded8cb178..1b4ecc34ffa 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.h +++ 
b/src/KOKKOS/dihedral_charmm_kokkos.h @@ -29,11 +29,11 @@ DihedralStyle(charmm/kk/host,DihedralCharmmKokkos); namespace LAMMPS_NS { struct s_EVM_FLOAT { - double evdwl; - double ecoul; - double emol; - double v[6]; - double vp[6]; + KK_ACC_FLOAT evdwl; + KK_ACC_FLOAT ecoul; + KK_ACC_FLOAT emol; + KK_ACC_FLOAT v[6]; + KK_ACC_FLOAT vp[6]; KOKKOS_INLINE_FUNCTION s_EVM_FLOAT() { evdwl = 0; @@ -140,8 +140,8 @@ class DihedralCharmmKokkos : public DihedralCharmm { typename AT::t_kkfloat_2d d_lj14_4; typename AT::t_kkfloat_1d d_k; - typename AT::t_kkfloat_1d d_multiplicity; - typename AT::t_kkfloat_1d d_shift; + typename AT::t_int_1d d_multiplicity; + typename AT::t_int_1d d_shift; typename AT::t_kkfloat_1d d_sin_shift; typename AT::t_kkfloat_1d d_cos_shift; typename AT::t_kkfloat_1d d_weight; diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp index e4b30573c49..a6fbbc7c772 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -133,14 +133,14 @@ void ImproperHarmonicKokkos::compute(int eflag_in, int vflag_in) if (h_warning_flag()) error->warning(FLERR,"Dihedral problem"); - if (eflag_global) energy += ev.evdwl; + if (eflag_global) energy += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -181,9 +181,9 @@ void ImproperHarmonicKokkos::operator()(TagImproperHarmonicCompute(1.0) / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + const KK_FLOAT ss2 = static_cast(1.0) / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + const KK_FLOAT ss3 = static_cast(1.0) / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); const KK_FLOAT r1 = sqrt(ss1); const 
KK_FLOAT r2 = sqrt(ss2); @@ -195,41 +195,41 @@ void ImproperHarmonicKokkos::operator()(TagImproperHarmonicCompute(1.0) - c1*c1; + if (s1 < static_cast(SMALL)) s1 = static_cast(SMALL); + s1 = static_cast(1.0) / s1; - KK_FLOAT s2 = 1.0 - c2*c2; - if (s2 < SMALL) s2 = SMALL; - s2 = 1.0 / s2; + KK_FLOAT s2 = static_cast(1.0) - c2*c2; + if (s2 < static_cast(SMALL)) s2 = static_cast(SMALL); + s2 = static_cast(1.0) / s2; KK_FLOAT s12 = sqrt(s1*s2); KK_FLOAT c = (c1*c2 + c0) * s12; // error check - if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag()) + if ((c > static_cast(1.0) + static_cast(TOLERANCE) || c < (-static_cast(1.0) - static_cast(TOLERANCE))) && !d_warning_flag()) d_warning_flag() = 1; - if (c > 1.0) c = 1.0; - if (c < -1.0) c = -1.0; + if (c > static_cast(1.0)) c = static_cast(1.0); + if (c < -static_cast(1.0)) c = -static_cast(1.0); - KK_FLOAT s = sqrt(1.0 - c*c); - if (s < SMALL) s = SMALL; + KK_FLOAT s = sqrt(static_cast(1.0) - c*c); + if (s < static_cast(SMALL)) s = static_cast(SMALL); // force & energy const KK_FLOAT domega = acos(c) - d_chi[type]; KK_FLOAT a = d_k[type] * domega; - KK_FLOAT eimproper = 0.0; + KK_FLOAT eimproper = 0; if (eflag) eimproper = a*domega; - a = -a * 2.0/s; + a = -a * static_cast(2.0) / s; c = c * a; s12 = s12 * a; const KK_FLOAT a11 = c*ss1*s1; - const KK_FLOAT a22 = -ss2 * (2.0*c0*s12 - c*(s1+s2)); + const KK_FLOAT a22 = -ss2 * (static_cast(2.0)*c0*s12 - c*(s1+s2)); const KK_FLOAT a33 = c*ss3*s2; const KK_FLOAT a12 = -r1*r2*(c1*c*s1 + c2*s12); const KK_FLOAT a13 = -r1*r3*s12; @@ -259,27 +259,27 @@ void ImproperHarmonicKokkos::operator()(TagImproperHarmonicCompute(f1[0]); + f(i1,1) += static_cast(f1[1]); + f(i1,2) += static_cast(f1[2]); } if (NEWTON_BOND || i2 < nlocal) { - f(i2,0) += f2[0]; - f(i2,1) += f2[1]; - f(i2,2) += f2[2]; + f(i2,0) += static_cast(f2[0]); + f(i2,1) += static_cast(f2[1]); + f(i2,2) += static_cast(f2[2]); } if (NEWTON_BOND || i3 < nlocal) { - f(i3,0) += f3[0]; - f(i3,1) += f3[1]; - 
f(i3,2) += f3[2]; + f(i3,0) += static_cast(f3[0]); + f(i3,1) += static_cast(f3[1]); + f(i3,2) += static_cast(f3[2]); } if (NEWTON_BOND || i4 < nlocal) { - f(i4,0) += f4[0]; - f(i4,1) += f4[1]; - f(i4,2) += f4[2]; + f(i4,0) += static_cast(f4[0]); + f(i4,1) += static_cast(f4[1]); + f(i4,2) += static_cast(f4[2]); } if (EVFLAG) @@ -321,8 +321,8 @@ void ImproperHarmonicKokkos::coeff(int narg, char **arg) int n = atom->nimpropertypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_chi.view_host()[i] = chi[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_chi.view_host()[i] = static_cast(chi[i]); } k_k.modify_host(); @@ -340,8 +340,8 @@ void ImproperHarmonicKokkos::read_restart(FILE *fp) int n = atom->nimpropertypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_chi.view_host()[i] = chi[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_chi.view_host()[i] = static_cast(chi[i]); } k_k.modify_host(); @@ -364,15 +364,11 @@ void ImproperHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i1, co const KK_FLOAT &vb2x, const KK_FLOAT &vb2y, const KK_FLOAT &vb2z, const KK_FLOAT &vb3x, const KK_FLOAT &vb3y, const KK_FLOAT &vb3z) const { - KK_FLOAT eimproperquarter; - KK_FLOAT v[6]; - - if (eflag_either) { if (eflag_global) { - if (newton_bond) ev.evdwl += eimproper; + if (newton_bond) ev.evdwl += static_cast(eimproper); else { - eimproperquarter = 0.25*eimproper; + KK_ACC_FLOAT eimproperquarter = static_cast(static_cast(0.25)*eimproper); if (i1 < nlocal) ev.evdwl += eimproperquarter; if (i2 < nlocal) ev.evdwl += eimproperquarter; if (i3 < nlocal) ev.evdwl += eimproperquarter; @@ -380,7 +376,7 @@ void ImproperHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i1, co } } if (eflag_atom) { - eimproperquarter = 0.25*eimproper; + KK_ACC_FLOAT eimproperquarter = static_cast(static_cast(0.25)*eimproper); if (newton_bond || i1 < nlocal) d_eatom[i1] += eimproperquarter; if (newton_bond || i2 < nlocal) d_eatom[i2] += eimproperquarter; if (newton_bond || i3 < 
nlocal) d_eatom[i3] += eimproperquarter; @@ -389,89 +385,54 @@ void ImproperHarmonicKokkos::ev_tally(EV_FLOAT &ev, const int i1, co } if (vflag_either) { - v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; - v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; - v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; - v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; - v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; - v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + KK_ACC_FLOAT v_quarter_acc[6]; + v_quarter_acc[0] = static_cast(static_cast(0.25)*(vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0])); + v_quarter_acc[1] = static_cast(static_cast(0.25)*(vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1])); + v_quarter_acc[2] = static_cast(static_cast(0.25)*(vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2])); + v_quarter_acc[3] = static_cast(static_cast(0.25)*(vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1])); + v_quarter_acc[4] = static_cast(static_cast(0.25)*(vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2])); + v_quarter_acc[5] = static_cast(static_cast(0.25)*(vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2])); if (vflag_global) { if (newton_bond) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(4.0)*v_quarter_acc[n]; } else { if (i1 < nlocal) { - ev.v[0] += 0.25*v[0]; - ev.v[1] += 0.25*v[1]; - ev.v[2] += 0.25*v[2]; - ev.v[3] += 0.25*v[3]; - ev.v[4] += 0.25*v[4]; - ev.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_quarter_acc[n]; } if (i2 < nlocal) { - ev.v[0] += 0.25*v[0]; - ev.v[1] += 0.25*v[1]; - ev.v[2] += 0.25*v[2]; - ev.v[3] += 0.25*v[3]; - ev.v[4] += 0.25*v[4]; - ev.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_quarter_acc[n]; } if (i3 < nlocal) { - ev.v[0] += 0.25*v[0]; - ev.v[1] += 0.25*v[1]; - ev.v[2] += 0.25*v[2]; - ev.v[3] += 0.25*v[3]; - ev.v[4] += 0.25*v[4]; - ev.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] 
+= v_quarter_acc[n]; } if (i4 < nlocal) { - ev.v[0] += 0.25*v[0]; - ev.v[1] += 0.25*v[1]; - ev.v[2] += 0.25*v[2]; - ev.v[3] += 0.25*v[3]; - ev.v[4] += 0.25*v[4]; - ev.v[5] += 0.25*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_quarter_acc[n]; } } } if (vflag_atom) { if (newton_bond || i1 < nlocal) { - d_vatom(i1,0) += 0.25*v[0]; - d_vatom(i1,1) += 0.25*v[1]; - d_vatom(i1,2) += 0.25*v[2]; - d_vatom(i1,3) += 0.25*v[3]; - d_vatom(i1,4) += 0.25*v[4]; - d_vatom(i1,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i1,n) += v_quarter_acc[n]; } if (newton_bond || i2 < nlocal) { - d_vatom(i2,0) += 0.25*v[0]; - d_vatom(i2,1) += 0.25*v[1]; - d_vatom(i2,2) += 0.25*v[2]; - d_vatom(i2,3) += 0.25*v[3]; - d_vatom(i2,4) += 0.25*v[4]; - d_vatom(i2,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i2,n) += v_quarter_acc[n]; } if (newton_bond || i3 < nlocal) { - d_vatom(i3,0) += 0.25*v[0]; - d_vatom(i3,1) += 0.25*v[1]; - d_vatom(i3,2) += 0.25*v[2]; - d_vatom(i3,3) += 0.25*v[3]; - d_vatom(i3,4) += 0.25*v[4]; - d_vatom(i3,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i3,n) += v_quarter_acc[n]; } if (newton_bond || i4 < nlocal) { - d_vatom(i4,0) += 0.25*v[0]; - d_vatom(i4,1) += 0.25*v[1]; - d_vatom(i4,2) += 0.25*v[2]; - d_vatom(i4,3) += 0.25*v[3]; - d_vatom(i4,4) += 0.25*v[4]; - d_vatom(i4,5) += 0.25*v[5]; + for (int n = 0; n < 6; n++) + d_vatom(i4,n) += v_quarter_acc[n]; } } } From c4031fd10ba9ad8a28c4b3c8aca30f6fa2d63018 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:03:42 -0700 Subject: [PATCH 102/604] Removed silent conversions from fix/shake.*, as relevant for Rhodopsin and SPC/E. This includes deciding that 1e30f is the threshold for single-precision overflow cutoff. 
--- src/KOKKOS/fix_shake_kokkos.cpp | 410 ++++++++++++++++---------------- src/KOKKOS/fix_shake_kokkos.h | 14 ++ 2 files changed, 219 insertions(+), 205 deletions(-) diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index fff17c3e99f..9b3e8c70a14 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -158,18 +158,22 @@ void FixShakeKokkos::init() // set equilibrium bond distances for (int i = 1; i <= atom->nbondtypes; i++) - k_bond_distance.view_host()[i] = bond_distance[i]; + k_bond_distance.view_host()[i] = static_cast(bond_distance[i]); // set equilibrium angle distances for (int i = 1; i <= atom->nangletypes; i++) - k_angle_distance.view_host()[i] = angle_distance[i]; + k_angle_distance.view_host()[i] = static_cast(angle_distance[i]); k_bond_distance.modify_host(); k_angle_distance.modify_host(); k_bond_distance.sync(); k_angle_distance.sync(); + + tolerance_kk = static_cast(tolerance); + // set the overflow value for shake self-consistency check + overflow_kk = get_overflow_max(); } /* ---------------------------------------------------------------------- @@ -408,6 +412,9 @@ void FixShakeKokkos::post_force(int vflag) EV_FLOAT ev; + // update just in case tolerance was changed + tolerance_kk = static_cast(tolerance); + // loop over clusters to add constraint forces if (neighflag == HALF) { @@ -443,12 +450,12 @@ void FixShakeKokkos::post_force(int vflag) atomKK->modified(execution_space,F_MASK); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (vflag_atom) { @@ -582,8 +589,8 @@ void FixShakeKokkos::unconstrained_update() auto x = this->d_x; auto v = this->d_v; auto f 
= this->d_f; - auto dtfsq = this->dtfsq; - auto dtv = this->dtv; + auto dtfsq_kk = this->dtfsq_kk; + auto dtv_kk = this->dtv_kk; if (d_rmass.data()) { @@ -592,11 +599,11 @@ void FixShakeKokkos::unconstrained_update() Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { - const double dtfmsq = dtfsq / rmass[i]; - d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); - d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); - d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); - } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; + const KK_FLOAT dtfmsq = dtfsq_kk / rmass[i]; + d_xshake(i,0) = x(i,0) + dtv_kk*v(i,0) + dtfmsq*static_cast(f(i,0)); + d_xshake(i,1) = x(i,1) + dtv_kk*v(i,1) + dtfmsq*static_cast(f(i,1)); + d_xshake(i,2) = x(i,2) + dtv_kk*v(i,2) + dtfmsq*static_cast(f(i,2)); + } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0; }); } else { @@ -606,11 +613,11 @@ void FixShakeKokkos::unconstrained_update() Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { - const double dtfmsq = dtfsq / mass[type[i]]; - d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); - d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); - d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); - } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; + const KK_FLOAT dtfmsq = dtfsq_kk / mass[type[i]]; + d_xshake(i,0) = x(i,0) + dtv_kk*v(i,0) + dtfmsq*static_cast(f(i,0)); + d_xshake(i,1) = x(i,1) + dtv_kk*v(i,1) + dtfmsq*static_cast(f(i,1)); + d_xshake(i,2) = x(i,2) + dtv_kk*v(i,2) + dtfmsq*static_cast(f(i,2)); + } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0; }); } } @@ -666,50 +673,50 @@ void FixShakeKokkos::shake(int ilist, EV_FLOAT& ev) const // a,b,c = coeffs in quadratic equation for lamda if (d_rmass.data()) { - invmass0 = 1.0/d_rmass[i0]; - invmass1 = 1.0/d_rmass[i1]; + invmass0 = static_cast(1.0)/d_rmass[i0]; + invmass1 = static_cast(1.0)/d_rmass[i1]; 
} else { - invmass0 = 1.0/d_mass[d_type[i0]]; - invmass1 = 1.0/d_mass[d_type[i1]]; + invmass0 = static_cast(1.0)/d_mass[d_type[i0]]; + invmass1 = static_cast(1.0)/d_mass[d_type[i1]]; } KK_FLOAT a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; - KK_FLOAT b = 2.0 * (invmass0+invmass1) * + KK_FLOAT b = static_cast(2.0) * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); KK_FLOAT c = s01sq - bond1*bond1; // error check - KK_FLOAT determ = b*b - 4.0*a*c; - if (determ < 0.0) { + KK_FLOAT determ = b*b - static_cast(4.0)*a*c; + if (determ < 0) { //error->warning(FLERR,"Shake determinant < 0.0",0); d_error_flag() = 2; - determ = 0.0; + determ = 0; } // exact quadratic solution for lamda KK_FLOAT lamda,lamda1,lamda2; - lamda1 = (-b+sqrt(determ)) / (2.0*a); - lamda2 = (-b-sqrt(determ)) / (2.0*a); + lamda1 = (-b+sqrt(determ)) / (static_cast(2.0)*a); + lamda2 = (-b-sqrt(determ)) / (static_cast(2.0)*a); if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; else lamda = lamda2; // update forces if atom is owned by this processor - lamda /= dtfsq; + lamda /= dtfsq_kk; if (i0 < nlocal) { - a_f(i0,0) += lamda*r01[0]; - a_f(i0,1) += lamda*r01[1]; - a_f(i0,2) += lamda*r01[2]; + a_f(i0,0) += static_cast(lamda*r01[0]); + a_f(i0,1) += static_cast(lamda*r01[1]); + a_f(i0,2) += static_cast(lamda*r01[2]); } if (i1 < nlocal) { - a_f(i1,0) -= lamda*r01[0]; - a_f(i1,1) -= lamda*r01[1]; - a_f(i1,2) -= lamda*r01[2]; + a_f(i1,0) -= static_cast(lamda*r01[0]); + a_f(i1,1) -= static_cast(lamda*r01[1]); + a_f(i1,2) -= static_cast(lamda*r01[2]); } if (EVFLAG) { @@ -724,7 +731,7 @@ void FixShakeKokkos::shake(int ilist, EV_FLOAT& ev) const v[4] = lamda*r01[0]*r01[2]; v[5] = lamda*r01[1]*r01[2]; - v_tally(ev,count,atomlist,2.0,v); + v_tally(ev,count,atomlist,static_cast(2.0),v); } } @@ -790,30 +797,30 @@ void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { - invmass0 = 1.0/d_rmass[i0]; - invmass1 = 
1.0/d_rmass[i1]; - invmass2 = 1.0/d_rmass[i2]; + invmass0 = static_cast(1.0)/d_rmass[i0]; + invmass1 = static_cast(1.0)/d_rmass[i1]; + invmass2 = static_cast(1.0)/d_rmass[i2]; } else { - invmass0 = 1.0/d_mass[d_type[i0]]; - invmass1 = 1.0/d_mass[d_type[i1]]; - invmass2 = 1.0/d_mass[d_type[i2]]; + invmass0 = static_cast(1.0)/d_mass[d_type[i0]]; + invmass1 = static_cast(1.0)/d_mass[d_type[i1]]; + invmass2 = static_cast(1.0)/d_mass[d_type[i2]]; } - KK_FLOAT a11 = 2.0 * (invmass0+invmass1) * + KK_FLOAT a11 = static_cast(2.0) * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); - KK_FLOAT a12 = 2.0 * invmass0 * + KK_FLOAT a12 = static_cast(2.0) * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); - KK_FLOAT a21 = 2.0 * invmass0 * + KK_FLOAT a21 = static_cast(2.0) * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - KK_FLOAT a22 = 2.0 * (invmass0+invmass2) * + KK_FLOAT a22 = static_cast(2.0) * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); // inverse of matrix KK_FLOAT determ = a11*a22 - a12*a21; - if (determ == 0.0) d_error_flag() = 3; + if (determ == static_cast(0.0)) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); - KK_FLOAT determinv = 1.0/determ; + KK_FLOAT determinv = static_cast(1.0)/determ; KK_FLOAT a11inv = a22*determinv; KK_FLOAT a12inv = -a12*determinv; @@ -826,16 +833,16 @@ void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const KK_FLOAT quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; KK_FLOAT quad1_0202 = invmass0*invmass0 * r02sq; - KK_FLOAT quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; + KK_FLOAT quad1_0102 = static_cast(2.0) * (invmass0+invmass1)*invmass0 * r0102; KK_FLOAT quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; KK_FLOAT quad2_0101 = invmass0*invmass0 * r01sq; - KK_FLOAT quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; + KK_FLOAT quad2_0102 = static_cast(2.0) * (invmass0+invmass2)*invmass0 * r0102; // iterate until 
converged - KK_FLOAT lamda01 = 0.0; - KK_FLOAT lamda02 = 0.0; + KK_FLOAT lamda01 = 0; + KK_FLOAT lamda02 = 0; int niter = 0; int done = 0; @@ -854,41 +861,39 @@ void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const lamda02_new = a21inv*b1 + a22inv*b2; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) > tolerance) done = 0; + if (fabs(lamda01_new-lamda01) > tolerance_kk) done = 0; + if (fabs(lamda02_new-lamda02) > tolerance_kk) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; // stop iterations before we have a floating point overflow - // max KK_FLOAT is < 1.0e308, so 1e150 is a reasonable cutoff - - if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150) done = 1; + if (fabs(lamda01) > overflow_kk || fabs(lamda02) > overflow_kk) done = 1; niter++; } // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; + lamda01 = lamda01/dtfsq_kk; + lamda02 = lamda02/dtfsq_kk; if (i0 < nlocal) { - a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; - a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; - a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; + a_f(i0,0) += static_cast(lamda01*r01[0] + lamda02*r02[0]); + a_f(i0,1) += static_cast(lamda01*r01[1] + lamda02*r02[1]); + a_f(i0,2) += static_cast(lamda01*r01[2] + lamda02*r02[2]); } if (i1 < nlocal) { - a_f(i1,0) -= lamda01*r01[0]; - a_f(i1,1) -= lamda01*r01[1]; - a_f(i1,2) -= lamda01*r01[2]; + a_f(i1,0) -= static_cast(lamda01*r01[0]); + a_f(i1,1) -= static_cast(lamda01*r01[1]); + a_f(i1,2) -= static_cast(lamda01*r01[2]); } if (i2 < nlocal) { - a_f(i2,0) -= lamda02*r02[0]; - a_f(i2,1) -= lamda02*r02[1]; - a_f(i2,2) -= lamda02*r02[2]; + a_f(i2,0) -= static_cast(lamda02*r02[0]); + a_f(i2,1) -= static_cast(lamda02*r02[1]); + a_f(i2,2) -= static_cast(lamda02*r02[2]); } if (EVFLAG) { @@ -904,7 +909,7 @@ void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2]; v[5] = lamda01*r01[1]*r01[2] + 
lamda02*r02[1]*r02[2]; - v_tally(ev,count,atomlist,3.0,v); + v_tally(ev,count,atomlist,static_cast(3.0),v); } } @@ -984,43 +989,43 @@ void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { - invmass0 = 1.0/d_rmass[i0]; - invmass1 = 1.0/d_rmass[i1]; - invmass2 = 1.0/d_rmass[i2]; - invmass3 = 1.0/d_rmass[i3]; + invmass0 = static_cast(1.0)/d_rmass[i0]; + invmass1 = static_cast(1.0)/d_rmass[i1]; + invmass2 = static_cast(1.0)/d_rmass[i2]; + invmass3 = static_cast(1.0)/d_rmass[i3]; } else { - invmass0 = 1.0/d_mass[d_type[i0]]; - invmass1 = 1.0/d_mass[d_type[i1]]; - invmass2 = 1.0/d_mass[d_type[i2]]; - invmass3 = 1.0/d_mass[d_type[i3]]; + invmass0 = static_cast(1.0)/d_mass[d_type[i0]]; + invmass1 = static_cast(1.0)/d_mass[d_type[i1]]; + invmass2 = static_cast(1.0)/d_mass[d_type[i2]]; + invmass3 = static_cast(1.0)/d_mass[d_type[i3]]; } - KK_FLOAT a11 = 2.0 * (invmass0+invmass1) * + KK_FLOAT a11 = static_cast(2.0) * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); - KK_FLOAT a12 = 2.0 * invmass0 * + KK_FLOAT a12 = static_cast(2.0) * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); - KK_FLOAT a13 = 2.0 * invmass0 * + KK_FLOAT a13 = static_cast(2.0) * invmass0 * (s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]); - KK_FLOAT a21 = 2.0 * invmass0 * + KK_FLOAT a21 = static_cast(2.0) * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - KK_FLOAT a22 = 2.0 * (invmass0+invmass2) * + KK_FLOAT a22 = static_cast(2.0) * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); - KK_FLOAT a23 = 2.0 * invmass0 * + KK_FLOAT a23 = static_cast(2.0) * invmass0 * (s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]); - KK_FLOAT a31 = 2.0 * invmass0 * + KK_FLOAT a31 = static_cast(2.0) * invmass0 * (s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]); - KK_FLOAT a32 = 2.0 * invmass0 * + KK_FLOAT a32 = static_cast(2.0) * invmass0 * (s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]); - 
KK_FLOAT a33 = 2.0 * (invmass0+invmass3) * + KK_FLOAT a33 = static_cast(2.0) * (invmass0+invmass3) * (s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]); // inverse of matrix; KK_FLOAT determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; - if (determ == 0.0) d_error_flag() = 3; + if (determ == static_cast(0.0)) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); - KK_FLOAT determinv = 1.0/determ; + KK_FLOAT determinv = static_cast(1.0)/determ; KK_FLOAT a11inv = determinv * (a22*a33 - a23*a32); KK_FLOAT a12inv = -determinv * (a12*a33 - a13*a32); @@ -1041,29 +1046,29 @@ void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const KK_FLOAT quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; KK_FLOAT quad1_0202 = invmass0*invmass0 * r02sq; KK_FLOAT quad1_0303 = invmass0*invmass0 * r03sq; - KK_FLOAT quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; - KK_FLOAT quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103; - KK_FLOAT quad1_0203 = 2.0 * invmass0*invmass0 * r0203; + KK_FLOAT quad1_0102 = static_cast(2.0) * (invmass0+invmass1)*invmass0 * r0102; + KK_FLOAT quad1_0103 = static_cast(2.0) * (invmass0+invmass1)*invmass0 * r0103; + KK_FLOAT quad1_0203 = static_cast(2.0) * invmass0*invmass0 * r0203; KK_FLOAT quad2_0101 = invmass0*invmass0 * r01sq; KK_FLOAT quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; KK_FLOAT quad2_0303 = invmass0*invmass0 * r03sq; - KK_FLOAT quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; - KK_FLOAT quad2_0103 = 2.0 * invmass0*invmass0 * r0103; - KK_FLOAT quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203; + KK_FLOAT quad2_0102 = static_cast(2.0) * (invmass0+invmass2)*invmass0 * r0102; + KK_FLOAT quad2_0103 = static_cast(2.0) * invmass0*invmass0 * r0103; + KK_FLOAT quad2_0203 = static_cast(2.0) * (invmass0+invmass2)*invmass0 * r0203; KK_FLOAT quad3_0101 = invmass0*invmass0 * r01sq; KK_FLOAT quad3_0202 = invmass0*invmass0 * r02sq; KK_FLOAT quad3_0303 = 
(invmass0+invmass3)*(invmass0+invmass3) * r03sq; - KK_FLOAT quad3_0102 = 2.0 * invmass0*invmass0 * r0102; - KK_FLOAT quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103; - KK_FLOAT quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203; + KK_FLOAT quad3_0102 = static_cast(2.0) * invmass0*invmass0 * r0102; + KK_FLOAT quad3_0103 = static_cast(2.0) * (invmass0+invmass3)*invmass0 * r0103; + KK_FLOAT quad3_0203 = static_cast(2.0) * (invmass0+invmass3)*invmass0 * r0203; // iterate until converged - KK_FLOAT lamda01 = 0.0; - KK_FLOAT lamda02 = 0.0; - KK_FLOAT lamda03 = 0.0; + KK_FLOAT lamda01 = 0; + KK_FLOAT lamda02 = 0; + KK_FLOAT lamda03 = 0; int niter = 0; int done = 0; @@ -1100,51 +1105,48 @@ void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) > tolerance) done = 0; - if (fabs(lamda03_new-lamda03) > tolerance) done = 0; + if (fabs(lamda01_new-lamda01) > tolerance_kk) done = 0; + if (fabs(lamda02_new-lamda02) > tolerance_kk) done = 0; + if (fabs(lamda03_new-lamda03) > tolerance_kk) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda03 = lamda03_new; // stop iterations before we have a floating point overflow - // max KK_FLOAT is < 1.0e308, so 1e150 is a reasonable cutoff - - if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 - || fabs(lamda03) > 1e150) done = 1; + if (fabs(lamda01) > overflow_kk || fabs(lamda02) > overflow_kk || fabs(lamda03) > overflow_kk) done = 1; niter++; } // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; - lamda03 = lamda03/dtfsq; + lamda01 = lamda01/dtfsq_kk; + lamda02 = lamda02/dtfsq_kk; + lamda03 = lamda03/dtfsq_kk; if (i0 < nlocal) { - a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0]; - a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1]; - a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2] + 
lamda03*r03[2]; + a_f(i0,0) += static_cast(lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0]); + a_f(i0,1) += static_cast(lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1]); + a_f(i0,2) += static_cast(lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2]); } if (i1 < nlocal) { - a_f(i1,0) -= lamda01*r01[0]; - a_f(i1,1) -= lamda01*r01[1]; - a_f(i1,2) -= lamda01*r01[2]; + a_f(i1,0) -= static_cast(lamda01*r01[0]); + a_f(i1,1) -= static_cast(lamda01*r01[1]); + a_f(i1,2) -= static_cast(lamda01*r01[2]); } if (i2 < nlocal) { - a_f(i2,0) -= lamda02*r02[0]; - a_f(i2,1) -= lamda02*r02[1]; - a_f(i2,2) -= lamda02*r02[2]; + a_f(i2,0) -= static_cast(lamda02*r02[0]); + a_f(i2,1) -= static_cast(lamda02*r02[1]); + a_f(i2,2) -= static_cast(lamda02*r02[2]); } if (i3 < nlocal) { - a_f(i3,0) -= lamda03*r03[0]; - a_f(i3,1) -= lamda03*r03[1]; - a_f(i3,2) -= lamda03*r03[2]; + a_f(i3,0) -= static_cast(lamda03*r03[0]); + a_f(i3,1) -= static_cast(lamda03*r03[1]); + a_f(i3,2) -= static_cast(lamda03*r03[2]); } if (EVFLAG) { @@ -1161,7 +1163,7 @@ void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2]; - v_tally(ev,count,atomlist,4.0,v); + v_tally(ev,count,atomlist,static_cast(4.0),v); } } @@ -1240,41 +1242,41 @@ void FixShakeKokkos::shake3angle(int ilist, EV_FLOAT& ev) const // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { - invmass0 = 1.0/d_rmass[i0]; - invmass1 = 1.0/d_rmass[i1]; - invmass2 = 1.0/d_rmass[i2]; + invmass0 = static_cast(1.0)/d_rmass[i0]; + invmass1 = static_cast(1.0)/d_rmass[i1]; + invmass2 = static_cast(1.0)/d_rmass[i2]; } else { - invmass0 = 1.0/d_mass[d_type[i0]]; - invmass1 = 1.0/d_mass[d_type[i1]]; - invmass2 = 1.0/d_mass[d_type[i2]]; + invmass0 = static_cast(1.0)/d_mass[d_type[i0]]; + invmass1 = static_cast(1.0)/d_mass[d_type[i1]]; + invmass2 = static_cast(1.0)/d_mass[d_type[i2]]; } - KK_FLOAT a11 = 2.0 * 
(invmass0+invmass1) * + KK_FLOAT a11 = static_cast(2.0) * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); - KK_FLOAT a12 = 2.0 * invmass0 * + KK_FLOAT a12 = static_cast(2.0) * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); - KK_FLOAT a13 = - 2.0 * invmass1 * + KK_FLOAT a13 = - static_cast(2.0) * invmass1 * (s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]); - KK_FLOAT a21 = 2.0 * invmass0 * + KK_FLOAT a21 = static_cast(2.0) * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); - KK_FLOAT a22 = 2.0 * (invmass0+invmass2) * + KK_FLOAT a22 = static_cast(2.0) * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); - KK_FLOAT a23 = 2.0 * invmass2 * + KK_FLOAT a23 = static_cast(2.0) * invmass2 * (s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]); - KK_FLOAT a31 = - 2.0 * invmass1 * + KK_FLOAT a31 = - static_cast(2.0) * invmass1 * (s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]); - KK_FLOAT a32 = 2.0 * invmass2 * + KK_FLOAT a32 = static_cast(2.0) * invmass2 * (s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]); - KK_FLOAT a33 = 2.0 * (invmass1+invmass2) * + KK_FLOAT a33 = static_cast(2.0) * (invmass1+invmass2) * (s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]); // inverse of matrix KK_FLOAT determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; - if (determ == 0.0) d_error_flag() = 3; + if (determ == static_cast(0.0)) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); - KK_FLOAT determinv = 1.0/determ; + KK_FLOAT determinv = static_cast(1.0)/determ; KK_FLOAT a11inv = determinv * (a22*a33 - a23*a32); KK_FLOAT a12inv = -determinv * (a12*a33 - a13*a32); @@ -1295,29 +1297,29 @@ void FixShakeKokkos::shake3angle(int ilist, EV_FLOAT& ev) const KK_FLOAT quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; KK_FLOAT quad1_0202 = invmass0*invmass0 * r02sq; KK_FLOAT quad1_1212 = invmass1*invmass1 * r12sq; - KK_FLOAT quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; - 
KK_FLOAT quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112; - KK_FLOAT quad1_0212 = - 2.0 * invmass0*invmass1 * r0212; + KK_FLOAT quad1_0102 = static_cast(2.0) * (invmass0+invmass1)*invmass0 * r0102; + KK_FLOAT quad1_0112 = - static_cast(2.0) * (invmass0+invmass1)*invmass1 * r0112; + KK_FLOAT quad1_0212 = - static_cast(2.0) * invmass0*invmass1 * r0212; KK_FLOAT quad2_0101 = invmass0*invmass0 * r01sq; KK_FLOAT quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; KK_FLOAT quad2_1212 = invmass2*invmass2 * r12sq; - KK_FLOAT quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; - KK_FLOAT quad2_0112 = 2.0 * invmass0*invmass2 * r0112; - KK_FLOAT quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212; + KK_FLOAT quad2_0102 = static_cast(2.0) * (invmass0+invmass2)*invmass0 * r0102; + KK_FLOAT quad2_0112 = static_cast(2.0) * invmass0*invmass2 * r0112; + KK_FLOAT quad2_0212 = static_cast(2.0) * (invmass0+invmass2)*invmass2 * r0212; KK_FLOAT quad3_0101 = invmass1*invmass1 * r01sq; KK_FLOAT quad3_0202 = invmass2*invmass2 * r02sq; KK_FLOAT quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq; - KK_FLOAT quad3_0102 = - 2.0 * invmass1*invmass2 * r0102; - KK_FLOAT quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112; - KK_FLOAT quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212; + KK_FLOAT quad3_0102 = - static_cast(2.0) * invmass1*invmass2 * r0102; + KK_FLOAT quad3_0112 = - static_cast(2.0) * (invmass1+invmass2)*invmass1 * r0112; + KK_FLOAT quad3_0212 = static_cast(2.0) * (invmass1+invmass2)*invmass2 * r0212; // iterate until converged - KK_FLOAT lamda01 = 0.0; - KK_FLOAT lamda02 = 0.0; - KK_FLOAT lamda12 = 0.0; + KK_FLOAT lamda01 = 0; + KK_FLOAT lamda02 = 0; + KK_FLOAT lamda12 = 0; int niter = 0; int done = 0; @@ -1355,45 +1357,42 @@ void FixShakeKokkos::shake3angle(int ilist, EV_FLOAT& ev) const lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; - if (fabs(lamda01_new-lamda01) > tolerance) done = 0; - if (fabs(lamda02_new-lamda02) 
> tolerance) done = 0; - if (fabs(lamda12_new-lamda12) > tolerance) done = 0; + if (fabs(lamda01_new-lamda01) > tolerance_kk) done = 0; + if (fabs(lamda02_new-lamda02) > tolerance_kk) done = 0; + if (fabs(lamda12_new-lamda12) > tolerance_kk) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda12 = lamda12_new; // stop iterations before we have a floating point overflow - // max KK_FLOAT is < 1.0e308, so 1e150 is a reasonable cutoff - - if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 - || fabs(lamda12) > 1e150) done = 1; + if (fabs(lamda01) > overflow_kk || fabs(lamda02) > overflow_kk || fabs(lamda12) > overflow_kk) done = 1; niter++; } // update forces if atom is owned by this processor - lamda01 = lamda01/dtfsq; - lamda02 = lamda02/dtfsq; - lamda12 = lamda12/dtfsq; + lamda01 = lamda01/dtfsq_kk; + lamda02 = lamda02/dtfsq_kk; + lamda12 = lamda12/dtfsq_kk; if (i0 < nlocal) { - a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; - a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; - a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; + a_f(i0,0) += static_cast(lamda01*r01[0] + lamda02*r02[0]); + a_f(i0,1) += static_cast(lamda01*r01[1] + lamda02*r02[1]); + a_f(i0,2) += static_cast(lamda01*r01[2] + lamda02*r02[2]); } if (i1 < nlocal) { - a_f(i1,0) -= lamda01*r01[0] - lamda12*r12[0]; - a_f(i1,1) -= lamda01*r01[1] - lamda12*r12[1]; - a_f(i1,2) -= lamda01*r01[2] - lamda12*r12[2]; + a_f(i1,0) -= static_cast(lamda01*r01[0] - lamda12*r12[0]); + a_f(i1,1) -= static_cast(lamda01*r01[1] - lamda12*r12[1]); + a_f(i1,2) -= static_cast(lamda01*r01[2] - lamda12*r12[2]); } if (i2 < nlocal) { - a_f(i2,0) -= lamda02*r02[0] + lamda12*r12[0]; - a_f(i2,1) -= lamda02*r02[1] + lamda12*r12[1]; - a_f(i2,2) -= lamda02*r02[2] + lamda12*r12[2]; + a_f(i2,0) -= static_cast(lamda02*r02[0] + lamda12*r12[0]); + a_f(i2,1) -= static_cast(lamda02*r02[1] + lamda12*r12[1]); + a_f(i2,2) -= static_cast(lamda02*r02[2] + lamda12*r12[2]); } if (EVFLAG) { @@ -1409,7 +1408,7 @@ void FixShakeKokkos::shake3angle(int 
ilist, EV_FLOAT& ev) const v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2]; - v_tally(ev,count,atomlist,3.0,v); + v_tally(ev,count,atomlist,static_cast(3.0),v); } } @@ -1810,13 +1809,13 @@ int FixShakeKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_1 d_buf = k_buf.view(); if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; + dx = static_cast(pbc[0]*domain->xprd); + dy = static_cast(pbc[1]*domain->yprd); + dz = static_cast(pbc[2]*domain->zprd); } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; + dx = static_cast(pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz); + dy = static_cast(pbc[1]*domain->yprd + pbc[3]*domain->yz); + dz = static_cast(pbc[2]*domain->zprd); } if (pbc_flag) @@ -1833,13 +1832,13 @@ void FixShakeKokkos::operator()(TagFixShakePackForwardComm const int j = d_sendlist(i); if (PBC_FLAG == 0) { - d_buf[3*i] = d_xshake(j,0); - d_buf[3*i+1] = d_xshake(j,1); - d_buf[3*i+2] = d_xshake(j,2); + d_buf[3*i] = static_cast(d_xshake(j,0)); + d_buf[3*i+1] = static_cast(d_xshake(j,1)); + d_buf[3*i+2] = static_cast(d_xshake(j,2)); } else { - d_buf[3*i] = d_xshake(j,0) + dx; - d_buf[3*i+1] = d_xshake(j,1) + dy; - d_buf[3*i+2] = d_xshake(j,2) + dz; + d_buf[3*i] = static_cast(d_xshake(j,0) + dx); + d_buf[3*i+1] = static_cast(d_xshake(j,1) + dy); + d_buf[3*i+2] = static_cast(d_xshake(j,2) + dz); } } @@ -1871,9 +1870,9 @@ void FixShakeKokkos::unpack_forward_comm_kokkos(int n, int first_in, template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakeUnpackForwardComm, const int &i) const { - d_xshake(i + first,0) = d_buf[3*i]; - d_xshake(i + first,1) = d_buf[3*i+1]; - d_xshake(i + first,2) = d_buf[3*i+2]; + d_xshake(i + first,0) = static_cast(d_buf[3*i]); + d_xshake(i + first,1) = 
static_cast(d_buf[3*i+1]); + d_xshake(i + first,2) = static_cast(d_buf[3*i+2]); } /* ---------------------------------------------------------------------- */ @@ -1897,8 +1896,12 @@ template void FixShakeKokkos::shake_end_of_step(int vflag) { dtv = update->dt; dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; + dtfsq_kk = static_cast(dtfsq); FixShakeKokkos::post_force(vflag); - if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v; + if (!rattle) { + dtfsq = update->dt * update->dt * force->ftm2v; + dtfsq_kk = static_cast(dtfsq); + } } /* ---------------------------------------------------------------------- @@ -1940,6 +1943,8 @@ void FixShakeKokkos::correct_coordinates(int vflag) { // IMPORTANT: use 1 as argument and thereby enforce velocity Verlet dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; + dtv_kk = static_cast(dtv); + dtfsq_kk = static_cast(dtfsq); FixShakeKokkos::post_force(vflag); atomKK->sync(Host,X_MASK|F_MASK); @@ -1974,7 +1979,10 @@ void FixShakeKokkos::correct_coordinates(int vflag) { } } - if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v; + if (!rattle) { + dtfsq = update->dt * update->dt * force->ftm2v; + dtfsq_kk = static_cast(dtfsq); + } // communicate changes // NOTE: for compatibility xshake is temporarily set to x, such that pack/unpack_forward @@ -2010,27 +2018,19 @@ void FixShakeKokkos::v_tally(EV_FLOAT &ev, int n, int *atomlist, KK_ int m; if (vflag_global) { - KK_FLOAT fraction = n/total; - ev.v[0] += fraction*v[0]; - ev.v[1] += fraction*v[1]; - ev.v[2] += fraction*v[2]; - ev.v[3] += fraction*v[3]; - ev.v[4] += fraction*v[4]; - ev.v[5] += fraction*v[5]; + KK_FLOAT fraction = static_cast(n)/total; + for (int j = 0; j < 6; j++) + ev.v[j] += static_cast(fraction*v[j]); } if (vflag_atom) { - KK_FLOAT fraction = 1.0/total; + KK_FLOAT fraction = static_cast(1.0)/total; for (int i = 0; i < n; i++) { auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = 
v_vatom.template access>(); m = atomlist[i]; - a_vatom(m,0) += fraction*v[0]; - a_vatom(m,1) += fraction*v[1]; - a_vatom(m,2) += fraction*v[2]; - a_vatom(m,3) += fraction*v[3]; - a_vatom(m,4) += fraction*v[4]; - a_vatom(m,5) += fraction*v[5]; + for (int j = 0; j < 6; j++) + a_vatom(m,j) += static_cast(fraction*v[j]); } } } diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index a78f1ecfc77..6cf5372e5cc 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -221,6 +221,20 @@ class FixShakeKokkos : public FixShake, public KokkosBase { typename AT::t_int_1d d_indices; KK_FLOAT dx,dy,dz; + KK_FLOAT dtv_kk,dtfsq_kk; + + // max double is < 1.0e308, so 1e150 is a reasonable cutoff + // max float is ~3.4e38, so 1e30 is a reasonable cutoff + template + real_t get_overflow_max() { + if constexpr (std::is_same_v) { + return 1e150; + } else { + return 1e30f; + } + } + + KK_FLOAT tolerance_kk, overflow_kk; int *shake_flag_tmp; tagint **shake_atom_tmp; From 64de4611eaf6e9c83eaf92c0adca9e1f6eb009dc Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:07:12 -0700 Subject: [PATCH 103/604] Removed silent conversions from bond_fene_kokkos.*, as relevant for Chain. 
--- src/KOKKOS/bond_fene_kokkos.cpp | 126 +++++++++++++------------------- 1 file changed, 52 insertions(+), 74 deletions(-) diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index 9dfcd5b1b10..3f2effe7505 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -136,14 +136,14 @@ void BondFENEKokkos::compute(int eflag_in, int vflag_in) else if (h_flag() == 2) error->one(FLERR,"Bad FENE bond"); - if (eflag_global) energy += ev.evdwl; + if (eflag_global) energy += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -184,53 +184,53 @@ void BondFENEKokkos::operator()(TagBondFENECompute(1.0) - rsq/r0sq; // if r -> r0, then rlogarg < 0.0 which is an error // issue a warning and reset rlogarg = epsilon // if r > 2*r0 something serious is wrong, abort - if (rlogarg < 0.1) { - if (rlogarg <= -3.0) + if (rlogarg < static_cast(0.1)) { + if (rlogarg <= static_cast(-3.0)) d_flag() = 2; else d_flag() = 1; - rlogarg = 0.1; + rlogarg = static_cast(0.1); } KK_FLOAT fbond = -k/rlogarg; // force from LJ term - KK_FLOAT sr6 = 0.0; + KK_FLOAT sr6 = 0; KK_FLOAT sigma2 = sigma*sigma; - if (rsq < MY_CUBEROOT2*sigma2) { + if (rsq < static_cast(MY_CUBEROOT2)*sigma2) { const KK_FLOAT sr2 = sigma2/rsq; sr6 = sr2*sr2*sr2; - fbond += 48.0*epsilon*sr6*(sr6-0.5)/rsq; + fbond += static_cast(48.0)*epsilon*sr6*(sr6 - static_cast(0.5))/rsq; } // energy - KK_FLOAT ebond = 0.0; + KK_FLOAT ebond = 0; if (eflag) { - ebond = -0.5 * k*r0sq*log(rlogarg); - if (rsq < MY_CUBEROOT2*sigma2) - ebond += 4.0*epsilon*sr6*(sr6-1.0) + epsilon; + ebond = -static_cast(0.5) * 
k*r0sq*log(rlogarg); + if (rsq < static_cast(MY_CUBEROOT2)*sigma2) + ebond += static_cast(4.0)*epsilon*sr6*(sr6-static_cast(1.0)) + epsilon; } // apply force to each of 2 atoms if (NEWTON_BOND || i1 < nlocal) { - a_f(i1,0) += delx*fbond; - a_f(i1,1) += dely*fbond; - a_f(i1,2) += delz*fbond; + a_f(i1,0) += static_cast(delx*fbond); + a_f(i1,1) += static_cast(dely*fbond); + a_f(i1,2) += static_cast(delz*fbond); } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) -= delx*fbond; - a_f(i2,1) -= dely*fbond; - a_f(i2,2) -= delz*fbond; + a_f(i2,0) -= static_cast(delx*fbond); + a_f(i2,1) -= static_cast(dely*fbond); + a_f(i2,2) -= static_cast(delz*fbond); } if (EVFLAG) ev_tally(ev,i1,i2,ebond,fbond,delx,dely,delz); @@ -274,10 +274,10 @@ void BondFENEKokkos::coeff(int narg, char **arg) int n = atom->nbondtypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_r0.view_host()[i] = r0[i]; - k_epsilon.view_host()[i] = epsilon[i]; - k_sigma.view_host()[i] = sigma[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_r0.view_host()[i] = static_cast(r0[i]); + k_epsilon.view_host()[i] = static_cast(epsilon[i]); + k_sigma.view_host()[i] = static_cast(sigma[i]); } k_k.modify_host(); @@ -298,10 +298,10 @@ void BondFENEKokkos::read_restart(FILE *fp) int n = atom->nbondtypes; for (int i = 1; i <= n; i++) { - k_k.view_host()[i] = k[i]; - k_r0.view_host()[i] = r0[i]; - k_epsilon.view_host()[i] = epsilon[i]; - k_sigma.view_host()[i] = sigma[i]; + k_k.view_host()[i] = static_cast(k[i]); + k_r0.view_host()[i] = static_cast(r0[i]); + k_epsilon.view_host()[i] = static_cast(epsilon[i]); + k_sigma.view_host()[i] = static_cast(sigma[i]); } k_k.modify_host(); @@ -321,81 +321,59 @@ void BondFENEKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int const KK_FLOAT &ebond, const KK_FLOAT &fbond, const KK_FLOAT &delx, const KK_FLOAT &dely, const KK_FLOAT &delz) const { - KK_FLOAT ebondhalf; - KK_FLOAT v[6]; - // The eatom and vatom arrays are atomic Kokkos::View::value,Kokkos::MemoryTraits > 
v_eatom = d_eatom; Kokkos::View::value,Kokkos::MemoryTraits > v_vatom = d_vatom; if (eflag_either) { if (eflag_global) { - if (newton_bond) ev.evdwl += ebond; + if (newton_bond) ev.evdwl += static_cast(ebond); else { - ebondhalf = 0.5*ebond; + KK_ACC_FLOAT ebondhalf = static_cast(static_cast(0.5)*ebond); if (i < nlocal) ev.evdwl += ebondhalf; if (j < nlocal) ev.evdwl += ebondhalf; } } if (eflag_atom) { - ebondhalf = 0.5*ebond; + KK_ACC_FLOAT ebondhalf = static_cast(static_cast(0.5)*ebond); if (newton_bond || i < nlocal) v_eatom[i] += ebondhalf; if (newton_bond || j < nlocal) v_eatom[j] += ebondhalf; } } if (vflag_either) { - v[0] = delx*delx*fbond; - v[1] = dely*dely*fbond; - v[2] = delz*delz*fbond; - v[3] = delx*dely*fbond; - v[4] = delx*delz*fbond; - v[5] = dely*delz*fbond; + const KK_ACC_FLOAT v_acc[6] = + { static_cast(delx*delx*fbond), + static_cast(dely*dely*fbond), + static_cast(delz*delz*fbond), + static_cast(delx*dely*fbond), + static_cast(delx*delz*fbond), + static_cast(dely*delz*fbond) }; if (vflag_global) { if (newton_bond) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v_acc[n]; } else { if (i < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(0.5)*v_acc[n]; } if (j < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += static_cast(0.5)*v_acc[n]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { - v_vatom(i,0) += 0.5*v[0]; - v_vatom(i,1) += 0.5*v[1]; - v_vatom(i,2) += 0.5*v[2]; - v_vatom(i,3) += 0.5*v[3]; - v_vatom(i,4) += 0.5*v[4]; - v_vatom(i,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + v_vatom(i,n) += static_cast(0.5)*v_acc[n]; } if 
(newton_bond || j < nlocal) { - v_vatom(j,0) += 0.5*v[0]; - v_vatom(j,1) += 0.5*v[1]; - v_vatom(j,2) += 0.5*v[2]; - v_vatom(j,3) += 0.5*v[3]; - v_vatom(j,4) += 0.5*v[4]; - v_vatom(j,5) += 0.5*v[5]; + for (int n = 0; n < 6; n++) + v_vatom(j,n) += static_cast(0.5)*v_acc[n]; } } } From 6e943b40359e48157f294b0536f6855392de492a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 27 Oct 2025 16:14:55 -0600 Subject: [PATCH 104/604] Need to include default fields in atom_masks --- src/KOKKOS/atom_vec_kokkos.cpp | 24 ++++++++++++++++++++++++ src/KOKKOS/atom_vec_kokkos.h | 1 + src/atom_vec.h | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index dd3c1e19bb1..40cecacb3e6 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -52,6 +52,16 @@ AtomVecKokkos::~AtomVecKokkos() ngrow = 0; } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecKokkos::setup_fields() +{ + AtomVec::setup_fields(); + set_atom_masks(); +} + /* ---------------------------------------------------------------------- */ template @@ -930,30 +940,44 @@ int AtomVecKokkos::field2mask(std::string field) void AtomVecKokkos::set_atom_masks() { datamask_grow = EMPTY_MASK; + for (int i = 0; i < default_grow.size(); i++) + datamask_grow |= field2mask(default_grow[i]); for (int i = 0; i < ngrow; i++) datamask_grow |= field2mask(fields_grow[i]); datamask_comm = EMPTY_MASK; + for (int i = 0; i < default_comm.size(); i++) + datamask_comm |= field2mask(default_comm[i]); for (int i = 0; i < ncomm; i++) datamask_comm |= field2mask(fields_comm[i]); datamask_comm_vel = EMPTY_MASK; + for (int i = 0; i < default_comm_vel.size(); i++) + datamask_comm_vel |= field2mask(default_comm_vel[i]); for (int i = 0; i < ncomm_vel; i++) 
datamask_comm_vel |= field2mask(fields_comm_vel[i]); datamask_reverse = EMPTY_MASK; + for (int i = 0; i < default_reverse.size(); i++) + datamask_reverse |= field2mask(default_reverse[i]); for (int i = 0; i < nreverse; i++) datamask_reverse |= field2mask(fields_reverse[i]); datamask_border = EMPTY_MASK; + for (int i = 0; i < default_border.size(); i++) + datamask_border |= field2mask(default_border[i]); for (int i = 0; i < nborder; i++) datamask_border |= field2mask(fields_border[i]); datamask_border_vel = EMPTY_MASK; + for (int i = 0; i < default_border_vel.size(); i++) + datamask_border_vel |= field2mask(default_border_vel[i]); for (int i = 0; i < nborder_vel; i++) datamask_border_vel |= field2mask(fields_border_vel[i]); datamask_exchange = EMPTY_MASK; + for (int i = 0; i < default_exchange.size(); i++) + datamask_exchange |= field2mask(default_exchange[i]); for (int i = 0; i < nexchange; i++) datamask_exchange |= field2mask(fields_exchange[i]); } diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 8375bd93e31..d9532f2c987 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -135,6 +135,7 @@ class AtomVecKokkos : virtual public AtomVec { int datamask_border_vel; int datamask_exchange; + void setup_fields() override; int field2mask(std::string); void set_atom_masks(); diff --git a/src/atom_vec.h b/src/atom_vec.h index 76493cbe505..bcb77e9ac95 100644 --- a/src/atom_vec.h +++ b/src/atom_vec.h @@ -214,7 +214,7 @@ class AtomVec : protected Pointers { void grow_nmax(); int grow_nmax_bonus(int); - void setup_fields(); + virtual void setup_fields(); int process_fields(const std::vector &, const std::vector &, Method *); void init_method(int, Method *); }; From bd6bcddcac177a04a8b1a4df090273530afe8612 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:18:59 -0700 Subject: [PATCH 105/604] Removed silent conversions for remaining components of the Chute benchmark. 
--- src/KOKKOS/compute_erotate_sphere_kokkos.cpp | 4 ++-- src/KOKKOS/fix_freeze_kokkos.cpp | 18 +++++++++--------- src/KOKKOS/fix_gravity_kokkos.cpp | 20 ++++++++++++-------- src/KOKKOS/fix_gravity_kokkos.h | 2 ++ 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp index d18aa3d27a8..866b86879a2 100644 --- a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp +++ b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp @@ -70,8 +70,8 @@ double ComputeERotateSphereKokkos::compute_scalar() auto omega2 = l_omega(i,2); auto radius = l_radius(i); erotate += - (omega0 * omega0 + omega1 * omega1 + omega2 * omega2) * - radius * radius * l_rmass[i]; + static_cast((omega0 * omega0 + omega1 * omega1 + omega2 * omega2) * + radius * radius * l_rmass[i]); } },erotate); } diff --git a/src/KOKKOS/fix_freeze_kokkos.cpp b/src/KOKKOS/fix_freeze_kokkos.cpp index 44d3fb5b1b0..76030105d14 100644 --- a/src/KOKKOS/fix_freeze_kokkos.cpp +++ b/src/KOKKOS/fix_freeze_kokkos.cpp @@ -63,15 +63,15 @@ template KOKKOS_INLINE_FUNCTION void FixFreezeKokkos::operator()(const int i, OriginalForce &original) const { if (mask[i] & groupbit) { - original.values[0] += f(i,0); - original.values[1] += f(i,1); - original.values[2] += f(i,2); - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; - torque(i,0) = 0.0; - torque(i,1) = 0.0; - torque(i,2) = 0.0; + original.values[0] += static_cast(f(i,0)); + original.values[1] += static_cast(f(i,1)); + original.values[2] += static_cast(f(i,2)); + f(i,0) = 0; + f(i,1) = 0; + f(i,2) = 0; + torque(i,0) = 0; + torque(i,1) = 0; + torque(i,2) = 0; } } diff --git a/src/KOKKOS/fix_gravity_kokkos.cpp b/src/KOKKOS/fix_gravity_kokkos.cpp index 5b7fa0ae8cd..8081b92283f 100644 --- a/src/KOKKOS/fix_gravity_kokkos.cpp +++ b/src/KOKKOS/fix_gravity_kokkos.cpp @@ -77,6 +77,10 @@ void FixGravityKokkos::post_force(int /*vflag*/) eflag = 0; egrav = 0.0; + xacc_kk = static_cast(xacc); + yacc_kk = 
static_cast(yacc); + zacc_kk = static_cast(zacc); + if (atomKK->rmass) { Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), *this, egrav); } @@ -92,10 +96,10 @@ void FixGravityKokkos::operator()(TagFixGravityRMass, const int i, d { if (mask[i] & groupbit) { KK_FLOAT massone = rmass[i]; - f(i,0) += massone*xacc; - f(i,1) += massone*yacc; - f(i,2) += massone*zacc; - eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2)); + f(i,0) += static_cast(massone*xacc_kk); + f(i,1) += static_cast(massone*yacc_kk); + f(i,2) += static_cast(massone*zacc_kk); + eg -= static_cast(massone * (xacc_kk*x(i,0) + yacc_kk*x(i,1) + zacc_kk*x(i,2))); } } @@ -105,10 +109,10 @@ void FixGravityKokkos::operator()(TagFixGravityMass, const int i, do { if (mask[i] & groupbit) { KK_FLOAT massone = mass[type[i]]; - f(i,0) += massone*xacc; - f(i,1) += massone*yacc; - f(i,2) += massone*zacc; - eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2)); + f(i,0) += static_cast(massone*xacc_kk); + f(i,1) += static_cast(massone*yacc_kk); + f(i,2) += static_cast(massone*zacc_kk); + eg -= static_cast(massone * (xacc_kk*x(i,0) + yacc_kk*x(i,1) + zacc_kk*x(i,2))); } } diff --git a/src/KOKKOS/fix_gravity_kokkos.h b/src/KOKKOS/fix_gravity_kokkos.h index cd82c75b1f7..9c9b732cfc0 100644 --- a/src/KOKKOS/fix_gravity_kokkos.h +++ b/src/KOKKOS/fix_gravity_kokkos.h @@ -52,6 +52,8 @@ class FixGravityKokkos : public FixGravity { typename AT::t_kkfloat_1d_randomread mass; typename AT::t_int_1d type; typename AT::t_int_1d mask; + + KK_FLOAT xacc_kk, yacc_kk, zacc_kk; }; } // namespace LAMMPS_NS From df9b165567479cc6f3d5a8a33a16b3249fcf3eda Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:34:38 -0700 Subject: [PATCH 106/604] Removed silent conversions from the most expensive kernels in pair_reaxff_kokkos.*; some other kernels are numerically unstable in reduced precision or are sufficiently inexpensive. 
--- src/KOKKOS/pair_reaxff_kokkos.cpp | 555 +++++++++++++++--------------- src/KOKKOS/pair_reaxff_kokkos.h | 1 + 2 files changed, 279 insertions(+), 277 deletions(-) diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 58f18bd0079..1bde2933db2 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -238,36 +238,36 @@ void PairReaxFFKokkos::setup() if (map[i] == -1) continue; // general - k_params_sing.view_host()(i).mass = api->system->reax_param.sbp[map[i]].mass; + k_params_sing.view_host()(i).mass = static_cast(api->system->reax_param.sbp[map[i]].mass); // polarization - k_params_sing.view_host()(i).chi = api->system->reax_param.sbp[map[i]].chi; - k_params_sing.view_host()(i).eta = api->system->reax_param.sbp[map[i]].eta; + k_params_sing.view_host()(i).chi = static_cast(api->system->reax_param.sbp[map[i]].chi); + k_params_sing.view_host()(i).eta = static_cast(api->system->reax_param.sbp[map[i]].eta); // bond order - k_params_sing.view_host()(i).r_s = api->system->reax_param.sbp[map[i]].r_s; - k_params_sing.view_host()(i).r_pi = api->system->reax_param.sbp[map[i]].r_pi; - k_params_sing.view_host()(i).r_pi2 = api->system->reax_param.sbp[map[i]].r_pi_pi; - k_params_sing.view_host()(i).valency = api->system->reax_param.sbp[map[i]].valency; - k_params_sing.view_host()(i).valency_val = api->system->reax_param.sbp[map[i]].valency_val; - k_params_sing.view_host()(i).valency_boc = api->system->reax_param.sbp[map[i]].valency_boc; - k_params_sing.view_host()(i).valency_e = api->system->reax_param.sbp[map[i]].valency_e; - k_params_sing.view_host()(i).nlp_opt = api->system->reax_param.sbp[map[i]].nlp_opt; + k_params_sing.view_host()(i).r_s = static_cast(api->system->reax_param.sbp[map[i]].r_s); + k_params_sing.view_host()(i).r_pi = static_cast(api->system->reax_param.sbp[map[i]].r_pi); + k_params_sing.view_host()(i).r_pi2 = static_cast(api->system->reax_param.sbp[map[i]].r_pi_pi); + 
k_params_sing.view_host()(i).valency = static_cast(api->system->reax_param.sbp[map[i]].valency); + k_params_sing.view_host()(i).valency_val = static_cast(api->system->reax_param.sbp[map[i]].valency_val); + k_params_sing.view_host()(i).valency_boc = static_cast(api->system->reax_param.sbp[map[i]].valency_boc); + k_params_sing.view_host()(i).valency_e = static_cast(api->system->reax_param.sbp[map[i]].valency_e); + k_params_sing.view_host()(i).nlp_opt = static_cast(api->system->reax_param.sbp[map[i]].nlp_opt); // multibody - k_params_sing.view_host()(i).p_lp2 = api->system->reax_param.sbp[map[i]].p_lp2; - k_params_sing.view_host()(i).p_ovun2 = api->system->reax_param.sbp[map[i]].p_ovun2; - k_params_sing.view_host()(i).p_ovun5 = api->system->reax_param.sbp[map[i]].p_ovun5; + k_params_sing.view_host()(i).p_lp2 = static_cast(api->system->reax_param.sbp[map[i]].p_lp2); + k_params_sing.view_host()(i).p_ovun2 = static_cast(api->system->reax_param.sbp[map[i]].p_ovun2); + k_params_sing.view_host()(i).p_ovun5 = static_cast(api->system->reax_param.sbp[map[i]].p_ovun5); // angular - k_params_sing.view_host()(i).p_val3 = api->system->reax_param.sbp[map[i]].p_val3; - k_params_sing.view_host()(i).p_val5 = api->system->reax_param.sbp[map[i]].p_val5; + k_params_sing.view_host()(i).p_val3 = static_cast(api->system->reax_param.sbp[map[i]].p_val3); + k_params_sing.view_host()(i).p_val5 = static_cast(api->system->reax_param.sbp[map[i]].p_val5); // hydrogen bond - k_params_sing.view_host()(i).p_hbond = api->system->reax_param.sbp[map[i]].p_hbond; + k_params_sing.view_host()(i).p_hbond = static_cast(api->system->reax_param.sbp[map[i]].p_hbond); // acks2 - k_params_sing.view_host()(i).bcut_acks2 = api->system->reax_param.sbp[map[i]].bcut_acks2; + k_params_sing.view_host()(i).bcut_acks2 = static_cast(api->system->reax_param.sbp[map[i]].bcut_acks2); for (j = 1; j <= n; j++) { if (map[j] == -1) continue; @@ -275,42 +275,42 @@ void PairReaxFFKokkos::setup() twbp = 
&(api->system->reax_param.tbp[map[i]][map[j]]); // vdW - k_params_twbp.view_host()(i,j).gamma = twbp->gamma; - k_params_twbp.view_host()(i,j).gamma_w = twbp->gamma_w; - k_params_twbp.view_host()(i,j).alpha = twbp->alpha; - k_params_twbp.view_host()(i,j).r_vdw = twbp->r_vdW; - k_params_twbp.view_host()(i,j).epsilon = twbp->D; - k_params_twbp.view_host()(i,j).acore = twbp->acore; - k_params_twbp.view_host()(i,j).ecore = twbp->ecore; - k_params_twbp.view_host()(i,j).rcore = twbp->rcore; - k_params_twbp.view_host()(i,j).lgre = twbp->lgre; - k_params_twbp.view_host()(i,j).lgcij = twbp->lgcij; + k_params_twbp.view_host()(i,j).gamma = static_cast(twbp->gamma); + k_params_twbp.view_host()(i,j).gamma_w = static_cast(twbp->gamma_w); + k_params_twbp.view_host()(i,j).alpha = static_cast(twbp->alpha); + k_params_twbp.view_host()(i,j).r_vdw = static_cast(twbp->r_vdW); + k_params_twbp.view_host()(i,j).epsilon = static_cast(twbp->D); + k_params_twbp.view_host()(i,j).acore = static_cast(twbp->acore); + k_params_twbp.view_host()(i,j).ecore = static_cast(twbp->ecore); + k_params_twbp.view_host()(i,j).rcore = static_cast(twbp->rcore); + k_params_twbp.view_host()(i,j).lgre = static_cast(twbp->lgre); + k_params_twbp.view_host()(i,j).lgcij = static_cast(twbp->lgcij); // bond order - k_params_twbp.view_host()(i,j).r_s = twbp->r_s; - k_params_twbp.view_host()(i,j).r_pi = twbp->r_p; - k_params_twbp.view_host()(i,j).r_pi2 = twbp->r_pp; - k_params_twbp.view_host()(i,j).p_bo1 = twbp->p_bo1; - k_params_twbp.view_host()(i,j).p_bo2 = twbp->p_bo2; - k_params_twbp.view_host()(i,j).p_bo3 = twbp->p_bo3; - k_params_twbp.view_host()(i,j).p_bo4 = twbp->p_bo4; - k_params_twbp.view_host()(i,j).p_bo5 = twbp->p_bo5; - k_params_twbp.view_host()(i,j).p_bo6 = twbp->p_bo6; - k_params_twbp.view_host()(i,j).p_boc3 = twbp->p_boc3; - k_params_twbp.view_host()(i,j).p_boc4 = twbp->p_boc4; - k_params_twbp.view_host()(i,j).p_boc5 = twbp->p_boc5; - k_params_twbp.view_host()(i,j).ovc = twbp->ovc; - 
k_params_twbp.view_host()(i,j).v13cor = twbp->v13cor; + k_params_twbp.view_host()(i,j).r_s = static_cast(twbp->r_s); + k_params_twbp.view_host()(i,j).r_pi = static_cast(twbp->r_p); + k_params_twbp.view_host()(i,j).r_pi2 = static_cast(twbp->r_pp); + k_params_twbp.view_host()(i,j).p_bo1 = static_cast(twbp->p_bo1); + k_params_twbp.view_host()(i,j).p_bo2 = static_cast(twbp->p_bo2); + k_params_twbp.view_host()(i,j).p_bo3 = static_cast(twbp->p_bo3); + k_params_twbp.view_host()(i,j).p_bo4 = static_cast(twbp->p_bo4); + k_params_twbp.view_host()(i,j).p_bo5 = static_cast(twbp->p_bo5); + k_params_twbp.view_host()(i,j).p_bo6 = static_cast(twbp->p_bo6); + k_params_twbp.view_host()(i,j).p_boc3 = static_cast(twbp->p_boc3); + k_params_twbp.view_host()(i,j).p_boc4 = static_cast(twbp->p_boc4); + k_params_twbp.view_host()(i,j).p_boc5 = static_cast(twbp->p_boc5); + k_params_twbp.view_host()(i,j).ovc = static_cast(twbp->ovc); + k_params_twbp.view_host()(i,j).v13cor = static_cast(twbp->v13cor); // bond energy - k_params_twbp.view_host()(i,j).p_be1 = twbp->p_be1; - k_params_twbp.view_host()(i,j).p_be2 = twbp->p_be2; - k_params_twbp.view_host()(i,j).De_s = twbp->De_s; - k_params_twbp.view_host()(i,j).De_p = twbp->De_p; - k_params_twbp.view_host()(i,j).De_pp = twbp->De_pp; + k_params_twbp.view_host()(i,j).p_be1 = static_cast(twbp->p_be1); + k_params_twbp.view_host()(i,j).p_be2 = static_cast(twbp->p_be2); + k_params_twbp.view_host()(i,j).De_s = static_cast(twbp->De_s); + k_params_twbp.view_host()(i,j).De_p = static_cast(twbp->De_p); + k_params_twbp.view_host()(i,j).De_pp = static_cast(twbp->De_pp); // multibody - k_params_twbp.view_host()(i,j).p_ovun1 = twbp->p_ovun1; + k_params_twbp.view_host()(i,j).p_ovun1 = static_cast(twbp->p_ovun1); for (k = 1; k <= n; k++) { if (map[k] == -1) continue; @@ -318,21 +318,21 @@ void PairReaxFFKokkos::setup() // Angular thbh = &(api->system->reax_param.thbp[map[i]][map[j]][map[k]]); thbp = &(thbh->prm[0]); - k_params_thbp.view_host()(i,j,k).cnt = 
thbh->cnt; - k_params_thbp.view_host()(i,j,k).theta_00 = thbp->theta_00; - k_params_thbp.view_host()(i,j,k).p_val1 = thbp->p_val1; - k_params_thbp.view_host()(i,j,k).p_val2 = thbp->p_val2; - k_params_thbp.view_host()(i,j,k).p_val4 = thbp->p_val4; - k_params_thbp.view_host()(i,j,k).p_val7 = thbp->p_val7; - k_params_thbp.view_host()(i,j,k).p_pen1 = thbp->p_pen1; - k_params_thbp.view_host()(i,j,k).p_coa1 = thbp->p_coa1; + k_params_thbp.view_host()(i,j,k).cnt = static_cast(thbh->cnt); + k_params_thbp.view_host()(i,j,k).theta_00 = static_cast(thbp->theta_00); + k_params_thbp.view_host()(i,j,k).p_val1 = static_cast(thbp->p_val1); + k_params_thbp.view_host()(i,j,k).p_val2 = static_cast(thbp->p_val2); + k_params_thbp.view_host()(i,j,k).p_val4 = static_cast(thbp->p_val4); + k_params_thbp.view_host()(i,j,k).p_val7 = static_cast(thbp->p_val7); + k_params_thbp.view_host()(i,j,k).p_pen1 = static_cast(thbp->p_pen1); + k_params_thbp.view_host()(i,j,k).p_coa1 = static_cast(thbp->p_coa1); // Hydrogen Bond hbp = &(api->system->reax_param.hbp[map[i]][map[j]][map[k]]); - k_params_hbp.view_host()(i,j,k).p_hb1 = hbp->p_hb1; - k_params_hbp.view_host()(i,j,k).p_hb2 = hbp->p_hb2; - k_params_hbp.view_host()(i,j,k).p_hb3 = hbp->p_hb3; - k_params_hbp.view_host()(i,j,k).r0_hb = hbp->r0_hb; + k_params_hbp.view_host()(i,j,k).p_hb1 = static_cast(hbp->p_hb1); + k_params_hbp.view_host()(i,j,k).p_hb2 = static_cast(hbp->p_hb2); + k_params_hbp.view_host()(i,j,k).p_hb3 = static_cast(hbp->p_hb3); + k_params_hbp.view_host()(i,j,k).r0_hb = static_cast(hbp->r0_hb); for (m = 1; m <= n; m++) { if (map[m] == -1) continue; @@ -340,11 +340,11 @@ void PairReaxFFKokkos::setup() // Torsion fbh = &(api->system->reax_param.fbp[map[i]][map[j]][map[k]][map[m]]); fbp = &(fbh->prm[0]); - k_params_fbp.view_host()(i,j,k,m).p_tor1 = fbp->p_tor1; - k_params_fbp.view_host()(i,j,k,m).p_cot1 = fbp->p_cot1; - k_params_fbp.view_host()(i,j,k,m).V1 = fbp->V1; - k_params_fbp.view_host()(i,j,k,m).V2 = fbp->V2; - 
k_params_fbp.view_host()(i,j,k,m).V3 = fbp->V3; + k_params_fbp.view_host()(i,j,k,m).p_tor1 = static_cast(fbp->p_tor1); + k_params_fbp.view_host()(i,j,k,m).p_cot1 = static_cast(fbp->p_cot1); + k_params_fbp.view_host()(i,j,k,m).V1 = static_cast(fbp->V1); + k_params_fbp.view_host()(i,j,k,m).V2 = static_cast(fbp->V2); + k_params_fbp.view_host()(i,j,k,m).V3 = static_cast(fbp->V3); } } } @@ -356,14 +356,17 @@ void PairReaxFFKokkos::setup() k_params_hbp.modify_host(); // cutoffs - cut_nbsq = api->control->nonb_cut * api->control->nonb_cut; - cut_hbsq = api->control->hbond_cut * api->control->hbond_cut; - cut_bosq = api->control->bond_cut * api->control->bond_cut; + cut_nbsq = static_cast(api->control->nonb_cut * api->control->nonb_cut); + cut_hbsq = static_cast(api->control->hbond_cut * api->control->hbond_cut); + cut_bosq = static_cast(api->control->bond_cut * api->control->bond_cut); // bond order cutoffs - bo_cut = 0.01 * gp[29]; - thb_cut = api->control->thb_cut; - thb_cutsq = 0.000010; //thb_cut*thb_cut; + bo_cut = static_cast(0.01 * gp[29]); + thb_cut = static_cast(api->control->thb_cut); + thb_cutsq = static_cast(0.000010); //thb_cut*thb_cut; + + // misc precision conversions + C_ele_reduced = static_cast(C_ele); if (atom->nmax > nmax) { nmax = atom->nmax; @@ -399,15 +402,15 @@ void PairReaxFFKokkos::init_md() swb2 = swb * swb; swb3 = swb * swb2; - k_tap.view_host()(7) = 20.0/d7; - k_tap.view_host()(6) = -70.0 * (swa + swb) / d7; - k_tap.view_host()(5) = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7; - k_tap.view_host()(4) = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7; - k_tap.view_host()(3) = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7; - k_tap.view_host()(2) =-210.0 * (swa3*swb2 + swa2*swb3) / d7; - k_tap.view_host()(1) = 140.0 * swa3 * swb3 / d7; - k_tap.view_host()(0) = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 - - 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7; + k_tap.view_host()(7) = static_cast(20.0/d7); + k_tap.view_host()(6) = static_cast(-70.0 * 
(swa + swb) / d7); + k_tap.view_host()(5) = static_cast(84.0 * (swa2 + 3.0*swa*swb + swb2) / d7); + k_tap.view_host()(4) = static_cast(-35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7); + k_tap.view_host()(3) = static_cast(140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7); + k_tap.view_host()(2) = static_cast(-210.0 * (swa3*swb2 + swa2*swb3) / d7); + k_tap.view_host()(1) = static_cast(140.0 * swa3 * swb3 / d7); + k_tap.view_host()(0) = static_cast((-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 - + 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7); k_tap.modify_host(); k_tap.template sync(); @@ -762,7 +765,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) if (eflag_global) { for (int i = 0; i < 14; i++) - pvector[i] = 0.0; + pvector[i] = 0; } EV_FLOAT_REAX ev; @@ -781,7 +784,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); } ev_all += ev; - pvector[13] = ev.ecoul; + pvector[13] = static_cast(ev.ecoul); // LJ + Coulomb if (api->control->tabulate) { @@ -810,8 +813,8 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) } } ev_all += ev; - pvector[10] = ev.evdwl; - pvector[11] = ev.ecoul; + pvector[10] = static_cast(ev.evdwl); + pvector[11] = static_cast(ev.ecoul); if (atom->nmax > nmax) { @@ -940,9 +943,9 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); ev_all += ev; } - pvector[2] = ev.ereax[0]; - pvector[1] = ev.ereax[1]+ev.ereax[2]; - pvector[3] = 0.0; + pvector[2] = static_cast(ev.ereax[0]); + pvector[1] = static_cast(ev.ereax[1]+ev.ereax[2]); + pvector[3] = 0; ev_all.evdwl += ev.ereax[0] + ev.ereax[1] + ev.ereax[2]; int count_angular = 0; @@ -990,9 +993,9 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy>(0,count_angular),*this); ev_all += ev; } - pvector[4] = ev.ereax[3]; - pvector[5] = ev.ereax[4]; - pvector[6] = ev.ereax[5]; + pvector[4] = 
static_cast(ev.ereax[3]); + pvector[5] = static_cast(ev.ereax[4]); + pvector[6] = static_cast(ev.ereax[5]); ev_all.evdwl += ev.ereax[3] + ev.ereax[4] + ev.ereax[5]; // Torsion @@ -1009,8 +1012,8 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy>(0,count_torsion),*this); ev_all += ev; } - pvector[8] = ev.ereax[6]; - pvector[9] = ev.ereax[7]; + pvector[8] = static_cast(ev.ereax[6]); + pvector[9] = static_cast(ev.ereax[7]); ev_all.evdwl += ev.ereax[6] + ev.ereax[7]; // Hydrogen Bond @@ -1029,7 +1032,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) ev_all += ev; } } - pvector[7] = ev.ereax[8]; + pvector[7] = static_cast(ev.ereax[8]); ev_all.evdwl += ev.ereax[8]; // reduction over duplicated memory @@ -1047,7 +1050,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) else Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); ev_all += ev; - pvector[0] += ev.evdwl; + pvector[0] += static_cast(ev.evdwl); } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); @@ -1056,7 +1059,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) else Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); ev_all += ev; - pvector[0] += ev.evdwl; + pvector[0] += static_cast(ev.evdwl); } // reduction over duplicated memory @@ -1064,17 +1067,17 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::Experimental::contribute(f, dup_f); if (eflag_global) { - eng_vdwl += ev_all.evdwl; - eng_coul += ev_all.ecoul; + eng_vdwl += static_cast(ev_all.evdwl); + eng_coul += static_cast(ev_all.ecoul); } if (vflag_global) { - virial[0] += ev_all.v[0]; - virial[1] += ev_all.v[1]; - virial[2] += ev_all.v[2]; - virial[3] += ev_all.v[3]; - virial[4] += ev_all.v[4]; - virial[5] += ev_all.v[5]; + virial[0] += static_cast(ev_all.v[0]); + virial[1] += static_cast(ev_all.v[1]); + virial[2] += static_cast(ev_all.v[2]); + virial[3] += static_cast(ev_all.v[3]); + 
virial[4] += static_cast(ev_all.v[4]); + virial[5] += static_cast(ev_all.v[5]); } if (vflag_fdotr) pair_virial_fdotr_compute(this); @@ -1131,13 +1134,13 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputePolar const KK_FLOAT chi = paramssing(itype).chi; const KK_FLOAT eta = paramssing(itype).eta; - KK_FLOAT epol = KCALpMOL_to_EV*(chi*qi+(eta/2.0)*qi*qi); + KK_FLOAT epol = static_cast(KCALpMOL_to_EV)*(chi*qi+(static_cast(0.5)*eta)*qi*qi); /* energy due to coupling with kinetic energy potential */ if (acks2_flag) - epol += KCALpMOL_to_EV*qi*d_s[NN + i]; + epol += static_cast(KCALpMOL_to_EV)*qi*d_s[NN + i]; - if (eflag_global) ev.ecoul += epol; + if (eflag_global) ev.ecoul += static_cast(epol); //if (eflag_atom) this->template ev_tally(ev,i,i,epol,0.0,0.0,0.0,0.0); if (eflag_atom) this->template e_tally_single(ev,i,epol); } @@ -1176,7 +1179,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb::operator()(TagPairReaxComputeLJCoulomb(2.0)); powr_vdw = tmp_var*rij*rij; - powgi_vdw = pow(1.0/gamma_w,gp[28]); - dfn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]-1.0); + powgi_vdw = pow(static_cast(1.0)/gamma_w,gp[28]); + dfn13 = pow(powr_vdw+powgi_vdw,static_cast(1.0)/gp[28]-static_cast(1.0)); fn13 = dfn13*(powr_vdw+powgi_vdw); dfn13 = dfn13*tmp_var; - exp2 = exp(0.5*alpha*(1.0-fn13/r_vdw)); + exp2 = exp(static_cast(0.5)*alpha*(static_cast(1.0)-fn13/r_vdw)); exp1 = exp2*exp2; - etmp = epsilon*(exp1-2.0*exp2); + etmp = epsilon*(exp1-static_cast(2.0)*exp2); evdwl = Tap*etmp; fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*dfn13; } else { - exp2 = exp(0.5*alpha*(1.0-rij/r_vdw)); + exp2 = exp(static_cast(0.5)*alpha*(static_cast(1.0)-rij/r_vdw)); exp1 = exp2*exp2; - etmp = epsilon*(exp1-2.0*exp2); + etmp = epsilon*(exp1-static_cast(2.0)*exp2); evdwl = Tap*etmp; fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*rij; } @@ -1254,7 +1257,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb(1.0)-(rij/rcore))); const KK_FLOAT de_core = 
-(acore/rcore)*e_core; evdwl += Tap*e_core; fvdwl += dTap*e_core+Tap*de_core/rij; @@ -1266,7 +1269,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb(6.0)*elg*rij5/(rij6+re6); evdwl += Tap*elg; fvdwl += dTap*elg+Tap*delg/rij; } @@ -1276,8 +1279,8 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb::operator()(TagPairReaxComputeLJCoulomb(0.5) * (paramssing(itype).bcut_acks2 + paramssing(jtype).bcut_acks2); if (rij <= xcut) { const KK_FLOAT d = rij / xcut; - const KK_FLOAT bond_softness = gp[34] * pow( d, 3.0 ) - * pow( 1.0 - d, 6.0 ); + const KK_FLOAT bond_softness = gp[34] * pow( d, static_cast(3.0) ) + * pow( static_cast(1.0) - d, static_cast(6.0) ); - if (bond_softness > 0.0) { + if (bond_softness > static_cast(0.0)) { /* Coulombic energy contribution */ const KK_FLOAT effpot_diff = d_s[NN + i] - d_s[NN + j]; - const KK_FLOAT e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + const KK_FLOAT e_ele = -static_cast(0.5) * static_cast(KCALpMOL_to_EV) * bond_softness * SQR( effpot_diff ); ecoul += e_ele; @@ -1305,11 +1308,11 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb(3.0) / xcut * pow( d, static_cast(2.0) ) + * pow( static_cast(1.0) - d, static_cast(5.0) ) * (static_cast(1.0) - static_cast(3.0) * d); + d_bond_softness = -static_cast(0.5) * d_bond_softness * SQR( effpot_diff ); - d_bond_softness = KCALpMOL_to_EV * d_bond_softness + d_bond_softness = static_cast(KCALpMOL_to_EV) * d_bond_softness / rij; fcoul += d_bond_softness; @@ -1318,16 +1321,16 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeLJCoulomb(delx*ftotal); + a_f(j,0) -= static_cast(delx*ftotal); + fytmp += static_cast(dely*ftotal); + a_f(j,1) -= static_cast(dely*ftotal); + fztmp += static_cast(delz*ftotal); + a_f(j,2) -= static_cast(delz*ftotal); if (EVFLAG) { - if (eflag_global) ev.evdwl += evdwl; - if (eflag_global) ev.ecoul += ecoul; + if (eflag_global) ev.evdwl += static_cast(evdwl); + if (eflag_global) ev.ecoul += static_cast(ecoul); if 
(vflag_either || eflag_atom) this->template ev_tally(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz); } @@ -1406,25 +1409,27 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo /* Cubic Spline Interpolation */ int r = (int)(rij * t.inv_dx); if (r == 0) ++r; - const KK_FLOAT base = (KK_FLOAT)(r+1) * t.dx; - const KK_FLOAT dif = rij - base; + const KK_FLOAT base = static_cast(r+1) * t.dx; + + // This is a double to match the types of cubic_spline_coef members + const double dif = static_cast(rij - base); const cubic_spline_coef vdW = t.d_vdW[r]; const cubic_spline_coef ele = t.d_ele[r]; const cubic_spline_coef CEvd = t.d_CEvd[r]; const cubic_spline_coef CEclmb = t.d_CEclmb[r]; - const KK_FLOAT evdwl = ((vdW.d*dif + vdW.c)*dif + vdW.b)*dif + - vdW.a; + const KK_FLOAT evdwl = static_cast(((vdW.d*dif + vdW.c)*dif + vdW.b)*dif + + vdW.a); - KK_FLOAT ecoul = (((ele.d*dif + ele.c)*dif + ele.b)*dif + - ele.a)*qi*qj; + KK_FLOAT ecoul = static_cast((((ele.d*dif + ele.c)*dif + ele.b)*dif + + ele.a)*static_cast(qi*qj)); - const KK_FLOAT fvdwl = ((CEvd.d*dif + CEvd.c)*dif + CEvd.b)*dif + - CEvd.a; + const KK_FLOAT fvdwl = static_cast(((CEvd.d*dif + CEvd.c)*dif + CEvd.b)*dif + + CEvd.a); - KK_FLOAT fcoul = (((CEclmb.d*dif+CEclmb.c)*dif+CEclmb.b)*dif + - CEclmb.a)*qi*qj; + KK_FLOAT fcoul = static_cast((((CEclmb.d*dif+CEclmb.c)*dif+CEclmb.b)*dif + + CEclmb.a)*static_cast(qi*qj)); /* contribution to energy and gradients (atoms and cell) * due to geometry-dependent terms in the ACKS2 @@ -1432,19 +1437,19 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo if (acks2_flag) { /* kinetic energy terms */ - KK_FLOAT xcut = 0.5 * (paramssing(itype).bcut_acks2 + KK_FLOAT xcut = static_cast(0.5) * (paramssing(itype).bcut_acks2 + paramssing(jtype).bcut_acks2); if (rij <= xcut) { const KK_FLOAT d = rij / xcut; - const KK_FLOAT bond_softness = gp[34] * pow( d, 3.0 ) - * pow( 1.0 - d, 6.0 ); + const KK_FLOAT bond_softness = gp[34] * pow( d, static_cast(3.0) ) + 
* pow( static_cast(1.0) - d, static_cast(6.0) ); - if (bond_softness > 0.0) { + if (bond_softness > 0) { /* Coulombic energy contribution */ const KK_FLOAT effpot_diff = d_s[NN + i] - d_s[NN + j]; - const KK_FLOAT e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + const KK_FLOAT e_ele = -static_cast(0.5 * KCALpMOL_to_EV) * bond_softness * SQR( effpot_diff ); ecoul += e_ele; @@ -1452,11 +1457,11 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo /* forces contribution */ KK_FLOAT d_bond_softness; d_bond_softness = gp[34] - * 3.0 / xcut * pow( d, 2.0 ) - * pow( 1.0 - d, 5.0 ) * (1.0 - 3.0 * d); - d_bond_softness = -0.5 * d_bond_softness + * static_cast(3.0) / xcut * pow( d, static_cast(2.0) ) + * pow( static_cast(1.0) - d, static_cast(5.0) ) * (static_cast(1.0) - static_cast(3.0) * d); + d_bond_softness = -static_cast(0.5) * d_bond_softness * SQR( effpot_diff ); - d_bond_softness = KCALpMOL_to_EV * d_bond_softness + d_bond_softness = static_cast(KCALpMOL_to_EV) * d_bond_softness / rij; fcoul += d_bond_softness; @@ -1465,16 +1470,16 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo } const KK_FLOAT ftotal = fvdwl + fcoul; - fxtmp += delx*ftotal; - fytmp += dely*ftotal; - fztmp += delz*ftotal; - a_f(j,0) -= delx*ftotal; - a_f(j,1) -= dely*ftotal; - a_f(j,2) -= delz*ftotal; + fxtmp += static_cast(delx*ftotal); + fytmp += static_cast(dely*ftotal); + fztmp += static_cast(delz*ftotal); + a_f(j,0) -= static_cast(delx*ftotal); + a_f(j,1) -= static_cast(dely*ftotal); + a_f(j,2) -= static_cast(delz*ftotal); if (EVFLAG) { - if (eflag_global) ev.evdwl += evdwl; - if (eflag_global) ev.ecoul += ecoul; + if (eflag_global) ev.evdwl += static_cast(evdwl); + if (eflag_global) ev.ecoul += static_cast(ecoul); if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz); } @@ -1571,12 +1576,12 @@ void PairReaxFFKokkos::allocate_array() template KOKKOS_INLINE_FUNCTION void 
PairReaxFFKokkos::operator()(TagPairReaxZero, const int &n) const { - d_total_bo(n) = 0.0; - d_CdDelta(n) = 0.0; - d_bo_num(n) = 0.0; - d_hb_num(n) = 0.0; + d_total_bo(n) = 0; + d_CdDelta(n) = 0; + d_bo_num(n) = 0; + d_hb_num(n) = 0; for (int j = 0; j < 3; j++) - d_dDeltap_self(n,j) = 0.0; + d_dDeltap_self(n,j) = 0; } /* ---------------------------------------------------------------------- */ @@ -1601,12 +1606,12 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< const int jnum = d_numneigh[i]; KK_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3]; - KK_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0}; - KK_FLOAT total_bo_i = 0.0; + KK_FLOAT dDeltap_self_i[3] = {0, 0, 0}; + KK_FLOAT total_bo_i = 0; int ihb = -1; - if (cut_hbsq > 0.0) + if (cut_hbsq > 0) ihb = paramssing(itype).p_hbond; int nnz; @@ -1684,12 +1689,14 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< d_BO_pi(i,j_index) = BO_pi; d_BO_pi2(i,j_index) = BO_pi2; - KK_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij; - KK_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij; - KK_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij; + KK_FLOAT rsq_inv = static_cast(1.0) / rsq; + + KK_FLOAT Cln_BOp_s = p_bo2 * C12 * rsq_inv; + KK_FLOAT Cln_BOp_pi = p_bo4 * C34 * rsq_inv; + KK_FLOAT Cln_BOp_pi2 = p_bo6 * C56 * rsq_inv; if (nlocal == 0) - Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0; + Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0; for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d]; for (int d = 0; d < 3; d++) dDeltap_self_i[d] += dBOp_i[d]; @@ -1738,7 +1745,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP int ihb = -1; - if (cut_hbsq > 0.0) + if (cut_hbsq > 0) ihb = paramssing(itype).p_hbond; int nnz; @@ -1824,7 +1831,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview 0.0) + if (cut_hbsq > 0) ihb = paramssing(itype).p_hbond; for (int jj = 0; jj < jnum; jj++) { @@ -1947,8 +1954,8 @@ void 
PairReaxFFKokkos::operator()(TagPairReaxBuildListsFull, const i const int itype = type(i); KK_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3]; - KK_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0}; - KK_FLOAT total_bo_i = 0.0; + KK_FLOAT dDeltap_self_i[3] = {0, 0, 0}; + KK_FLOAT total_bo_i = 0; const int jnum = d_bo_num[i]; for (int j_index = 0; j_index < jnum; j_index++) { @@ -1959,7 +1966,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsFull, const i delij[1] = x(j,1) - ytmp; delij[2] = x(j,2) - ztmp; const KK_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; - const KK_FLOAT rsq_inv = 1.0 / rsq; + const KK_FLOAT rsq_inv = static_cast(1.0) / rsq; // bond_list const KK_FLOAT rij = sqrt(rsq); @@ -1985,7 +1992,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsFull, const i KK_FLOAT Cln_BOp_pi2 = p_bo6 * C56 * rsq_inv; if (nlocal == 0) - Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0; + Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0; for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d]; for (int d = 0; d < 3; d++) dDeltap_self_i[d] += dBOp_i[d]; @@ -2020,20 +2027,20 @@ void PairReaxFFKokkos::compute_bo(KK_FLOAT rij, int itype, int jtype const KK_FLOAT r_pi = paramstwbp(itype,jtype).r_pi; const KK_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2; - if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) { - C12 = p_bo1 * ((p_bo2 != 0) ? (pow(rij/r_s,p_bo2)) : 1.0); - BO_s = (1.0+bo_cut)*exp(C12); - } else BO_s = C12 = 0.0; + if (paramssing(itype).r_s > 0 && paramssing(jtype).r_s > 0) { + C12 = p_bo1 * ((p_bo2 != 0) ? (pow(rij/r_s,p_bo2)) : static_cast(1.0)); + BO_s = (static_cast(1.0)+bo_cut)*exp(C12); + } else BO_s = C12 = 0; - if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) { - C34 = p_bo3 * ((p_bo4 != 0) ? (pow(rij/r_pi,p_bo4)) : 1.0); + if (paramssing(itype).r_pi > 0 && paramssing(jtype).r_pi > 0) { + C34 = p_bo3 * ((p_bo4 != 0) ? 
(pow(rij/r_pi,p_bo4)) : static_cast(1.0)); BO_pi = exp(C34); - } else BO_pi = C34 = 0.0; + } else BO_pi = C34 = 0; - if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) { - C56 = p_bo5 * ((p_bo6 != 0) ? (pow(rij/r_pi2,p_bo6)) : 1.0); + if (paramssing(itype).r_pi2 > 0 && paramssing(jtype).r_pi2 > 0) { + C56 = p_bo5 * ((p_bo6 != 0) ? (pow(rij/r_pi2,p_bo6)) : static_cast(1.0)); BO_pi2 = exp(C56); - } else BO_pi2 = C56 = 0.0; + } else BO_pi2 = C56 = 0; } @@ -2086,20 +2093,20 @@ void PairReaxFFKokkos::operator()(TagPairReaxBondOrder2, const int & const KK_FLOAT p_boc4 = paramstwbp(itype,jtype).p_boc4; const KK_FLOAT p_boc5 = paramstwbp(itype,jtype).p_boc5; - if (ovc < 0.001 && v13cor < 0.001) { - d_C1dbo(i,j_index) = 1.0; - d_C2dbo(i,j_index) = 0.0; - d_C3dbo(i,j_index) = 0.0; - d_C1dbopi(i,j_index) = 1.0; - d_C2dbopi(i,j_index) = 0.0; - d_C3dbopi(i,j_index) = 0.0; - d_C4dbopi(i,j_index) = 0.0; - d_C1dbopi2(i,j_index) = 1.0; - d_C2dbopi2(i,j_index) = 0.0; - d_C3dbopi2(i,j_index) = 0.0; - d_C4dbopi2(i,j_index) = 0.0; + if (ovc < static_cast(0.001) && v13cor < static_cast(0.001)) { + d_C1dbo(i,j_index) = static_cast(1.0); + d_C2dbo(i,j_index) = 0; + d_C3dbo(i,j_index) = 0; + d_C1dbopi(i,j_index) = static_cast(1.0); + d_C2dbopi(i,j_index) = 0; + d_C3dbopi(i,j_index) = 0; + d_C4dbopi(i,j_index) = 0; + d_C1dbopi2(i,j_index) = static_cast(1.0); + d_C2dbopi2(i,j_index) = 0; + d_C3dbopi2(i,j_index) = 0; + d_C4dbopi2(i,j_index) = 0; } else { - if (ovc >= 0.001) { + if (ovc >= static_cast(0.001)) { exp_p1i = exp((double)(-p_boc1 * d_Deltap[i])); exp_p2i = exp((double)(-p_boc2 * d_Deltap[i])); exp_p1j = exp((double)(-p_boc1 * d_Deltap[j])); @@ -2209,8 +2216,8 @@ void PairReaxFFKokkos::operator()(TagPairReaxBondOrder3, const int & d_Delta_lp_temp[i] = paramssing(itype).nlp_opt - nlp_temp; } - d_sum_ovun(i,1) = 0.0; - d_sum_ovun(i,2) = 0.0; + d_sum_ovun(i,1) = 0; + d_sum_ovun(i,2) = 0; } /* ---------------------------------------------------------------------- */ @@ 
-3772,7 +3779,7 @@ void PairReaxFFKokkos::ev_tally(EV_FLOAT_REAX &ev, const int &i, con auto a_vatom = v_vatom.template access>(); if (eflag_atom) { - const KK_FLOAT epairhalf = 0.5 * epair; + const KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * epair); a_eatom[i] += epairhalf; a_eatom[j] += epairhalf; } @@ -3786,27 +3793,27 @@ void PairReaxFFKokkos::ev_tally(EV_FLOAT_REAX &ev, const int &i, con const KK_FLOAT v5 = dely*delz*fpair; if (vflag_global) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; + ev.v[0] += static_cast(v0); + ev.v[1] += static_cast(v1); + ev.v[2] += static_cast(v2); + ev.v[3] += static_cast(v3); + ev.v[4] += static_cast(v4); + ev.v[5] += static_cast(v5); } if (vflag_atom) { - a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + a_vatom(i,0) += static_cast(static_cast(0.5)*v0); + a_vatom(i,1) += static_cast(static_cast(0.5)*v1); + a_vatom(i,2) += static_cast(static_cast(0.5)*v2); + a_vatom(i,3) += static_cast(static_cast(0.5)*v3); + a_vatom(i,4) += static_cast(static_cast(0.5)*v4); + a_vatom(i,5) += static_cast(static_cast(0.5)*v5); + a_vatom(j,0) += static_cast(static_cast(0.5)*v0); + a_vatom(j,1) += static_cast(static_cast(0.5)*v1); + a_vatom(j,2) += static_cast(static_cast(0.5)*v2); + a_vatom(j,3) += static_cast(static_cast(0.5)*v3); + a_vatom(j,4) += static_cast(static_cast(0.5)*v4); + a_vatom(j,5) += static_cast(static_cast(0.5)*v5); } } } @@ -3824,7 +3831,7 @@ void PairReaxFFKokkos::e_tally(EV_FLOAT_REAX & /*ev*/, const int &i, auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); - const KK_FLOAT epairhalf = 0.5 * epair; + const 
KK_ACC_FLOAT epairhalf = static_cast(static_cast(0.5) * epair); a_eatom[i] += epairhalf; a_eatom[j] += epairhalf; } @@ -3842,7 +3849,7 @@ void PairReaxFFKokkos::e_tally_single(EV_FLOAT_REAX & /*ev*/, const auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); auto a_eatom = v_eatom.template access>(); - a_eatom[i] += epair; + a_eatom[i] += static_cast(epair); } /* ---------------------------------------------------------------------- */ @@ -3853,14 +3860,16 @@ KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::v_tally(EV_FLOAT_REAX &ev, const int &i, KK_ACC_FLOAT *fi, KK_FLOAT *drij) const { - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; + + const KK_FLOAT half = static_cast(0.5); - v[0] = 0.5*drij[0]*fi[0]; - v[1] = 0.5*drij[1]*fi[1]; - v[2] = 0.5*drij[2]*fi[2]; - v[3] = 0.5*drij[0]*fi[1]; - v[4] = 0.5*drij[0]*fi[2]; - v[5] = 0.5*drij[1]*fi[2]; + v[0] = static_cast(half * drij[0])*fi[0]; + v[1] = static_cast(half * drij[1])*fi[1]; + v[2] = static_cast(half * drij[2])*fi[2]; + v[3] = static_cast(half * drij[0])*fi[1]; + v[4] = static_cast(half * drij[0])*fi[2]; + v[5] = static_cast(half * drij[1])*fi[2]; if (vflag_global) { ev.v[0] += v[0]; @@ -3892,14 +3901,14 @@ void PairReaxFFKokkos::v_tally3(EV_FLOAT_REAX &ev, const int &i, con auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; - v[0] = drij[0]*fj[0] + drik[0]*fk[0]; - v[1] = drij[1]*fj[1] + drik[1]*fk[1]; - v[2] = drij[2]*fj[2] + drik[2]*fk[2]; - v[3] = drij[0]*fj[1] + drik[0]*fk[1]; - v[4] = drij[0]*fj[2] + drik[0]*fk[2]; - v[5] = drij[1]*fj[2] + drik[1]*fk[2]; + v[0] = static_cast(drij[0])*fj[0] + static_cast(drik[0])*fk[0]; + v[1] = static_cast(drij[1])*fj[1] + static_cast(drik[1])*fk[1]; + v[2] = static_cast(drij[2])*fj[2] + static_cast(drik[2])*fk[2]; + v[3] = static_cast(drij[0])*fj[1] + static_cast(drik[0])*fk[1]; + v[4] = 
static_cast(drij[0])*fj[2] + static_cast(drik[0])*fk[2]; + v[5] = static_cast(drij[1])*fj[2] + static_cast(drik[1])*fk[2]; if (vflag_global) { ev.v[0] += v[0]; @@ -3911,12 +3920,12 @@ void PairReaxFFKokkos::v_tally3(EV_FLOAT_REAX &ev, const int &i, con } if (vflag_atom) { - a_vatom(i,0) += THIRD * v[0]; a_vatom(i,1) += THIRD * v[1]; a_vatom(i,2) += THIRD * v[2]; - a_vatom(i,3) += THIRD * v[3]; a_vatom(i,4) += THIRD * v[4]; a_vatom(i,5) += THIRD * v[5]; - a_vatom(j,0) += THIRD * v[0]; a_vatom(j,1) += THIRD * v[1]; a_vatom(j,2) += THIRD * v[2]; - a_vatom(j,3) += THIRD * v[3]; a_vatom(j,4) += THIRD * v[4]; a_vatom(j,5) += THIRD * v[5]; - a_vatom(k,0) += THIRD * v[0]; a_vatom(k,1) += THIRD * v[1]; a_vatom(k,2) += THIRD * v[2]; - a_vatom(k,3) += THIRD * v[3]; a_vatom(k,4) += THIRD * v[4]; a_vatom(k,5) += THIRD * v[5]; + for (int n = 0; n < 6; n++) + v[n] *= static_cast(THIRD); + + for (int n = 0; n < 6; n++) a_vatom(i,n) += v[n]; + for (int n = 0; n < 6; n++) a_vatom(j,n) += v[n]; + for (int n = 0; n < 6; n++) a_vatom(k,n) += v[n]; } } @@ -3930,14 +3939,14 @@ void PairReaxFFKokkos::v_tally4(EV_FLOAT_REAX &ev, const int &i, con { // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; - v[0] = dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0]; - v[1] = dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1]; - v[2] = dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2]; - v[3] = dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1]; - v[4] = dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2]; - v[5] = dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2]; + v[0] = static_cast(dril[0])*fi[0] + static_cast(drjl[0])*fj[0] + static_cast(drkl[0])*fk[0]; + v[1] = static_cast(dril[1])*fi[1] + static_cast(drjl[1])*fj[1] + static_cast(drkl[1])*fk[1]; + v[2] = static_cast(dril[2])*fi[2] + static_cast(drjl[2])*fj[2] + static_cast(drkl[2])*fk[2]; + v[3] = static_cast(dril[0])*fi[1] + static_cast(drjl[0])*fj[1] + static_cast(drkl[0])*fk[1]; + v[4] = 
static_cast(dril[0])*fi[2] + static_cast(drjl[0])*fj[2] + static_cast(drkl[0])*fk[2]; + v[5] = static_cast(dril[1])*fi[2] + static_cast(drjl[1])*fj[2] + static_cast(drkl[1])*fk[2]; if (vflag_global) { ev.v[0] += v[0]; @@ -3952,14 +3961,11 @@ void PairReaxFFKokkos::v_tally4(EV_FLOAT_REAX &ev, const int &i, con auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); - a_vatom(i,0) += 0.25 * v[0]; a_vatom(i,1) += 0.25 * v[1]; a_vatom(i,2) += 0.25 * v[2]; - a_vatom(i,3) += 0.25 * v[3]; a_vatom(i,4) += 0.25 * v[4]; a_vatom(i,5) += 0.25 * v[5]; - a_vatom(j,0) += 0.25 * v[0]; a_vatom(j,1) += 0.25 * v[1]; a_vatom(j,2) += 0.25 * v[2]; - a_vatom(j,3) += 0.25 * v[3]; a_vatom(j,4) += 0.25 * v[4]; a_vatom(j,5) += 0.25 * v[5]; - a_vatom(k,0) += 0.25 * v[0]; a_vatom(k,1) += 0.25 * v[1]; a_vatom(k,2) += 0.25 * v[2]; - a_vatom(k,3) += 0.25 * v[3]; a_vatom(k,4) += 0.25 * v[4]; a_vatom(k,5) += 0.25 * v[5]; - a_vatom(l,0) += 0.25 * v[0]; a_vatom(l,1) += 0.25 * v[1]; a_vatom(l,2) += 0.25 * v[2]; - a_vatom(l,3) += 0.25 * v[3]; a_vatom(l,4) += 0.25 * v[4]; a_vatom(l,5) += 0.25 * v[5]; + for (int n = 0; n < 6; n++) v[n] *= static_cast(0.25); + for (int n = 0; n < 6; n++) a_vatom(i,n) += v[n]; + for (int n = 0; n < 6; n++) a_vatom(j,n) += v[n]; + for (int n = 0; n < 6; n++) a_vatom(k,n) += v[n]; + for (int n = 0; n < 6; n++) a_vatom(l,n) += v[n]; } } @@ -3971,27 +3977,22 @@ void PairReaxFFKokkos::v_tally3_atom(EV_FLOAT_REAX &ev, const int &i const int & /*k*/, KK_ACC_FLOAT *fj, KK_ACC_FLOAT *fk, KK_FLOAT *drji, KK_FLOAT *drjk) const { - KK_FLOAT v[6]; + KK_ACC_FLOAT v[6]; - v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]); - v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]); - v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]); - v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]); - v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]); - v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]); + v[0] = static_cast(THIRD) * 
(static_cast(drji[0])*fj[0] + static_cast(drjk[0])*fk[0]); + v[1] = static_cast(THIRD) * (static_cast(drji[1])*fj[1] + static_cast(drjk[1])*fk[1]); + v[2] = static_cast(THIRD) * (static_cast(drji[2])*fj[2] + static_cast(drjk[2])*fk[2]); + v[3] = static_cast(THIRD) * (static_cast(drji[0])*fj[1] + static_cast(drjk[0])*fk[1]); + v[4] = static_cast(THIRD) * (static_cast(drji[0])*fj[2] + static_cast(drjk[0])*fk[2]); + v[5] = static_cast(THIRD) * (static_cast(drji[1])*fj[2] + static_cast(drjk[1])*fk[2]); if (vflag_global) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; + for (int n = 0; n < 6; n++) + ev.v[n] += v[n]; } if (vflag_atom) { - d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2]; - d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5]; + for (int n = 0; n < 6; n++) d_vatom(i,n) += v[n]; } } @@ -4090,7 +4091,7 @@ void PairReaxFFKokkos::FindBond(int &numbonds, int groupbit) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); - bo_cut_bond = api->control->bg_cut; + bo_cut_bond = static_cast(api->control->bg_cut); atomKK->sync(execution_space,TAG_MASK|MASK_MASK); tag = atomKK->k_tag.view(); diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index afcfc36e97a..05677c7692b 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -478,6 +478,7 @@ class PairReaxFFKokkos : public PairReaxFF { int nlocal,nn,NN,eflag,vflag,acks2_flag; KK_FLOAT cut_nbsq, cut_hbsq, cut_bosq, bo_cut, thb_cut, thb_cutsq; KK_FLOAT bo_cut_bond; + KK_FLOAT C_ele_reduced; int vdwflag, lgflag; KK_FLOAT gp[39], p_boc1, p_boc2; From ba6ef895452f8894be8c3e580888cce05c798615 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:41:58 -0700 Subject: [PATCH 107/604] Removed silent conversions from fix_qeq_reaxff_kokkos.*; improved the numerical stability of the iterative Krylov solve in reduced precision. 
--- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 145 ++++++++++++++------------- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 3 +- 2 files changed, 79 insertions(+), 69 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 89741e3d3b7..66d5ac40ffc 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -113,13 +113,13 @@ void FixQEqReaxFFKokkos::init() params = k_params.template view(); for (int n = 1; n <= ntypes; n++) { - k_params.view_host()(n).chi = chi[n]; - k_params.view_host()(n).eta = eta[n]; - k_params.view_host()(n).gamma = gamma[n]; + k_params.view_host()(n).chi = static_cast(chi[n]); + k_params.view_host()(n).eta = static_cast(eta[n]); + k_params.view_host()(n).gamma = static_cast(gamma[n]); } k_params.modify_host(); - cutsq = swb * swb; + cutsq = static_cast(swb * swb); init_shielding_k(); init_hist(); @@ -140,7 +140,7 @@ void FixQEqReaxFFKokkos::init_shielding_k() for (i = 1; i <= ntypes; ++i) for (j = 1; j <= ntypes; ++j) - k_shield.view_host()(i,j) = pow(gamma[i] * gamma[j], -1.5); + k_shield.view_host()(i,j) = static_cast(pow(gamma[i] * gamma[j], -1.5)); k_shield.modify_host(); k_shield.template sync(); @@ -149,7 +149,7 @@ void FixQEqReaxFFKokkos::init_shielding_k() d_tap = k_tap.template view(); for (i = 0; i < 8; i ++) - k_tap.view_host()(i) = Tap[i]; + k_tap.view_host()(i) = static_cast(Tap[i]); k_tap.modify_host(); k_tap.template sync(); @@ -385,7 +385,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqZero, const int &ii) const const int itype = type(i); if (mask[i] & groupbit) { - d_Hdia_inv[i] = 1.0 / params(itype).eta; + d_Hdia_inv[i] = static_cast(1.0) / params(itype).eta; d_b_st(i,0) = -params(itype).chi - d_chi_field[i]; d_b_st(i,1) = -1.0; d_st(i,0) = 0.0; @@ -694,7 +694,7 @@ KK_FLOAT FixQEqReaxFFKokkos::calculate_H_k(const KK_FLOAT &r, const denom = r * r * r + shld; denom = cbrt(denom); - return taper * EV_TO_KCAL_PER_MOL / denom; + return taper * 
static_cast(EV_TO_KCAL_PER_MOL) / denom; } /* ---------------------------------------------------------------------- */ @@ -707,7 +707,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqInitMatvec, const int &ii) const int itype = type(i); if (mask[i] & groupbit) { - d_Hdia_inv[i] = 1.0 / params(itype).eta; + d_Hdia_inv[i] = static_cast(1.0) / params(itype).eta; d_b_st(i,0) = -params(itype).chi - d_chi_field[i]; d_b_st(i,1) = -1.0; d_st(i,0) = 4*(d_s_hist(i,0)+d_s_hist(i,2))-(6*d_s_hist(i,1)+d_s_hist(i,3)); @@ -932,11 +932,11 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half::operator()(TagQEqSparseMatvec2_Full, const if (k < nn) { const int i = d_ilist[k]; if (mask[i] & groupbit) { - KK_double2 doitmp; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, KK_double2& doi) { + KK_FLOAT2 doitmp; + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, KK_FLOAT2& doi) { const int j = d_jlist(jj); const auto d_val_jj = d_val(jj); if (!(converged & 1)) @@ -997,15 +997,17 @@ void FixQEqReaxFFKokkos::operator()(TagQEqNorm1, const int &ii, KK_d if (mask[i] & groupbit) { const auto d_Hdia_inv_i = d_Hdia_inv[i]; if (!(converged & 1)) { - d_r(i,0) = 1.0*d_b_st(i,0) + -1.0*d_o(i,0); - d_d(i,0) = d_r(i,0) * d_Hdia_inv_i; - out.v[0] += d_b_st(i,0) * d_b_st(i,0); + double r = static_cast(d_b_st(i,0)) - static_cast(d_o(i,0)); + d_d(i,0) = static_cast(r * static_cast(d_Hdia_inv_i)); + d_r(i,0) = static_cast(r); + out.v[0] += static_cast(d_b_st(i,0) * d_b_st(i,0)); } if (!(converged & 2)) { - d_r(i,1) = 1.0*d_b_st(i,1) + -1.0*d_o(i,1); - d_d(i,1) = d_r(i,1) * d_Hdia_inv_i; - out.v[1] += d_b_st(i,1) * d_b_st(i,1); + double r = static_cast(d_b_st(i,1)) - static_cast(d_o(i,1)); + d_d(i,1) = static_cast(r * static_cast(d_Hdia_inv_i)); + d_r(i,1) = static_cast(r); + out.v[1] += static_cast(d_b_st(i,1) * d_b_st(i,1)); } } } @@ -1019,9 
+1021,9 @@ void FixQEqReaxFFKokkos::operator()(TagQEqDot1, const int &ii, KK_do const int i = d_ilist[ii]; if (mask[i] & groupbit) { if (!(converged & 1)) - out.v[0] += d_r(i,0) * d_d(i,0); + out.v[0] += static_cast(d_r(i,0) * d_d(i,0)); if (!(converged & 2)) - out.v[1] += d_r(i,1) * d_d(i,1); + out.v[1] += static_cast(d_r(i,1) * d_d(i,1)); } } @@ -1034,9 +1036,9 @@ void FixQEqReaxFFKokkos::operator()(TagQEqDot2, const int &ii, KK_do const int i = d_ilist[ii]; if (mask[i] & groupbit) { if (!(converged & 1)) - out.v[0] += d_d(i,0) * d_o(i,0); + out.v[0] += static_cast(d_d(i,0) * d_o(i,0)); if (!(converged & 2)) - out.v[1] += d_d(i,1) * d_o(i,1); + out.v[1] += static_cast(d_d(i,1) * d_o(i,1)); } } @@ -1046,22 +1048,28 @@ template KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqDot3, const int &ii, KK_double2& out) const { + // As much precision as possible needs to be preserved in this function (in practice), + // otherwise the CG solve tends to become unstable in reduced precision or outright fail to converge. 
const int i = d_ilist[ii]; if (mask[i] & groupbit) { - const auto d_Hdia_inv_i = d_Hdia_inv[i]; + const double d_Hdia_inv_i = static_cast(d_Hdia_inv[i]); if (!(converged & 1)) { - const auto alpha_0 = alpha[0]; - d_st(i,0) += alpha_0 * d_d(i,0); - d_r(i,0) += -alpha_0 * d_o(i,0); - d_p(i,0) = d_r(i,0) * d_Hdia_inv_i; - out.v[0] += d_r(i,0) * d_p(i,0); + const double alpha_0 = alpha[0]; + d_st(i,0) += static_cast(alpha_0 * static_cast(d_d(i,0))); + double r = static_cast(d_r(i, 0)) - alpha_0 * static_cast(d_o(i,0)); + double p = r * static_cast(d_Hdia_inv_i); + out.v[0] += r * p; + d_p(i,0) = static_cast(p); + d_r(i,0) = static_cast(r); } if (!(converged & 2)) { - const auto alpha_1 = alpha[1]; - d_st(i,1) += alpha_1 * d_d(i,1); - d_r(i,1) += -alpha_1 * d_o(i,1); - d_p(i,1) = d_r(i,1) * d_Hdia_inv_i; - out.v[1] += d_r(i,1) * d_p(i,1); + const double alpha_1 = alpha[1]; + d_st(i,1) += static_cast(alpha_1 * static_cast(d_d(i,1))); + double r = static_cast(d_r(i, 1)) - alpha_1 * static_cast(d_o(i,1)); + double p = r * static_cast(d_Hdia_inv_i); + out.v[1] += r * p; + d_p(i,1) = static_cast(p); + d_r(i,1) = static_cast(r); } } } @@ -1075,9 +1083,9 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSum1, const int &ii) const const int i = d_ilist[ii]; if (mask[i] & groupbit) { if (!(converged & 1)) - d_d(i,0) = 1.0 * d_p(i,0) + beta[0] * d_d(i,0); + d_d(i,0) = static_cast(static_cast(d_p(i,0)) + beta[0] * static_cast(d_d(i,0))); if (!(converged & 2)) - d_d(i,1) = 1.0 * d_p(i,1) + beta[1] * d_d(i,1); + d_d(i,1) = static_cast(static_cast(d_p(i,1)) + beta[1] * static_cast(d_d(i,1))); } } @@ -1089,8 +1097,8 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSum2, const int &ii, KK_do { const int i = d_ilist[ii]; if (mask[i] & groupbit) { - out.v[0] += d_st(i,0); - out.v[1] += d_st(i,1); + out.v[0] += static_cast(d_st(i,0)); + out.v[1] += static_cast(d_st(i,1)); } } @@ -1102,7 +1110,8 @@ void FixQEqReaxFFKokkos::operator()(TagQEqCalculateQ, const int &ii) { const int i = d_ilist[ii]; if 
(mask[i] & groupbit) { - q(i) = d_st(i,0) - delta * d_st(i,1); + // Preserve bits in the subtraction to avoid precision loss + q(i) = static_cast(static_cast(d_st(i,0)) - delta * static_cast(d_st(i,1))); for (int k = nprev-1; k > 0; --k) { d_s_hist(i,k) = d_s_hist(i,k-1); @@ -1136,14 +1145,14 @@ void FixQEqReaxFFKokkos::operator()(TagQEqPackForwardComm, const int if (pack_flag == 1) { if (!(converged & 1)) - d_buf[i*2] = d_d(j,0); + d_buf[i*2] = static_cast(d_d(j,0)); if (!(converged & 2)) - d_buf[i*2+1] = d_d(j,1); + d_buf[i*2+1] = static_cast(d_d(j,1)); } else if (pack_flag == 2) { - d_buf[i*2] = d_st(j,0); - d_buf[i*2+1] = d_st(j,1); + d_buf[i*2] = static_cast(d_st(j,0)); + d_buf[i*2+1] = static_cast(d_st(j,1)); } else if (pack_flag == 3) - d_buf[i] = q[j]; + d_buf[i] = static_cast(q[j]); } /* ---------------------------------------------------------------------- */ @@ -1164,14 +1173,14 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqUnpackForwardComm, const int &i) const { if (pack_flag == 1) { if (!(converged & 1)) - d_d(i+first,0) = d_buf[i*2]; + d_d(i+first,0) = static_cast(d_buf[i*2]); if (!(converged & 2)) - d_d(i+first,1) = d_buf[i*2+1]; + d_d(i+first,1) = static_cast(d_buf[i*2+1]); } else if (pack_flag == 2) { - d_st(i+first,0) = d_buf[i*2]; - d_st(i+first,1) = d_buf[i*2+1]; + d_st(i+first,0) = static_cast(d_buf[i*2]); + d_st(i+first,1) = static_cast(d_buf[i*2+1]); } else if (pack_flag == 3) - q[i + first] = d_buf[i]; + q[i + first] = static_cast(d_buf[i]); } /* ---------------------------------------------------------------------- */ @@ -1187,15 +1196,15 @@ int FixQEqReaxFFKokkos::pack_forward_comm(int n, int *list, double * k_d.sync_host(); for (m = 0; m < n; m++) { if (!(converged & 1)) - buf[m*2] = h_d(list[m],0); + buf[m*2] = static_cast(h_d(list[m],0)); if (!(converged & 2)) - buf[m*2+1] = h_d(list[m],1); + buf[m*2+1] = static_cast(h_d(list[m],1)); } } else if (pack_flag == 2) { k_st.sync_host(); for (m = 0; m < n; m++) { - 
buf[m*2] = h_st(list[m],0); - buf[m*2+1] = h_st(list[m],1); + buf[m*2] = static_cast(h_st(list[m],0)); + buf[m*2+1] = static_cast(h_st(list[m],1)); } } else if (pack_flag == 3) { atomKK->sync(Host,Q_MASK); @@ -1217,16 +1226,16 @@ void FixQEqReaxFFKokkos::unpack_forward_comm(int n, int first, doubl k_d.sync_host(); for (m = 0, i = first; m < n; m++, i++) { if (!(converged & 1)) - h_d(i,0) = buf[m*2]; + h_d(i,0) = static_cast(buf[m*2]); if (!(converged & 2)) - h_d(i,1) = buf[m*2+1]; + h_d(i,1) = static_cast(buf[m*2+1]); } k_d.modify_host(); } else if (pack_flag == 2) { k_st.sync_host(); for (m = 0, i = first; m < n; m++, i++) { - h_st(i,0) = buf[m*2]; - h_st(i,1) = buf[m*2+1]; + h_st(i,0) = static_cast(buf[m*2]); + h_st(i,1) = static_cast(buf[m*2+1]); } k_st.modify_host(); } else if (pack_flag == 3) { @@ -1245,9 +1254,9 @@ int FixQEqReaxFFKokkos::pack_reverse_comm(int n, int first, double * k_o.sync_host(); for (m = 0, i = first; m < n; m++, i++) { if (!(converged & 1)) - buf[m*2] = h_o(i,0); + buf[m*2] = static_cast(h_o(i,0)); if (!(converged & 2)) - buf[m*2+1] = h_o(i,1); + buf[m*2+1] = static_cast(h_o(i,1)); } return n*2; } @@ -1260,9 +1269,9 @@ void FixQEqReaxFFKokkos::unpack_reverse_comm(int n, int *list, doubl k_o.sync_host(); for (int m = 0; m < n; m++) { if (!(converged & 1)) - h_o(list[m],0) += buf[m*2]; + h_o(list[m],0) += static_cast(buf[m*2]); if (!(converged & 2)) - h_o(list[m],1) += buf[m*2+1]; + h_o(list[m],1) += static_cast(buf[m*2+1]); } k_o.modify_host(); } @@ -1284,11 +1293,11 @@ double FixQEqReaxFFKokkos::memory_usage() { double bytes; - bytes = atom->nmax*nprev*2 * sizeof(double); // s_hist & t_hist - bytes += (double)atom->nmax*8 * sizeof(double); // storage + bytes = atom->nmax*nprev*2 * sizeof(KK_FLOAT); // s_hist & t_hist + bytes += (double)atom->nmax*8 * sizeof(KK_FLOAT); // storage bytes += (double)n_cap*2 * sizeof(int); // matrix... 
bytes += (double)m_cap_big * sizeof(int); - bytes += (double)m_cap_big * sizeof(double); + bytes += (double)m_cap_big * sizeof(KK_FLOAT); return bytes; } @@ -1358,8 +1367,8 @@ KOKKOS_INLINE_FUNCTION void FixQEqReaxFFKokkos::operator()(TagQEqPackExchange, const int &mysend) const { const int i = d_exchange_sendlist(mysend); - for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = d_s_hist(i,m); - for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = d_t_hist(i,m); + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + m) = static_cast(d_s_hist(i,m)); + for (int m = 0; m < nprev; m++) d_buf(mysend*nprev*2 + nprev+m) = static_cast(d_t_hist(i,m)); const int j = d_copylist(mysend); @@ -1412,8 +1421,8 @@ void FixQEqReaxFFKokkos::operator()(TagQEqUnpackExchange, const int int index = d_indices(i); if (index > -1) { - for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m); - for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m); + for (int m = 0; m < nprev; m++) d_s_hist(index,m) = static_cast(d_buf(i*nprev*2 + m)); + for (int m = 0; m < nprev; m++) d_t_hist(index,m) = static_cast(d_buf(i*nprev*2 + nprev+m)); } } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 7724a08b310..f5110e98798 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -280,7 +280,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { double alpha[2]; double beta[2]; - double delta, cutsq; + double delta; + KK_FLOAT cutsq; void grow_arrays(int) override; void copy_arrays(int, int, int) override; From dfe017ca96e5935ed4e5ba0bfff7cfedbd1f3496 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:45:01 -0700 Subject: [PATCH 108/604] Removed silent conversions from pair_zbl_kokkos.*, as relevant for part of SNAP. 
--- src/KOKKOS/pair_zbl_kokkos.cpp | 126 ++++++++++++++++++--------------- src/KOKKOS/pair_zbl_kokkos.h | 3 + 2 files changed, 70 insertions(+), 59 deletions(-) diff --git a/src/KOKKOS/pair_zbl_kokkos.cpp b/src/KOKKOS/pair_zbl_kokkos.cpp index eda8e846359..7c9bae944dc 100644 --- a/src/KOKKOS/pair_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_zbl_kokkos.cpp @@ -78,7 +78,7 @@ void PairZBLKokkos::init_style() { PairZBL::init_style(); - Kokkos::deep_copy(d_cutsq,cut_globalsq); + Kokkos::deep_copy(d_cutsq,static_cast(cut_globalsq)); // error if rRESPA with inner levels @@ -133,10 +133,18 @@ void PairZBLKokkos::compute(int eflag_in, int vflag_in) nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; newton_pair = force->newton_pair; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; + special_lj[0] = static_cast(force->special_lj[0]); + special_lj[1] = static_cast(force->special_lj[1]); + special_lj[2] = static_cast(force->special_lj[2]); + special_lj[3] = static_cast(force->special_lj[3]); + + c1_kk = static_cast(c1); + c2_kk = static_cast(c2); + c3_kk = static_cast(c3); + c4_kk = static_cast(c4); + + cut_inner_kk = static_cast(cut_inner); + cut_innersq_kk = static_cast(cut_innersq); k_z.sync(); k_d1a.sync(); @@ -154,14 +162,14 @@ void PairZBLKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); - if (eflag_global) eng_vdwl += ev.evdwl; + if (eflag_global) eng_vdwl += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (eflag_atom) { @@ -188,14 +196,14 @@ 
compute_fpair(const KK_FLOAT& rsq, const int &, const int &, const int &itype, c const KK_FLOAT r = sqrt(rsq); KK_FLOAT fpair = dzbldr(r, itype, jtype); - if (rsq > cut_innersq) { - const KK_FLOAT t = r - cut_inner; + if (rsq > cut_innersq_kk) { + const KK_FLOAT t = r - cut_inner_kk; const KK_FLOAT fswitch = t*t * (d_sw1(itype,jtype) + d_sw2(itype,jtype)*t); fpair += fswitch; } - fpair *= -1.0/r; + fpair *= -static_cast(1.0) / r; return fpair; } @@ -207,8 +215,8 @@ compute_evdwl(const KK_FLOAT &rsq, const int &, const int &, const int &itype, c const KK_FLOAT r = sqrt(rsq); KK_FLOAT evdwl = e_zbl(r, itype, jtype); evdwl += d_sw5(itype,jtype); - if (rsq > cut_innersq) { - const KK_FLOAT t = r - cut_inner; + if (rsq > cut_innersq_kk) { + const KK_FLOAT t = r - cut_inner_kk; const KK_FLOAT eswitch = t*t*t * (d_sw3(itype,jtype) + d_sw4(itype,jtype)*t); evdwl += eswitch; @@ -263,18 +271,18 @@ double PairZBLKokkos::init_one(int i, int j) { double cutone = PairZBL::init_one(i,j); - k_z.view_host()(i) = z[i]; - k_z.view_host()(j) = z[j]; - k_d1a.view_host()(i,j) = k_d1a.view_host()(j,i) = d1a[i][j]; - k_d2a.view_host()(i,j) = k_d2a.view_host()(j,i) = d2a[i][j]; - k_d3a.view_host()(i,j) = k_d3a.view_host()(j,i) = d3a[i][j]; - k_d4a.view_host()(i,j) = k_d4a.view_host()(j,i) = d4a[i][j]; - k_zze.view_host()(i,j) = k_zze.view_host()(j,i) = zze[i][j]; - k_sw1.view_host()(i,j) = k_sw1.view_host()(j,i) = sw1[i][j]; - k_sw2.view_host()(i,j) = k_sw2.view_host()(j,i) = sw2[i][j]; - k_sw3.view_host()(i,j) = k_sw3.view_host()(j,i) = sw3[i][j]; - k_sw4.view_host()(i,j) = k_sw4.view_host()(j,i) = sw4[i][j]; - k_sw5.view_host()(i,j) = k_sw5.view_host()(j,i) = sw5[i][j]; + k_z.view_host()(i) = static_cast(z[i]); + k_z.view_host()(j) = static_cast(z[j]); + k_d1a.view_host()(i,j) = k_d1a.view_host()(j,i) = static_cast(d1a[i][j]); + k_d2a.view_host()(i,j) = k_d2a.view_host()(j,i) = static_cast(d2a[i][j]); + k_d3a.view_host()(i,j) = k_d3a.view_host()(j,i) = static_cast(d3a[i][j]); + 
k_d4a.view_host()(i,j) = k_d4a.view_host()(j,i) = static_cast(d4a[i][j]); + k_zze.view_host()(i,j) = k_zze.view_host()(j,i) = static_cast(zze[i][j]); + k_sw1.view_host()(i,j) = k_sw1.view_host()(j,i) = static_cast(sw1[i][j]); + k_sw2.view_host()(i,j) = k_sw2.view_host()(j,i) = static_cast(sw2[i][j]); + k_sw3.view_host()(i,j) = k_sw3.view_host()(j,i) = static_cast(sw3[i][j]); + k_sw4.view_host()(i,j) = k_sw4.view_host()(j,i) = static_cast(sw4[i][j]); + k_sw5.view_host()(i,j) = k_sw5.view_host()(j,i) = static_cast(sw5[i][j]); k_z.modify_host(); k_d1a.modify_host(); @@ -289,7 +297,7 @@ double PairZBLKokkos::init_one(int i, int j) k_sw5.modify_host(); if (i(cutone*cutone); } return cutone; @@ -308,12 +316,12 @@ KK_FLOAT PairZBLKokkos::e_zbl(KK_FLOAT r, int i, int j) const { const KK_FLOAT d3aij = d_d3a(i,j); const KK_FLOAT d4aij = d_d4a(i,j); const KK_FLOAT zzeij = d_zze(i,j); - const KK_FLOAT rinv = 1.0/r; + const KK_FLOAT rinv = static_cast(1.0) / r; - KK_FLOAT sum = c1*exp(-d1aij*r); - sum += c2*exp(-d2aij*r); - sum += c3*exp(-d3aij*r); - sum += c4*exp(-d4aij*r); + KK_FLOAT sum = c1_kk*exp(-d1aij*r); + sum += c2_kk*exp(-d2aij*r); + sum += c3_kk*exp(-d3aij*r); + sum += c4_kk*exp(-d4aij*r); KK_FLOAT result = zzeij*sum*rinv; @@ -333,22 +341,22 @@ KK_FLOAT PairZBLKokkos::dzbldr(KK_FLOAT r, int i, int j) const { const KK_FLOAT d3aij = d_d3a(i,j); const KK_FLOAT d4aij = d_d4a(i,j); const KK_FLOAT zzeij = d_zze(i,j); - const KK_FLOAT rinv = 1.0/r; + const KK_FLOAT rinv = static_cast(1.0) / r; const KK_FLOAT e1 = exp(-d1aij*r); const KK_FLOAT e2 = exp(-d2aij*r); const KK_FLOAT e3 = exp(-d3aij*r); const KK_FLOAT e4 = exp(-d4aij*r); - KK_FLOAT sum = c1*e1; - sum += c2*e2; - sum += c3*e3; - sum += c4*e4; + KK_FLOAT sum = c1_kk*e1; + sum += c2_kk*e2; + sum += c3_kk*e3; + sum += c4_kk*e4; - KK_FLOAT sum_p = -c1*d1aij*e1; - sum_p -= c2*d2aij*e2; - sum_p -= c3*d3aij*e3; - sum_p -= c4*d4aij*e4; + KK_FLOAT sum_p = -c1_kk*d1aij*e1; + sum_p -= c2_kk*d2aij*e2; + sum_p -= 
c3_kk*d3aij*e3; + sum_p -= c4_kk*d4aij*e4; KK_FLOAT result = zzeij*(sum_p - sum*rinv)*rinv; @@ -368,30 +376,30 @@ KK_FLOAT PairZBLKokkos::d2zbldr2(KK_FLOAT r, int i, int j) const { const KK_FLOAT d3aij = d_d3a(i,j); const KK_FLOAT d4aij = d_d4a(i,j); const KK_FLOAT zzeij = d_zze(i,j); - const KK_FLOAT rinv = 1.0/r; + const KK_FLOAT rinv = static_cast(1.0) / r; const KK_FLOAT e1 = exp(-d1aij*r); const KK_FLOAT e2 = exp(-d2aij*r); const KK_FLOAT e3 = exp(-d3aij*r); const KK_FLOAT e4 = exp(-d4aij*r); - KK_FLOAT sum = c1*e1; - sum += c2*e2; - sum += c3*e3; - sum += c4*e4; + KK_FLOAT sum = c1_kk*e1; + sum += c2_kk*e2; + sum += c3_kk*e3; + sum += c4_kk*e4; - KK_FLOAT sum_p = c1*e1*d1aij; - sum_p += c2*e2*d2aij; - sum_p += c3*e3*d3aij; - sum_p += c4*e4*d4aij; + KK_FLOAT sum_p = c1_kk*e1*d1aij; + sum_p += c2_kk*e2*d2aij; + sum_p += c3_kk*e3*d3aij; + sum_p += c4_kk*e4*d4aij; - KK_FLOAT sum_pp = c1*e1*d1aij*d1aij; - sum_pp += c2*e2*d2aij*d2aij; - sum_pp += c3*e3*d3aij*d3aij; - sum_pp += c4*e4*d4aij*d4aij; + KK_FLOAT sum_pp = c1_kk*e1*d1aij*d1aij; + sum_pp += c2_kk*e2*d2aij*d2aij; + sum_pp += c3_kk*e3*d3aij*d3aij; + sum_pp += c4_kk*e4*d4aij*d4aij; - KK_FLOAT result = zzeij*(sum_pp + 2.0*sum_p*rinv + - 2.0*sum*rinv*rinv)*rinv; + KK_FLOAT result = zzeij*(sum_pp + static_cast(2.0)*sum_p*rinv + + static_cast(2.0)*sum*rinv*rinv)*rinv; return result; } diff --git a/src/KOKKOS/pair_zbl_kokkos.h b/src/KOKKOS/pair_zbl_kokkos.h index 1b6b68aebfd..17481eb926e 100644 --- a/src/KOKKOS/pair_zbl_kokkos.h +++ b/src/KOKKOS/pair_zbl_kokkos.h @@ -66,6 +66,9 @@ class PairZBLKokkos : public PairZBL { int neighflag; int nlocal,nall,eflag,vflag; KK_FLOAT special_lj[4]; + KK_FLOAT c1_kk, c2_kk, c3_kk, c4_kk; + KK_FLOAT cut_inner_kk; + KK_FLOAT cut_innersq_kk; KOKKOS_INLINE_FUNCTION KK_FLOAT e_zbl(KK_FLOAT, int, int) const; From 4b2dc1a0dc5fe3ee32eebcd56d1ed9c34381b4b7 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Mon, 27 Oct 2025 15:51:25 -0700 Subject: [PATCH 109/604] Developed best-faith 
single and mixed precision implementations of SNAP. --- src/KOKKOS/compute_sna_grid_kokkos.h | 19 +- src/KOKKOS/compute_sna_grid_kokkos_impl.h | 185 +++++---- src/KOKKOS/compute_sna_grid_local_kokkos.h | 19 +- .../compute_sna_grid_local_kokkos_impl.h | 172 ++++----- src/KOKKOS/pair_snap_kokkos.h | 25 +- src/KOKKOS/pair_snap_kokkos_impl.h | 353 +++++++++--------- src/KOKKOS/sna_kokkos.h | 11 +- src/KOKKOS/sna_kokkos_impl.h | 239 ++++++------ 8 files changed, 511 insertions(+), 512 deletions(-) diff --git a/src/KOKKOS/compute_sna_grid_kokkos.h b/src/KOKKOS/compute_sna_grid_kokkos.h index b6469fc5143..6b594e1a0b2 100644 --- a/src/KOKKOS/compute_sna_grid_kokkos.h +++ b/src/KOKKOS/compute_sna_grid_kokkos.h @@ -53,7 +53,7 @@ struct TagComputeSNAGrid3D{}; struct TagComputeSNAGridLoopCPU{}; //template -template +template class ComputeSNAGridKokkos : public ComputeSNAGrid { public: typedef DeviceType device_type; @@ -61,6 +61,7 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { static constexpr int vector_length = vector_length_; using real_type = real_type_; + using accum_type = accum_type_; using complex = SNAComplex; static constexpr bool legacy_on_gpu = false; // run the CPU path on the GPU @@ -210,7 +211,7 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { protected: - SNAKokkos snaKK; + SNAKokkos snaKK; int max_neighs, chunk_size, chunk_offset; int host_flag; @@ -251,11 +252,11 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { class DomainKokkos *domainKK; // triclinic vars - double h0, h1, h2, h3, h4, h5; - double lo0, lo1, lo2; + KK_FLOAT h0, h1, h2, h3, h4, h5; + KK_FLOAT lo0, lo1, lo2; // Make SNAKokkos a friend - friend class SNAKokkos; + friend class SNAKokkos; }; // These wrapper classes exist to make the compute style factory happy/avoid having @@ -263,10 +264,10 @@ class ComputeSNAGridKokkos : public ComputeSNAGrid { // of extra template parameters template -class ComputeSNAGridKokkosDevice : public ComputeSNAGridKokkos { +class 
ComputeSNAGridKokkosDevice : public ComputeSNAGridKokkos { private: - using Base = ComputeSNAGridKokkos; + using Base = ComputeSNAGridKokkos; public: @@ -278,10 +279,10 @@ class ComputeSNAGridKokkosDevice : public ComputeSNAGridKokkos -class ComputeSNAGridKokkosHost : public ComputeSNAGridKokkos { +class ComputeSNAGridKokkosHost : public ComputeSNAGridKokkos { private: - using Base = ComputeSNAGridKokkos; + using Base = ComputeSNAGridKokkos; public: diff --git a/src/KOKKOS/compute_sna_grid_kokkos_impl.h b/src/KOKKOS/compute_sna_grid_kokkos_impl.h index 572928f5fa2..ece3ae12617 100644 --- a/src/KOKKOS/compute_sna_grid_kokkos_impl.h +++ b/src/KOKKOS/compute_sna_grid_kokkos_impl.h @@ -45,8 +45,8 @@ namespace LAMMPS_NS { // Constructor -template -ComputeSNAGridKokkos::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) : ComputeSNAGrid(lmp, narg, arg) +template +ComputeSNAGridKokkos::ComputeSNAGridKokkos(LAMMPS *lmp, int narg, char **arg) : ComputeSNAGrid(lmp, narg, arg) { kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -94,11 +94,11 @@ ComputeSNAGridKokkos::ComputeSNAGridKokkos // start from index 1 because of how compute sna/grid is for (int i = 1; i <= atom->ntypes; i++) { - h_radelem(i-1) = radelem[i]; - h_wjelem(i-1) = wjelem[i]; + h_radelem(i-1) = static_cast(radelem[i]); + h_wjelem(i-1) = static_cast(wjelem[i]); if (switchinnerflag){ - h_sinnerelem(i) = sinnerelem[i]; - h_dinnerelem(i) = dinnerelem[i]; + h_sinnerelem(i) = static_cast(sinnerelem[i]); + h_dinnerelem(i) = static_cast(dinnerelem[i]); } } @@ -120,7 +120,7 @@ ComputeSNAGridKokkos::ComputeSNAGridKokkos } Kokkos::deep_copy(d_test,h_test); - snaKK = SNAKokkos(*this); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0,0,padding_factor); snaKK.init(); @@ -130,8 +130,8 @@ ComputeSNAGridKokkos::ComputeSNAGridKokkos // Destructor -template -ComputeSNAGridKokkos::~ComputeSNAGridKokkos() +template +ComputeSNAGridKokkos::~ComputeSNAGridKokkos() { if (copymode) return; @@ -146,8 +146,8 @@ 
ComputeSNAGridKokkos::~ComputeSNAGridKokko // Setup -template -void ComputeSNAGridKokkos::setup() +template +void ComputeSNAGridKokkos::setup() { // Do not call ComputeGrid::setup(), we don't wanna allocate the grid array there. // Instead, call ComputeGrid::set_grid_global and set_grid_local to set the n indices. @@ -170,8 +170,8 @@ void ComputeSNAGridKokkos::setup() // Compute -template -void ComputeSNAGridKokkos::compute_array() +template +void ComputeSNAGridKokkos::compute_array() { if (host_flag) { ComputeSNAGrid::compute_array(); @@ -212,15 +212,15 @@ void ComputeSNAGridKokkos::compute_array() const int chunk_size_div = (chunk_size + vector_length - 1) / vector_length; if (triclinic) { - h0 = domain->h[0]; - h1 = domain->h[1]; - h2 = domain->h[2]; - h3 = domain->h[3]; - h4 = domain->h[4]; - h5 = domain->h[5]; - lo0 = domain->boxlo[0]; - lo1 = domain->boxlo[1]; - lo2 = domain->boxlo[2]; + h0 = static_cast(domain->h[0]); + h1 = static_cast(domain->h[1]); + h2 = static_cast(domain->h[2]); + h3 = static_cast(domain->h[3]); + h4 = static_cast(domain->h[4]); + h5 = static_cast(domain->h[5]); + lo0 = static_cast(domain->boxlo[0]); + lo1 = static_cast(domain->boxlo[1]); + lo2 = static_cast(domain->boxlo[2]); } while (chunk_offset < total_range) { // chunk up loop to prevent running out of memory @@ -354,9 +354,9 @@ void ComputeSNAGridKokkos::compute_array() /* Simple team policy functor seeing how many layers deep we can go with the parallelism. */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { // This function follows similar procedure as ComputeNeigh of PairSNAPKokkos. // Main difference is that we don't use the neighbor class or neighbor variables here. 
@@ -400,9 +400,9 @@ void ComputeSNAGridKokkos::operator() (Tag KK_FLOAT xgrid[3]; - xgrid[0] = ix * delx; - xgrid[1] = iy * dely; - xgrid[2] = iz * delz; + xgrid[0] = static_cast(ix * delx); + xgrid[1] = static_cast(iy * dely); + xgrid[2] = static_cast(iz * delz); if (triclinic) { @@ -413,9 +413,9 @@ void ComputeSNAGridKokkos::operator() (Tag // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats. - xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0; - xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1; - xgrid[2] = h2*xgrid[2] + lo2; + xgrid[0] = static_cast(h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0); + xgrid[1] = static_cast(h1*xgrid[1] + h3*xgrid[2] + lo1); + xgrid[2] = static_cast(h2*xgrid[2] + lo2); } const KK_FLOAT xtmp = xgrid[0]; @@ -435,11 +435,11 @@ void ComputeSNAGridKokkos::operator() (Tag // Looping over ntotal for now. for (int j = 0; j < ntotal; j++){ - const KK_FLOAT dx = x(j,0) - xtmp; - const KK_FLOAT dy = x(j,1) - ytmp; - const KK_FLOAT dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0) - xtmp); + const double dy = static_cast(x(j,1) - ytmp); + const double dz = static_cast(x(j,2) - ztmp); int jtype = type(j); - const KK_FLOAT rsq = dx*dx + dy*dy + dz*dz; + const double rsq = dx*dx + dy*dy + dz*dz; // don't include atoms that share location with grid point if (rsq >= rnd_cutsq(itype,jtype) || rsq < 1e-20) { @@ -457,10 +457,10 @@ void ComputeSNAGridKokkos::operator() (Tag for (int j = 0; j < ntotal; j++){ //const int jtype = type_cache[j]; //if (jtype >= 0) { - const KK_FLOAT dx = x(j,0) - xtmp; - const KK_FLOAT dy = x(j,1) - ytmp; - const KK_FLOAT dz = x(j,2) - ztmp; - const KK_FLOAT rsq = dx*dx + dy*dy + dz*dz; + const double dx = static_cast(x(j,0) - xtmp); + const double dy = static_cast(x(j,1) - ytmp); + const double dz = static_cast(x(j,2) - ztmp); + const double rsq = dx*dx + dy*dy 
+ dz*dz; int jtype = type(j); if (rsq < rnd_cutsq(itype,jtype) && rsq > 1e-20) { int jelem = 0; @@ -471,11 +471,11 @@ void ComputeSNAGridKokkos::operator() (Tag // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); - snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.rcutij(ii,offset) = static_cast((2.0 * static_cast(d_radelem[jelem]))*rcutfac); snaKK.inside(ii,offset) = j; if (switchinnerflag) { - snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = static_cast(0.5)*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = static_cast(0.5)*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) snaKK.element(ii,offset) = jelem; @@ -490,9 +490,9 @@ void ComputeSNAGridKokkos::operator() (Tag Pre-compute the Cayley-Klein parameters for reuse in later routines ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -508,9 +508,9 @@ void ComputeSNAGridKokkos::operator() (Tag and zero terms elsewhere ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom_mod, 
const int& j, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -520,9 +520,9 @@ void ComputeSNAGridKokkos::operator() (Tag snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom, const int& j) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom, const int& j) const { if (iatom >= chunk_size) return; int itype = type(iatom); @@ -531,9 +531,9 @@ void ComputeSNAGridKokkos::operator() (Tag snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridPreUi, const int& iatom) const { if (iatom >= chunk_size) return; const int itype = type(iatom); @@ -543,9 +543,9 @@ void ComputeSNAGridKokkos::operator() (Tag snaKK.pre_ui(iatom, j, ielem); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiSmall, +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiSmall, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -570,9 +570,9 @@ void ComputeSNAGridKokkos::operator() (Tag } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiLarge, +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeUiLarge, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -599,25 +599,25 @@ void ComputeSNAGridKokkos::operator() (Tag structure. Zero-initialize ylist. CPU and GPU. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (idxu >= snaKK.idxu_max) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom, const int& idxu) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom, const int& idxu) const { if (iatom >= chunk_size) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridTransformUi, const int& iatom) const { if (iatom >= chunk_size) return; for (int idxu = 0; idxu < snaKK.idxu_max; idxu++) snaKK.transform_ui(iatom, idxu); @@ -628,25 +628,25 @@ void ComputeSNAGridKokkos::operator() (Tag view ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjz >= snaKK.idxz_max) return; snaKK.template compute_zi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom, const int& jjz) const { +void 
ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom, const int& jjz) const { if (iatom >= chunk_size) return; snaKK.template compute_zi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeZi, const int& iatom) const { if (iatom >= chunk_size) return; for (int jjz = 0; jjz < snaKK.idxz_max; jjz++) snaKK.template compute_zi(iatom, jjz); @@ -656,33 +656,33 @@ void ComputeSNAGridKokkos::operator() (Tag Compute the energy triple products and store in the "blist" view ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjb >= snaKK.idxb_max) return; snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom, const int& jjb) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom, const int& jjb) const { if (iatom >= chunk_size) return; snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridComputeBi, const int& iatom) const { if (iatom >= chunk_size) return; for (int jjb = 0; jjb < snaKK.idxb_max; jjb++) snaKK.template compute_bi(iatom, jjb); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridKokkos::operator() (TagCSNAGridLocalFill, const 
int& ii) const { +void ComputeSNAGridKokkos::operator() (TagCSNAGridLocalFill, const int& ii) const { // extract grid index int i = ii + chunk_offset; @@ -701,9 +701,9 @@ void ComputeSNAGridKokkos::operator() (Tag int igrid = iz * (nx * ny) + iy * nx + ix; - xgrid[0] = ix * delx; - xgrid[1] = iy * dely; - xgrid[2] = iz * delz; + xgrid[0] = static_cast(ix * delx); + xgrid[1] = static_cast(iy * dely); + xgrid[2] = static_cast(iz * delz); if (triclinic) { // Do a conversion on `xgrid` here like we do in the CPU version. @@ -713,17 +713,14 @@ void ComputeSNAGridKokkos::operator() (Tag // Because calling a __host__ function("lamda2x") from a __host__ __device__ function("operator()") is not allowed // Using domainKK-> gives segfault, use domain-> instead since we're just accessing floats. - xgrid[0] = h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0; - xgrid[1] = h1*xgrid[1] + h3*xgrid[2] + lo1; - xgrid[2] = h2*xgrid[2] + lo2; + xgrid[0] = static_cast(h0*xgrid[0] + h5*xgrid[1] + h4*xgrid[2] + lo0); + xgrid[1] = static_cast(h1*xgrid[1] + h3*xgrid[2] + lo1); + xgrid[2] = static_cast(h2*xgrid[2] + lo2); } - const KK_FLOAT xtmp = xgrid[0]; - const KK_FLOAT ytmp = xgrid[1]; - const KK_FLOAT ztmp = xgrid[2]; - d_grid(igrid,0) = xtmp; - d_grid(igrid,1) = ytmp; - d_grid(igrid,2) = ztmp; + d_grid(igrid,0) = static_cast(xgrid[0]); + d_grid(igrid,1) = static_cast(xgrid[1]); + d_grid(igrid,2) = static_cast(xgrid[2]); const auto idxb_max = snaKK.idxb_max; @@ -732,7 +729,7 @@ void ComputeSNAGridKokkos::operator() (Tag for (int icoeff = 0; icoeff < ncoeff; icoeff++) { const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; - d_grid(igrid,icoeff+3) = snaKK.blist(ii,idx_chem,idxb); + d_grid(igrid,icoeff+3) = static_cast(snaKK.blist(ii,idx_chem,idxb)); } } @@ -741,9 +738,9 @@ void ComputeSNAGridKokkos::operator() (Tag utility functions ------------------------------------------------------------------------- */ -template +template template -void 
ComputeSNAGridKokkos::check_team_size_for(int inum, int &team_size) { +void ComputeSNAGridKokkos::check_team_size_for(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); @@ -752,9 +749,9 @@ void ComputeSNAGridKokkos::check_team_size team_size = team_size_max/vector_length; } -template +template template -void ComputeSNAGridKokkos::check_team_size_reduce(int inum, int &team_size) { +void ComputeSNAGridKokkos::check_team_size_reduce(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag()); @@ -763,9 +760,9 @@ void ComputeSNAGridKokkos::check_team_size team_size = team_size_max/vector_length; } -template +template template -int ComputeSNAGridKokkos::scratch_size_helper(int values_per_team) { +int ComputeSNAGridKokkos::scratch_size_helper(int values_per_team) { typedef Kokkos::View > ScratchViewType; return ScratchViewType::shmem_size(values_per_team); @@ -780,7 +777,7 @@ int ComputeSNAGridKokkos::scratch_size_hel template ComputeSNAGridKokkosDevice::ComputeSNAGridKokkosDevice(class LAMMPS *lmp, int narg, char **arg) - : ComputeSNAGridKokkos(lmp, narg, arg) { ; } + : ComputeSNAGridKokkos(lmp, narg, arg) { ; } template void ComputeSNAGridKokkosDevice::compute_array() @@ -791,7 +788,7 @@ void ComputeSNAGridKokkosDevice::compute_array() #ifdef LMP_KOKKOS_GPU template ComputeSNAGridKokkosHost::ComputeSNAGridKokkosHost(class LAMMPS *lmp, int narg, char **arg) - : ComputeSNAGridKokkos(lmp, narg, arg) { ; } + : ComputeSNAGridKokkos(lmp, narg, arg) { ; } template void ComputeSNAGridKokkosHost::compute_array() diff --git a/src/KOKKOS/compute_sna_grid_local_kokkos.h b/src/KOKKOS/compute_sna_grid_local_kokkos.h index 8473eda7739..5e8033857ae 100644 --- a/src/KOKKOS/compute_sna_grid_local_kokkos.h +++ b/src/KOKKOS/compute_sna_grid_local_kokkos.h @@ -53,7 +53,7 @@ struct 
TagComputeSNAGridLocal3D{}; struct TagComputeSNAGridLocalLoopCPU{}; //template -template +template class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { public: typedef DeviceType device_type; @@ -61,6 +61,7 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { static constexpr int vector_length = vector_length_; using real_type = real_type_; + using accum_type = accum_type_; using complex = SNAComplex; static constexpr bool legacy_on_gpu = false; // run the CPU path on the GPU @@ -207,7 +208,7 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { protected: - SNAKokkos snaKK; + SNAKokkos snaKK; int max_neighs, chunk_size, chunk_offset; int host_flag; @@ -249,11 +250,11 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { class DomainKokkos *domainKK; // triclinic vars - double h0, h1, h2, h3, h4, h5; - double lo0, lo1, lo2; + KK_FLOAT h0, h1, h2, h3, h4, h5; + KK_FLOAT lo0, lo1, lo2; // Make SNAKokkos a friend - friend class SNAKokkos; + friend class SNAKokkos; }; // These wrapper classes exist to make the compute style factory happy/avoid having @@ -261,10 +262,10 @@ class ComputeSNAGridLocalKokkos : public ComputeSNAGridLocal { // of extra template parameters template -class ComputeSNAGridLocalKokkosDevice : public ComputeSNAGridLocalKokkos { +class ComputeSNAGridLocalKokkosDevice : public ComputeSNAGridLocalKokkos { private: - using Base = ComputeSNAGridLocalKokkos; + using Base = ComputeSNAGridLocalKokkos; public: @@ -276,10 +277,10 @@ class ComputeSNAGridLocalKokkosDevice : public ComputeSNAGridLocalKokkos -class ComputeSNAGridLocalKokkosHost : public ComputeSNAGridLocalKokkos { +class ComputeSNAGridLocalKokkosHost : public ComputeSNAGridLocalKokkos { private: - using Base = ComputeSNAGridLocalKokkos; + using Base = ComputeSNAGridLocalKokkos; public: diff --git a/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h b/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h index 9fa84990595..49a315a9b98 100644 --- 
a/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h +++ b/src/KOKKOS/compute_sna_grid_local_kokkos_impl.h @@ -44,8 +44,8 @@ namespace LAMMPS_NS { // Constructor -template -ComputeSNAGridLocalKokkos::ComputeSNAGridLocalKokkos(LAMMPS *lmp, int narg, char **arg) : ComputeSNAGridLocal(lmp, narg, arg) +template +ComputeSNAGridLocalKokkos::ComputeSNAGridLocalKokkos(LAMMPS *lmp, int narg, char **arg) : ComputeSNAGridLocal(lmp, narg, arg) { kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -93,11 +93,11 @@ ComputeSNAGridLocalKokkos::ComputeSNAGridL // start from index 1 because of how compute sna/grid is for (int i = 1; i <= atom->ntypes; i++) { - h_radelem(i-1) = radelem[i]; - h_wjelem(i-1) = wjelem[i]; + h_radelem(i-1) = static_cast(radelem[i]); + h_wjelem(i-1) = static_cast(wjelem[i]); if (switchinnerflag){ - h_sinnerelem(i) = sinnerelem[i]; - h_dinnerelem(i) = dinnerelem[i]; + h_sinnerelem(i) = static_cast(sinnerelem[i]); + h_dinnerelem(i) = static_cast(dinnerelem[i]); } } @@ -119,7 +119,7 @@ ComputeSNAGridLocalKokkos::ComputeSNAGridL } Kokkos::deep_copy(d_test,h_test); - snaKK = SNAKokkos(*this); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0,0,padding_factor); snaKK.init(); @@ -129,8 +129,8 @@ ComputeSNAGridLocalKokkos::ComputeSNAGridL // Destructor -template -ComputeSNAGridLocalKokkos::~ComputeSNAGridLocalKokkos() +template +ComputeSNAGridLocalKokkos::~ComputeSNAGridLocalKokkos() { if (copymode) return; @@ -139,8 +139,8 @@ ComputeSNAGridLocalKokkos::~ComputeSNAGrid // Setup -template -void ComputeSNAGridLocalKokkos::setup() +template +void ComputeSNAGridLocalKokkos::setup() { ComputeGridLocal::setup(); @@ -153,8 +153,8 @@ void ComputeSNAGridLocalKokkos::setup() // Compute -template -void ComputeSNAGridLocalKokkos::compute_local() +template +void ComputeSNAGridLocalKokkos::compute_local() { if (host_flag) { ComputeSNAGridLocal::compute_array(); @@ -196,15 +196,15 @@ void ComputeSNAGridLocalKokkos::compute_lo const int chunk_size_div = (chunk_size + vector_length - 1) / 
vector_length; if (triclinic) { - h0 = domain->h[0]; - h1 = domain->h[1]; - h2 = domain->h[2]; - h3 = domain->h[3]; - h4 = domain->h[4]; - h5 = domain->h[5]; - lo0 = domain->boxlo[0]; - lo1 = domain->boxlo[1]; - lo2 = domain->boxlo[2]; + h0 = static_cast(domain->h[0]); + h1 = static_cast(domain->h[1]); + h2 = static_cast(domain->h[2]); + h3 = static_cast(domain->h[3]); + h4 = static_cast(domain->h[4]); + h5 = static_cast(domain->h[5]); + lo0 = static_cast(domain->boxlo[0]); + lo1 = static_cast(domain->boxlo[1]); + lo2 = static_cast(domain->boxlo[2]); } while (chunk_offset < total_range) { // chunk up loop to prevent running out of memory @@ -335,9 +335,9 @@ void ComputeSNAGridLocalKokkos::compute_lo /* Simple team policy functor seeing how many layers deep we can go with the parallelism. */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { // This function follows similar procedure as ComputeNeigh of PairSNAPKokkos. // Main difference is that we don't use the neighbor class or neighbor variables here. 
@@ -379,7 +379,7 @@ void ComputeSNAGridLocalKokkos::operator() iy += nylo; ix += nxlo; - double xgrid[3]; + KK_FLOAT xgrid[3]; // index ii already captures the proper grid point //int igrid = iz * (nx * ny) + iy * nx + ix; @@ -387,9 +387,9 @@ void ComputeSNAGridLocalKokkos::operator() // grid2x converts igrid to ix,iy,iz like we've done before // multiply grid integers by grid spacing delx, dely, delz //grid2x(igrid, xgrid); - xgrid[0] = ix * delx; - xgrid[1] = iy * dely; - xgrid[2] = iz * delz; + xgrid[0] = static_cast(ix * delx); + xgrid[1] = static_cast(iy * dely); + xgrid[2] = static_cast(iz * delz); if (triclinic) { @@ -405,13 +405,13 @@ void ComputeSNAGridLocalKokkos::operator() xgrid[2] = h2*xgrid[2] + lo2; } - const double xtmp = xgrid[0]; - const double ytmp = xgrid[1]; - const double ztmp = xgrid[2]; + const double xtmp = static_cast(xgrid[0]); + const double ytmp = static_cast(xgrid[1]); + const double ztmp = static_cast(xgrid[2]); // Zeroing out the components, which are filled as a sum. for (int icol = size_local_cols_base; icol < size_local_cols; icol++){ - d_alocal(igrid, icol) = 0.0; + d_alocal(igrid, icol) = 0; } // Fill grid info columns @@ -435,9 +435,9 @@ void ComputeSNAGridLocalKokkos::operator() // Looping over ntotal for now. 
for (int j = 0; j < ntotal; j++){ - const double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; int jtype = type(j); const double rsq = dx*dx + dy*dy + dz*dz; @@ -457,9 +457,9 @@ void ComputeSNAGridLocalKokkos::operator() for (int j = 0; j < ntotal; j++){ //const int jtype = type_cache[j]; //if (jtype >= 0) { - const double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; const double rsq = dx*dx + dy*dy + dz*dz; int jtype = type(j); if (rsq < rnd_cutsq(itype,jtype) && rsq > 1e-20) { @@ -471,11 +471,11 @@ void ComputeSNAGridLocalKokkos::operator() // pair snap uses jelem here, but we use jtype, see compute_sna_grid.cpp // actually since the views here have values starting at 0, let's use jelem snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); - snaKK.rcutij(ii,offset) = static_cast((2.0 * d_radelem[jelem])*rcutfac); + snaKK.rcutij(ii,offset) = static_cast((2.0 * static_cast(d_radelem[jelem]))*rcutfac); snaKK.inside(ii,offset) = j; if (switchinnerflag) { - snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = static_cast(0.5)*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = static_cast(0.5)*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) snaKK.element(ii,offset) = jelem; @@ -490,9 +490,9 @@ void ComputeSNAGridLocalKokkos::operator() Pre-compute the Cayley-Klein parameters for reuse in later routines ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void 
ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -508,9 +508,9 @@ void ComputeSNAGridLocalKokkos::operator() and zero terms elsewhere ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -520,9 +520,9 @@ void ComputeSNAGridLocalKokkos::operator() snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom, const int& j) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom, const int& j) const { if (iatom >= chunk_size) return; int itype = type(iatom); @@ -531,9 +531,9 @@ void ComputeSNAGridLocalKokkos::operator() snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalPreUi, const int& iatom) const { if (iatom >= chunk_size) return; const int itype = type(iatom); @@ -543,10 +543,10 @@ void ComputeSNAGridLocalKokkos::operator() snaKK.pre_ui(iatom, j, ielem); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiSmall, +void 
ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiSmall, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -571,10 +571,10 @@ void ComputeSNAGridLocalKokkos::operator() } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiLarge, +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeUiLarge, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -601,25 +601,25 @@ void ComputeSNAGridLocalKokkos::operator() structure. Zero-initialize ylist. CPU and GPU. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (idxu >= snaKK.idxu_max) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom, const int& idxu) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom, const int& idxu) const { if (iatom >= chunk_size) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalTransformUi, const int& iatom) const { if (iatom >= chunk_size) return; for (int idxu = 0; idxu < snaKK.idxu_max; idxu++) snaKK.transform_ui(iatom, idxu); @@ -630,25 +630,25 
@@ void ComputeSNAGridLocalKokkos::operator() view ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjz >= snaKK.idxz_max) return; snaKK.template compute_zi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom, const int& jjz) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom, const int& jjz) const { if (iatom >= chunk_size) return; snaKK.template compute_zi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeZi, const int& iatom) const { if (iatom >= chunk_size) return; for (int jjz = 0; jjz < snaKK.idxz_max; jjz++) snaKK.template compute_zi(iatom, jjz); @@ -658,33 +658,33 @@ void ComputeSNAGridLocalKokkos::operator() Compute the energy triple products and store in the "blist" view ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjb >= 
snaKK.idxb_max) return; snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom, const int& jjb) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom, const int& jjb) const { if (iatom >= chunk_size) return; snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocalComputeBi, const int& iatom) const { if (iatom >= chunk_size) return; for (int jjb = 0; jjb < snaKK.idxb_max; jjb++) snaKK.template compute_bi(iatom, jjb); } -template +template KOKKOS_INLINE_FUNCTION -void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocal2Fill, const int& ii) const { +void ComputeSNAGridLocalKokkos::operator() (TagCSNAGridLocal2Fill, const int& ii) const { // extract grid index int igrid = ii + chunk_offset; @@ -699,7 +699,7 @@ void ComputeSNAGridLocalKokkos::operator() iy += nylo; ix += nxlo; - double xgrid[3]; + KK_FLOAT xgrid[3]; // index ii already captures the proper grid point // int igrid = iz * (nx * ny) + iy * nx + ix; @@ -707,9 +707,9 @@ void ComputeSNAGridLocalKokkos::operator() // grid2x converts igrid to ix,iy,iz like we've done before //grid2x(igrid, xgrid); - xgrid[0] = ix * delx; - xgrid[1] = iy * dely; - xgrid[2] = iz * delz; + xgrid[0] = static_cast(ix * delx); + xgrid[1] = static_cast(iy * dely); + xgrid[2] = static_cast(iz * delz); if (triclinic) { // Do a conversion on `xgrid` here like we do in the CPU version. 
@@ -732,7 +732,7 @@ void ComputeSNAGridLocalKokkos::operator() for (int icoeff = 0; icoeff < ncoeff; icoeff++) { const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; - d_alocal(igrid,icoeff+6) = snaKK.blist(ii,idx_chem,idxb); + d_alocal(igrid,icoeff+6) = static_cast(snaKK.blist(ii,idx_chem,idxb)); } } @@ -741,9 +741,9 @@ void ComputeSNAGridLocalKokkos::operator() utility functions ------------------------------------------------------------------------- */ -template +template template -void ComputeSNAGridLocalKokkos::check_team_size_for(int inum, int &team_size) { +void ComputeSNAGridLocalKokkos::check_team_size_for(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); @@ -752,9 +752,9 @@ void ComputeSNAGridLocalKokkos::check_team team_size = team_size_max/vector_length; } -template +template template -void ComputeSNAGridLocalKokkos::check_team_size_reduce(int inum, int &team_size) { +void ComputeSNAGridLocalKokkos::check_team_size_reduce(int inum, int &team_size) { int team_size_max; team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag()); @@ -763,9 +763,9 @@ void ComputeSNAGridLocalKokkos::check_team team_size = team_size_max/vector_length; } -template +template template -int ComputeSNAGridLocalKokkos::scratch_size_helper(int values_per_team) { +int ComputeSNAGridLocalKokkos::scratch_size_helper(int values_per_team) { typedef Kokkos::View > ScratchViewType; return ScratchViewType::shmem_size(values_per_team); @@ -780,7 +780,7 @@ int ComputeSNAGridLocalKokkos::scratch_siz template ComputeSNAGridLocalKokkosDevice::ComputeSNAGridLocalKokkosDevice(class LAMMPS *lmp, int narg, char **arg) - : ComputeSNAGridLocalKokkos(lmp, narg, arg) { ; } + : ComputeSNAGridLocalKokkos(lmp, narg, arg) { ; } template void ComputeSNAGridLocalKokkosDevice::compute_local() @@ -791,7 +791,7 @@ void 
ComputeSNAGridLocalKokkosDevice::compute_local() #ifdef LMP_KOKKOS_GPU template ComputeSNAGridLocalKokkosHost::ComputeSNAGridLocalKokkosHost(class LAMMPS *lmp, int narg, char **arg) - : ComputeSNAGridLocalKokkos(lmp, narg, arg) { ; } + : ComputeSNAGridLocalKokkos(lmp, narg, arg) { ; } template void ComputeSNAGridLocalKokkosHost::compute_local() diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 7e17554bd71..daecaa0be2c 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -34,7 +34,7 @@ PairStyle(snap/kk/host,PairSNAPKokkosDevice); namespace LAMMPS_NS { // pre-declare so sna_kokkos.h can refer to it -template class PairSNAPKokkos; +template class PairSNAPKokkos; }; #include "sna_kokkos.h" @@ -69,7 +69,7 @@ struct TagPairSNAPComputeUiCPU{}; struct TagPairSNAPComputeDuidrjCPU{}; struct TagPairSNAPComputeDeidrjCPU{}; -template +template class PairSNAPKokkos : public PairSNAP { public: enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD}; @@ -82,6 +82,7 @@ class PairSNAPKokkos : public PairSNAP { static constexpr bool legacy_on_gpu = false; // run the CPU path on the GPU static constexpr int vector_length = vector_length_; using real_type = real_type_; + using accum_type = accum_type_; using complex = SNAComplex; // Static team/tile sizes for device offload @@ -422,8 +423,8 @@ class PairSNAPKokkos : public PairSNAP { template KOKKOS_INLINE_FUNCTION void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, - const double &fx, const double &fy, const double &fz, - const double &delx, const double &dely, const double &delz) const; + const accum_type &fx, const accum_type &fy, const accum_type &fz, + const real_type &delx, const real_type &dely, const real_type &delz) const; protected: typename AT::t_neighbors_2d d_neighbors; @@ -435,7 +436,7 @@ class PairSNAPKokkos : public PairSNAP { typename AT::t_kkacc_1d d_eatom; typename AT::t_kkacc_1d_6 d_vatom; - SNAKokkos snaKK; + SNAKokkos snaKK; int inum, max_neighs, 
batched_max_neighs, chunk_size, chunk_offset; int neighflag; @@ -446,12 +447,12 @@ class PairSNAPKokkos : public PairSNAP { Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights - typename SNAKokkos::t_sna_2d_lr d_coeffelem; // element bispectrum coefficients + typename SNAKokkos::t_sna_2d_lr d_coeffelem; // element bispectrum coefficients Kokkos::View d_sinnerelem; // element inner cutoff midpoint Kokkos::View d_dinnerelem; // element inner cutoff half-width Kokkos::View d_map; // mapping from atom types to elements Kokkos::View d_ninside; // ninside for all atoms in list - typename SNAKokkos::t_sna_2d d_beta; // betas for all atoms in list + typename SNAKokkos::t_sna_2d d_beta; // betas for all atoms in list typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; @@ -487,7 +488,7 @@ class PairSNAPKokkos : public PairSNAP { int scratch_size_helper(int values_per_team); // Make SNAKokkos a friend - friend class SNAKokkos; + friend class SNAKokkos; }; @@ -496,10 +497,10 @@ class PairSNAPKokkos : public PairSNAP { // of extra template parameters template -class PairSNAPKokkosDevice : public PairSNAPKokkos { +class PairSNAPKokkosDevice : public PairSNAPKokkos { private: - using Base = PairSNAPKokkos; + using Base = PairSNAPKokkos; public: @@ -515,10 +516,10 @@ class PairSNAPKokkosDevice : public PairSNAPKokkos -class PairSNAPKokkosHost : public PairSNAPKokkos { +class PairSNAPKokkosHost : public PairSNAPKokkos { private: - using Base = PairSNAPKokkos; + using Base = PairSNAPKokkos; public: diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index ce8304e8802..00d7bffcdd9 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -39,8 +39,8 @@ namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ -template -PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp) +template +PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : 
PairSNAP(lmp) { respa_enable = 0; @@ -53,8 +53,8 @@ PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp /* ---------------------------------------------------------------------- */ -template -PairSNAPKokkos::~PairSNAPKokkos() +template +PairSNAPKokkos::~PairSNAPKokkos() { if (copymode) return; @@ -66,8 +66,8 @@ PairSNAPKokkos::~PairSNAPKokkos() init specific to this pair style ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::init_style() +template +void PairSNAPKokkos::init_style() { if (force->newton_pair == 0) error->all(FLERR,"Pair style SNAP requires newton pair on"); @@ -107,8 +107,8 @@ struct FindMaxNumNeighs { This version is a straightforward implementation ---------------------------------------------------------------------- */ -template -void PairSNAPKokkos::compute(int eflag_in, int vflag_in) +template +void PairSNAPKokkos::compute(int eflag_in, int vflag_in) { eflag = eflag_in; vflag = vflag_in; @@ -392,14 +392,14 @@ void PairSNAPKokkos::compute(int eflag_in, if (need_dup) Kokkos::Experimental::contribute(f, dup_f); - if (eflag_global) eng_vdwl += ev.evdwl; + if (eflag_global) eng_vdwl += static_cast(ev.evdwl); if (vflag_global) { - virial[0] += ev.v[0]; - virial[1] += ev.v[1]; - virial[2] += ev.v[2]; - virial[3] += ev.v[3]; - virial[4] += ev.v[4]; - virial[5] += ev.v[5]; + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); } if (vflag_fdotr) pair_virial_fdotr_compute(this); @@ -431,8 +431,8 @@ void PairSNAPKokkos::compute(int eflag_in, allocate all arrays ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::allocate() +template +void PairSNAPKokkos::allocate() { PairSNAP::allocate(); @@ -447,8 +447,8 @@ void PairSNAPKokkos::allocate() init for one type pair i,j and corresponding 
j,i ------------------------------------------------------------------------- */ -template -double PairSNAPKokkos::init_one(int i, int j) +template +double PairSNAPKokkos::init_one(int i, int j) { double cutone = PairSNAP::init_one(i,j); k_cutsq.view_host()(i,j) = k_cutsq.view_host()(j,i) = cutone*cutone; @@ -461,8 +461,8 @@ double PairSNAPKokkos::init_one(int i, int set coeffs for one or more type pairs ------------------------------------------------------------------------- */ -template -void PairSNAPKokkos::coeff(int narg, char **arg) +template +void PairSNAPKokkos::coeff(int narg, char **arg) { PairSNAP::coeff(narg,arg); @@ -482,12 +482,12 @@ void PairSNAPKokkos::coeff(int narg, char auto h_map = Kokkos::create_mirror_view(d_map); for (int ielem = 0; ielem < nelements; ielem++) { - h_radelem(ielem) = radelem[ielem]; - h_wjelem(ielem) = wjelem[ielem]; - h_sinnerelem(ielem) = sinnerelem[ielem]; - h_dinnerelem(ielem) = dinnerelem[ielem]; + h_radelem(ielem) = static_cast(radelem[ielem]); + h_wjelem(ielem) = static_cast(wjelem[ielem]); + h_sinnerelem(ielem) = static_cast(sinnerelem[ielem]); + h_dinnerelem(ielem) = static_cast(dinnerelem[ielem]); for (int jcoeff = 0; jcoeff < ncoeffall; jcoeff++) { - h_coeffelem(ielem,jcoeff) = coeffelem[ielem][jcoeff]; + h_coeffelem(ielem,jcoeff) = static_cast(coeffelem[ielem][jcoeff]); } } @@ -502,7 +502,7 @@ void PairSNAPKokkos::coeff(int narg, char Kokkos::deep_copy(d_dinnerelem,h_dinnerelem); Kokkos::deep_copy(d_map,h_map); - snaKK = SNAKokkos(*this); + snaKK = SNAKokkos(*this); snaKK.grow_rij(0, 0, padding_factor); snaKK.init(); } @@ -512,9 +512,9 @@ void PairSNAPKokkos::coeff(int narg, char of AoSoA data layouts and scratch memory for recursive polynomials ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() 
(TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { // extract atom number int ii = team.team_rank() + team.league_rank() * team.team_size(); @@ -531,12 +531,12 @@ void PairSNAPKokkos::operator() (TagPairSN // Load various info about myself up front const int i = d_ilist[ii + chunk_offset]; - const double xtmp = x(i,0); - const double ytmp = x(i,1); - const double ztmp = x(i,2); + const double xtmp = static_cast(x(i,0)); + const double ytmp = static_cast(x(i,1)); + const double ztmp = static_cast(x(i,2)); const int itype = type[i]; const int ielem = d_map[itype]; - const double radi = d_radelem[ielem]; + const double radi = static_cast(d_radelem[ielem]); const int num_neighs = d_numneigh[i]; @@ -551,9 +551,9 @@ void PairSNAPKokkos::operator() (TagPairSN Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,num_neighs), [&] (const int jj, int& count) { T_INT j = d_neighbors(i,jj); - const double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; int jtype = type(j); const double rsq = dx*dx + dy*dy + dz*dz; @@ -578,19 +578,19 @@ void PairSNAPKokkos::operator() (TagPairSN if (jtype >= 0) { if (final) { T_INT j = d_neighbors(i,jj); - const double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; const int jelem = d_map[jtype]; snaKK.rij(ii,offset,0) = static_cast(dx); snaKK.rij(ii,offset,1) = static_cast(dy); snaKK.rij(ii,offset,2) = static_cast(dz); snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); - snaKK.rcutij(ii,offset) = static_cast((radi + d_radelem[jelem])*rcutfac); + snaKK.rcutij(ii,offset) = static_cast((radi + static_cast(d_radelem[jelem]))*rcutfac); 
snaKK.inside(ii,offset) = j; if (switchinnerflag) { - snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = static_cast(0.5)*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = static_cast(0.5)*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) snaKK.element(ii,offset) = jelem; @@ -602,20 +602,20 @@ void PairSNAPKokkos::operator() (TagPairSN }); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy::member_type& team) const { int ii = team.league_rank(); if (ii >= chunk_size) return; const int i = d_ilist[ii + chunk_offset]; - const double xtmp = x(i,0); - const double ytmp = x(i,1); - const double ztmp = x(i,2); + const double xtmp = static_cast(x(i,0)); + const double ytmp = static_cast(x(i,1)); + const double ztmp = static_cast(x(i,2)); const int itype = type[i]; const int ielem = d_map[itype]; - const double radi = d_radelem[ielem]; + const double radi = static_cast(d_radelem[ielem]); const int num_neighs = d_numneigh[i]; @@ -630,9 +630,9 @@ void PairSNAPKokkos::operator() (TagPairSN [&] (const int jj, int& count) { Kokkos::single(Kokkos::PerThread(team), [&] () { T_INT j = d_neighbors(i,jj); - const double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; const int jtype = type(j); const double rsq = dx*dx + dy*dy + dz*dz; @@ -649,9 +649,9 @@ void PairSNAPKokkos::operator() (TagPairSN [&] (const int jj, int& offset, bool final) { //for (int jj = 0; jj < num_neighs; jj++) { T_INT j = d_neighbors(i,jj); - const 
double dx = x(j,0) - xtmp; - const double dy = x(j,1) - ytmp; - const double dz = x(j,2) - ztmp; + const double dx = static_cast(x(j,0)) - xtmp; + const double dy = static_cast(x(j,1)) - ytmp; + const double dz = static_cast(x(j,2)) - ztmp; const int jtype = type(j); const double rsq = dx*dx + dy*dy + dz*dz; @@ -663,11 +663,11 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.rij(ii,offset,1) = static_cast(dy); snaKK.rij(ii,offset,2) = static_cast(dz); snaKK.wj(ii,offset) = static_cast(d_wjelem[jelem]); - snaKK.rcutij(ii,offset) = static_cast((radi + d_radelem[jelem])*rcutfac); + snaKK.rcutij(ii,offset) = static_cast((radi + static_cast(d_radelem[jelem]))*rcutfac); snaKK.inside(ii,offset) = j; if (switchinnerflag) { - snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); - snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); + snaKK.sinnerij(ii,offset) = static_cast(0.5)*(d_sinnerelem[ielem] + d_sinnerelem[jelem]); + snaKK.dinnerij(ii,offset) = static_cast(0.5)*(d_dinnerelem[ielem] + d_dinnerelem[jelem]); } if (chemflag) snaKK.element(ii,offset) = jelem; @@ -684,9 +684,9 @@ void PairSNAPKokkos::operator() (TagPairSN GPU only. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -702,9 +702,9 @@ void PairSNAPKokkos::operator() (TagPairSN and zero terms elsewhere; both CPU and GPU. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; @@ -714,9 +714,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom, const int& j) const { +void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom, const int& j) const { if (iatom >= chunk_size) return; int itype = type(iatom); @@ -725,9 +725,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.pre_ui(iatom, j, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPPreUi, const int& iatom) const { if (iatom >= chunk_size) return; const int itype = type(iatom); @@ -743,10 +743,10 @@ void PairSNAPKokkos::operator() (TagPairSN staging. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiSmall, +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiSmall, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -775,10 +775,10 @@ void PairSNAPKokkos::operator() (TagPairSN } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiLarge, +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiLarge, const typename Kokkos::TeamPolicy>::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -809,9 +809,9 @@ void PairSNAPKokkos::operator() (TagPairSN ulisttot_re and _im. This routine is CPU only and does not use staging. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); @@ -819,18 +819,18 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_ui_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, const int& iatom, const int& jnbor) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, const int& iatom, const int& jnbor) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); if (jnbor >= ninside) return; snaKK.template compute_ui_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, 
const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeUiCPU, const int& iatom) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); for (int jnbor = 0; jnbor < ninside; jnbor++) @@ -842,25 +842,25 @@ void PairSNAPKokkos::operator() (TagPairSN structure. Zero-initialize ylist. CPU and GPU. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (idxu >= snaKK.idxu_max) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom, const int& idxu) const { +void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom, const int& idxu) const { if (iatom >= chunk_size) return; snaKK.transform_ui(iatom, idxu); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPTransformUi, const int& iatom) const { if (iatom >= chunk_size) return; for (int idxu = 0; idxu < snaKK.idxu_max; idxu++) snaKK.transform_ui(iatom, idxu); @@ -872,18 +872,18 @@ void PairSNAPKokkos::operator() (TagPairSN CPU and GPU. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { const int iatom = iatom_mod + yi_batch * iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjz >= snaKK.idxz_max) return; snaKK.template compute_zi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom, const int& jjz) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom, const int& jjz) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -894,9 +894,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_zi(iatom_shift, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeZi, const int& iatom) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -913,18 +913,18 @@ void PairSNAPKokkos::operator() (TagPairSN CPU and GPU. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom_mod, const int& jjb, const int& iatom_div) const { const int iatom = iatom_mod + yi_batch * iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjb >= snaKK.idxb_max) return; snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom, const int& jjb) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom, const int& jjb) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -935,9 +935,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_bi(iatom, jjb); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBi, const int& iatom) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -955,9 +955,9 @@ void PairSNAPKokkos::operator() (TagPairSN contribution is added in a subsequent kernel. CPU and GPU. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom_mod, const int& idxb, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom_mod, const int& idxb, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (idxb >= snaKK.idxb_max) return; @@ -969,9 +969,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.compute_beta_linear(iatom, idxb, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom, const int& idxb) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom, const int& idxb) const { if (iatom >= chunk_size) return; const int i = d_ilist[iatom + chunk_offset]; @@ -981,9 +981,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.compute_beta_linear(iatom, idxb, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaLinear, const int& iatom) const { if (iatom >= chunk_size) return; const int i = d_ilist[iatom + chunk_offset]; @@ -1001,9 +1001,9 @@ void PairSNAPKokkos::operator() (TagPairSN CPU and GPU. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom_mod, const int& idxb, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom_mod, const int& idxb, const int& iatom_div) const { const int iatom = iatom_mod + iatom_div * vector_length; if (iatom >= chunk_size) return; if (idxb >= snaKK.idxb_max) return; @@ -1015,9 +1015,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_beta_quadratic(iatom, idxb, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom, const int& idxb) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom, const int& idxb) const { if (iatom >= chunk_size) return; const int i = d_ilist[iatom + chunk_offset]; @@ -1027,9 +1027,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_beta_quadratic(iatom, idxb, ielem); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom) const { if (iatom >= chunk_size) return; const int i = d_ilist[iatom + chunk_offset]; @@ -1045,18 +1045,18 @@ void PairSNAPKokkos::operator() (TagPairSN adjoint matrices Y (ylist_re, _im) on non-energy timesteps. CPU and GPU. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom_mod, const int& jjz, const int& iatom_div) const { const int iatom = iatom_mod + yi_batch * iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjz >= snaKK.idxz_max) return; snaKK.template compute_yi(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom, const int& jjz) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom, const int& jjz) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -1067,9 +1067,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_yi(iatom_shift, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYi, const int& iatom) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -1086,18 +1086,18 @@ void PairSNAPKokkos::operator() (TagPairSN matrices Y (ylist_re, _im) on non-energy timesteps. CPU and GPU. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom_mod, const int& jjz, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom_mod, const int& jjz, const int& iatom_div) const { const int iatom = iatom_mod + yi_batch * iatom_div * vector_length; if (iatom >= chunk_size) return; if (jjz >= snaKK.idxz_max) return; snaKK.template compute_yi_with_zlist(iatom, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom, const int& jjz) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom, const int& jjz) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -1108,9 +1108,9 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.template compute_yi_with_zlist(iatom_shift, jjz); } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeYiWithZlist, const int& iatom) const { int iatom_shift = iatom; if constexpr (yi_batch != 1) { const int iatom_div = iatom / vector_length; @@ -1129,10 +1129,10 @@ void PairSNAPKokkos::operator() (TagPairSN staging. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjSmall,const typename Kokkos::TeamPolicy >::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjSmall,const typename Kokkos::TeamPolicy >::member_type& team) const { // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj; @@ -1157,10 +1157,10 @@ void PairSNAPKokkos::operator() (TagPairSN } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjLarge,const typename Kokkos::TeamPolicy >::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjLarge,const typename Kokkos::TeamPolicy >::member_type& team) const { // extract flattened atom_div / neighbor number / bend location int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj; @@ -1184,9 +1184,9 @@ void PairSNAPKokkos::operator() (TagPairSN /* These are super-fused routines that handle all directions at once */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjAllSmall, +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjAllSmall, const typename Kokkos::TeamPolicy::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -1212,9 +1212,9 @@ void PairSNAPKokkos::operator() (TagPairSN } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjAllLarge, +void PairSNAPKokkos::operator() (TagPairSNAPComputeFusedDeidrjAllLarge, const typename Kokkos::TeamPolicy::member_type& team) const { // extract flattened atom_div / neighbor number / bend location @@ -1242,9 +1242,9 @@ void PairSNAPKokkos::operator() (TagPairSN 
"dulist". CPU only. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); @@ -1252,18 +1252,18 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.compute_duidrj_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom, const int& jnbor) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom, const int& jnbor) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); if (jnbor >= ninside) return; snaKK.compute_duidrj_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); for (int jnbor = 0; jnbor < ninside; jnbor++) @@ -1276,9 +1276,9 @@ void PairSNAPKokkos::operator() (TagPairSN of the Wigner matrices U. CPU only. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom_mod, const int& jnbor, const int& iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); @@ -1286,18 +1286,18 @@ void PairSNAPKokkos::operator() (TagPairSN snaKK.compute_deidrj_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom, const int& jnbor) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom, const int& jnbor) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); if (jnbor >= ninside) return; snaKK.compute_deidrj_cpu(iatom, jnbor); } -template +template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom) const { if (iatom >= chunk_size) return; const int ninside = d_ninside(iatom); for (int jnbor = 0; jnbor < ninside; jnbor++) @@ -1310,10 +1310,10 @@ void PairSNAPKokkos::operator() (TagPairSN also accumulates the total energy and the virial. CPU and GPU. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeForce, const int& ii, EV_FLOAT& ev) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce, const int& ii, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); @@ -1326,11 +1326,13 @@ void PairSNAPKokkos::operator() (TagPairSN for (int jj = 0; jj < ninside; jj++) { int j = snaKK.inside(ii,jj); - double fij[3]; + accum_type fij[3]; fij[0] = snaKK.dedr(ii,jj,0); fij[1] = snaKK.dedr(ii,jj,1); fij[2] = snaKK.dedr(ii,jj,2); + // in practice KK_ACC_FLOAT is the same as accum_type, so there is no need for an + // explicit cast to a_f's type (KK_ACC_FLOAT). a_f(i,0) += fij[0]; a_f(i,1) += fij[1]; a_f(i,2) += fij[2]; @@ -1359,7 +1361,7 @@ void PairSNAPKokkos::operator() (TagPairSN // evdwl = energy of atom I, sum over coeffs_k * Bi_k - auto evdwl = d_coeffi[0]; + accum_type evdwl = static_cast(d_coeffi[0]); // E = beta.B + 0.5*B^t.alpha.B @@ -1370,7 +1372,7 @@ void PairSNAPKokkos::operator() (TagPairSN for (int icoeff = 0; icoeff < ncoeff; icoeff++) { const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; - evdwl += d_coeffi[icoeff+1]*snaKK.blist(ii,idx_chem,idxb); + evdwl += static_cast(d_coeffi[icoeff+1]*snaKK.blist(ii,idx_chem,idxb)); } // quadratic contributions @@ -1380,73 +1382,64 @@ void PairSNAPKokkos::operator() (TagPairSN const auto idxb = icoeff % idxb_max; const auto idx_chem = icoeff / idxb_max; real_type bveci = snaKK.blist(ii,idx_chem,idxb); - evdwl += 0.5*d_coeffi[k++]*bveci*bveci; + evdwl += static_cast(static_cast(0.5)*d_coeffi[k++]*bveci*bveci); for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { auto jdxb = jcoeff % idxb_max; auto jdx_chem = jcoeff / idxb_max; auto bvecj = snaKK.blist(ii,jdx_chem,jdxb); - 
evdwl += d_coeffi[k++]*bveci*bvecj; + evdwl += static_cast(d_coeffi[k++]*bveci*bvecj); } } } //ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); + // in practice KK_ACC_FLOAT is the same as accum_type, so there is no need for an + // explicit cast to ev.evdwl or d_eatom[i]'s type (KK_ACC_FLOAT). if (eflag_global) ev.evdwl += evdwl; if (eflag_atom) d_eatom[i] += evdwl; } } } -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const int& ii) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const int& ii) const { EV_FLOAT ev; this->template operator()(TagPairSNAPComputeForce(), ii, ev); } /* ---------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, - const double &fx, const double &fy, const double &fz, - const double &delx, const double &dely, const double &delz) const +void PairSNAPKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, + const accum_type &fx, const accum_type &fy, const accum_type &fz, + const real_type &delx, const real_type &dely, const real_type &delz) const { // The vatom array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); - const double v0 = delx*fx; - const double v1 = dely*fy; - const double v2 = delz*fz; - const double v3 = delx*fy; - const double v4 = delx*fz; - const double v5 = dely*fz; + const accum_type v[6] = + { static_cast(delx)*fx, + static_cast(dely)*fy, + static_cast(delz)*fz, + static_cast(delx)*fy, + static_cast(delx)*fz, + static_cast(dely)*fz }; if (vflag_global) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; + for (int n = 0; n < 6; n++) + ev.v[n] += v[n]; } if (vflag_atom) { - 
a_vatom(i,0) += 0.5*v0; - a_vatom(i,1) += 0.5*v1; - a_vatom(i,2) += 0.5*v2; - a_vatom(i,3) += 0.5*v3; - a_vatom(i,4) += 0.5*v4; - a_vatom(i,5) += 0.5*v5; - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; + for (int n = 0; n < 6; n++) { + a_vatom(i,n) += static_cast(0.5)*v[n]; + a_vatom(j,n) += static_cast(0.5)*v[n]; + } } } @@ -1454,8 +1447,8 @@ void PairSNAPKokkos::v_tally_xyz(EV_FLOAT memory usage ------------------------------------------------------------------------- */ -template -double PairSNAPKokkos::memory_usage() +template +double PairSNAPKokkos::memory_usage() { double bytes = Pair::memory_usage(); bytes += MemKK::memory_usage(d_beta); @@ -1469,9 +1462,9 @@ double PairSNAPKokkos::memory_usage() return bytes; } -template +template template -int PairSNAPKokkos::scratch_size_helper(int values_per_team) { +int PairSNAPKokkos::scratch_size_helper(int values_per_team) { typedef Kokkos::View > ScratchViewType; return ScratchViewType::shmem_size(values_per_team); @@ -1483,7 +1476,7 @@ int PairSNAPKokkos::scratch_size_helper(in template PairSNAPKokkosDevice::PairSNAPKokkosDevice(class LAMMPS *lmp) - : PairSNAPKokkos(lmp) { ; } + : PairSNAPKokkos(lmp) { ; } template void PairSNAPKokkosDevice::coeff(int narg, char **arg) @@ -1518,7 +1511,7 @@ double PairSNAPKokkosDevice::memory_usage() #ifdef LMP_KOKKOS_GPU template PairSNAPKokkosHost::PairSNAPKokkosHost(class LAMMPS *lmp) - : PairSNAPKokkos(lmp) { ; } + : PairSNAPKokkos(lmp) { ; } template void PairSNAPKokkosHost::coeff(int narg, char **arg) diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 340d30764c9..b23d2c709eb 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { // copied from pair_snap_kokkos.h // pre-declare so sna_kokkos.h can refer to it -template class PairSNAPKokkos; +template class PairSNAPKokkos; // This class acts as a 
shared memory backing to what otherwise looks like a // per-thread register array. It is specialized to complex numbers, and automatically @@ -182,11 +182,12 @@ struct alignas(16) idxz_struct { }; -template +template class SNAKokkos { public: using real_type = real_type_; + using accum_type = accum_type_; using complex = SNAComplex; static constexpr int vector_length = vector_length_; @@ -211,6 +212,8 @@ class SNAKokkos { typedef Kokkos::View t_sna_3d3; typedef Kokkos::View t_sna_5d; + typedef Kokkos::View t_sna_accum_3d; + typedef Kokkos::View t_sna_1c; typedef Kokkos::View> t_sna_1c_atomic; typedef Kokkos::View t_sna_2c; @@ -228,7 +231,7 @@ class SNAKokkos { SNAKokkos() {}; KOKKOS_INLINE_FUNCTION - SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); + SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); template inline @@ -359,7 +362,7 @@ class SNAKokkos { t_sna_2d sinnerij; t_sna_2d dinnerij; t_sna_2i element; - t_sna_3d dedr; + t_sna_accum_3d dedr; int natom, natom_pad, nmax; void grow_rij(int newnatom, int newnmax, int padding_factor = 1); diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index aa889d6b2fb..93d52e6459f 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -29,14 +29,15 @@ namespace LAMMPS_NS { static const double MY_PI = 3.14159265358979323846; // pi static const double MY_PI2 = 1.57079632679489661923; // pi/2 -template +template template inline -SNAKokkos::SNAKokkos(const CopyClass& copy) - : twojmax(copy.twojmax), d_coeffelem(copy.d_coeffelem), rmin0(copy.rmin0), - rfac0(copy.rfac0), switch_flag(copy.switchflag), switch_inner_flag(copy.switchinnerflag), +SNAKokkos::SNAKokkos(const CopyClass& copy) + : twojmax(copy.twojmax), d_coeffelem(copy.d_coeffelem), rmin0(static_cast(copy.rmin0)), + rfac0(static_cast(copy.rfac0)), legacy_on_gpu(copy.legacy_on_gpu), switch_flag(copy.switchflag), + switch_inner_flag(copy.switchinnerflag), 
chem_flag(copy.chemflag), bnorm_flag(copy.bnormflag), wselfall_flag(copy.wselfallflag), - quadratic_flag(copy.quadraticflag), bzero_flag(copy.bzeroflag), legacy_on_gpu(copy.legacy_on_gpu) + quadratic_flag(copy.quadraticflag), bzero_flag(copy.bzeroflag) { wself = static_cast(1.0); @@ -62,27 +63,27 @@ SNAKokkos::SNAKokkos(const CopyClass& copy MemKK::realloc_kokkos(bzero,"sna:bzero",twojmax+1); auto h_bzero = Kokkos::create_mirror_view(bzero); - double www = wself*wself*wself; + real_type www = wself*wself*wself; for (int j = 0; j <= twojmax; j++) if (bnorm_flag) h_bzero[j] = www; else - h_bzero[j] = www*(j+1); + h_bzero[j] = www * static_cast(j+1); Kokkos::deep_copy(bzero,h_bzero); } } /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -SNAKokkos::~SNAKokkos() +SNAKokkos::~SNAKokkos() { } -template +template inline -void SNAKokkos::build_indexlist() +void SNAKokkos::build_indexlist() { // index list for cglist @@ -278,17 +279,17 @@ void SNAKokkos::build_indexlist() /* ---------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init() +void SNAKokkos::init() { init_clebsch_gordan(); init_rootpqarray(); } -template +template inline -void SNAKokkos::grow_rij(int newnatom, int newnmax, int padding_factor) +void SNAKokkos::grow_rij(int newnatom, int newnmax, int padding_factor) { if (newnatom <= natom && newnmax <= nmax) return; natom = newnatom; @@ -347,9 +348,9 @@ void SNAKokkos::grow_rij(int newnatom, int ComputeFusedDeidrj, which are one warp per atom-neighbor pair. 
------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_cayley_klein(const int& iatom, const int& jnbor) const +void SNAKokkos::compute_cayley_klein(const int& iatom, const int& jnbor) const { const real_type x = rij(iatom,jnbor,0); const real_type y = rij(iatom,jnbor,1); @@ -423,9 +424,9 @@ void SNAKokkos::compute_cayley_klein(const // we need to explicitly zero `dedr` somewhere before hitting // ComputeFusedDeidrj --- this is just a convenient place to do it. - dedr(iatom, jnbor, 0) = static_cast(0.); - dedr(iatom, jnbor, 1) = static_cast(0.); - dedr(iatom, jnbor, 2) = static_cast(0.); + dedr(iatom, jnbor, 0) = 0; + dedr(iatom, jnbor, 1) = 0; + dedr(iatom, jnbor, 2) = 0; } @@ -435,9 +436,9 @@ void SNAKokkos::compute_cayley_klein(const advantage of the symmetry of the Wigner U matrices. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::pre_ui(const int& iatom, const int& j, const int& ielem) const +void SNAKokkos::pre_ui(const int& iatom, const int& j, const int& ielem) const { for (int jelem = 0; jelem < nelements; jelem++) { int jju_half = idxu_half_block(j); @@ -467,10 +468,10 @@ void SNAKokkos::pre_ui(const int& iatom, c // Version of the code that exposes additional parallelism by threading over `j_bend` values -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui_small(const typename Kokkos::TeamPolicy::member_type& team, +void SNAKokkos::compute_ui_small(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int first_jnbor, const int ninside, const int iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; @@ -520,10 +521,10 @@ void SNAKokkos::compute_ui_small(const typ // Version of the code that loops over all `j_bend` values which reduces integer arithmetic // and some amount of load 
imbalance, at the expense of reducing parallelism -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui_large(const typename Kokkos::TeamPolicy::member_type& team, +void SNAKokkos::compute_ui_large(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int first_jnbor, const int ninside, const int iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; @@ -582,10 +583,10 @@ void SNAKokkos::compute_ui_large(const typ } // Core "evaluation" kernel that gets reused in `compute_ui_small` and `compute_ui_large` -template +template template KOKKOS_FORCEINLINE_FUNCTION -void SNAKokkos::evaluate_ui_jbend( +void SNAKokkos::evaluate_ui_jbend( const MultiWignerWrapper& ulist_wrapper, const Kokkos::Array& a, const Kokkos::Array& b, @@ -739,9 +740,9 @@ void SNAKokkos::evaluate_ui_jbend( which still keeps the recursive calculation simple. ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui_cpu(const int& iatom, const int& jnbor) const +void SNAKokkos::compute_ui_cpu(const int& iatom, const int& jnbor) const { // utot(j,ma,mb) = 0 for all j,ma,ma // utot(j,ma,ma) = 1 for all j,ma @@ -755,7 +756,7 @@ void SNAKokkos::compute_ui_cpu(const int& const real_type rsq = x * x + y * y + z * z; const real_type r = sqrt(rsq); - const real_type theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0); + const real_type theta0 = (r - rmin0) * rfac0 * static_cast(MY_PI) / (rcutij(iatom,jnbor) - rmin0); // theta0 = (r - rmin0) * rscale0; const real_type z0 = r / tan(theta0); @@ -863,9 +864,9 @@ void SNAKokkos::compute_ui_cpu(const int& structure, fused in with zeroing ylist ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::transform_ui(const int& iatom, const int& idxu) const +void SNAKokkos::transform_ui(const int& iatom, 
const int& idxu) const { int elem_count = chem_flag ? nelements : 1; @@ -895,9 +896,9 @@ void SNAKokkos::transform_ui(const int& ia compute Zi by summing over products of Ui ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_zi(const int& iatom, const int& jjz) const +void SNAKokkos::compute_zi(const int& iatom, const int& jjz) const { int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, idxcg; idxz(jjz).get_zi(j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, idxcg); @@ -930,9 +931,9 @@ void SNAKokkos::compute_zi(const int& iato This gets used in both `compute_zi` and `compute_yi` ------------------------------------------------------------------------- */ -template +template template KOKKOS_FORCEINLINE_FUNCTION -auto SNAKokkos::evaluate_zi(const int& j1, const int& j2, const int& j, +auto SNAKokkos::evaluate_zi(const int& j1, const int& j2, const int& j, const int& ma1min, const int& ma2max, const int& mb1min, const int& mb2max, const int& na, const int& nb, const int& iatom, const int& elem1, const int& elem2, const real_type* cgblock) const { Kokkos::Array zval; @@ -992,9 +993,9 @@ auto SNAKokkos::evaluate_zi(const int& j1, divergence. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_bi(const int& iatom, const int& jjb) const +void SNAKokkos::compute_bi(const int& iatom, const int& jjb) const { // for j1 = 0,...,twojmax // for j2 = 0,twojmax @@ -1038,9 +1039,9 @@ void SNAKokkos::compute_bi(const int& iato This gets used in `compute_bi` ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -auto SNAKokkos::evaluate_bi(const int& j, const int& jjz, const int& jju, const int& iatom, const int& elem1, const int& elem2, const int& elem3) const +auto SNAKokkos::evaluate_bi(const int& j, const int& jjz, const int& jju, const int& iatom, const int& elem1, const int& elem2, const int& elem3) const { // this computes the: // b(j1,j2,j) = 0 @@ -1051,7 +1052,9 @@ auto SNAKokkos::evaluate_bi(const int& j, // portion const int idouble = elem1 * nelements + elem2; - Kokkos::Array bval; + + // For mixed precision runs, accumulate bval in a higher precision + Kokkos::Array bval; register_loop([&] (int n) -> void { bval[n] = 0; }); @@ -1063,8 +1066,8 @@ auto SNAKokkos::evaluate_bi(const int& j, register_loop([&] (int n) -> void { const complex utot = ulisttot(iatom + n * vector_length, elem3, jju_index); const complex zloc = zlist(iatom + n * vector_length, idouble, jjz_index); - bval[n] += utot.re * zloc.re; - bval[n] += utot.im * zloc.im; + bval[n] += static_cast(utot.re * zloc.re); + bval[n] += static_cast(utot.im * zloc.im); }); jju_index++; jjz_index++; @@ -1073,11 +1076,6 @@ auto SNAKokkos::evaluate_bi(const int& j, // For j even, special treatment for middle column if (j % 2 == 0) { - Kokkos::Array btmp; - register_loop([&] (int n) -> void { - btmp[n] = 0; - }); - const int mb = j / 2; int jju_index = jju + mb * (j + 1); int jjz_index = jjz + mb * (j + 1); @@ -1085,40 +1083,45 @@ auto SNAKokkos::evaluate_bi(const int& j, 
register_loop([&] (int n) -> void { const complex utot = ulisttot(iatom + n * vector_length, elem3, jju_index); const complex zloc = zlist(iatom + n * vector_length, idouble, jjz_index); - btmp[n] += utot.re * zloc.re; - btmp[n] += utot.im * zloc.im; + bval[n] += static_cast(utot.re * zloc.re); + bval[n] += static_cast(utot.im * zloc.im); }); jju_index++; jjz_index++; } - register_loop([&] (int n) -> void { - bval[n] += btmp[n]; - }); - // const int ma = mb; // const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; // const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; register_loop([&] (int n) -> void { const complex utot = ulisttot(iatom + n * vector_length, elem3, jju_index); const complex zloc = zlist(iatom + n * vector_length, idouble, jjz_index); - bval[n] += static_cast(0.5) * (utot.re * zloc.re + utot.im * zloc.im); + bval[n] += static_cast(static_cast(0.5) * (utot.re * zloc.re + utot.im * zloc.im)); }); } // end if jeven register_loop([&] (int n) -> void { - bval[n] *= static_cast(2.0); + bval[n] *= static_cast(2.0); if (bzero_flag) { if (!wselfall_flag) { if (elem1 == elem2 && elem1 == elem3) { - bval[n] -= bzero[j]; + bval[n] -= static_cast(bzero[j]); } } else { - bval[n] -= bzero[j]; + bval[n] -= static_cast(bzero[j]); } } }); - return bval; + + if constexpr (std::is_same_v) { + return bval; + } else { + Kokkos::Array bval_reduced; + register_loop([&] (int n) -> void { + bval_reduced[n] = static_cast(bval[n]); + }); + return bval_reduced; + } //} // end loop over j //} // end loop over j1, j2 } @@ -1129,9 +1132,9 @@ auto SNAKokkos::evaluate_bi(const int& j, or accumulating the quadratic terms from blist ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_beta_linear(const int& iatom, const int& idxb, const int& ielem) const +void SNAKokkos::compute_beta_linear(const int& iatom, const int& idxb, const int& ielem) const { auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, 
Kokkos::ALL); @@ -1141,10 +1144,10 @@ void SNAKokkos::compute_beta_linear(const } } -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_beta_quadratic(const int& iatom, const int& idxb, const int& ielem) const +void SNAKokkos::compute_beta_quadratic(const int& iatom, const int& idxb, const int& ielem) const { auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL); @@ -1203,10 +1206,10 @@ void SNAKokkos::compute_beta_quadratic(con Compute Yi from Ui without storing Zi, looping over zlist indices. ------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_yi(const int& iatom, const int& jjz) const +void SNAKokkos::compute_yi(const int& iatom, const int& jjz) const { int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju_half, idxcg; idxz(jjz).get_yi(j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju_half, idxcg); @@ -1263,10 +1266,10 @@ void SNAKokkos::compute_yi(const int& iato compute Yi from Ui with the precomputed Zi. 
------------------------------------------------------------------------- */ -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_yi_with_zlist(const int& iatom, const int& jjz) const +void SNAKokkos::compute_yi_with_zlist(const int& iatom, const int& jjz) const { int j1, j2, j, jju_half; idxz(jjz).get_yi_with_zlist(j1, j2, j, jju_half); @@ -1327,9 +1330,9 @@ void SNAKokkos::compute_yi_with_zlist(cons `beta` value which gets used in both `compute_yi` and `compute_yi_from_zlist` ------------------------------------------------------------------------- */ -template +template template KOKKOS_FORCEINLINE_FUNCTION -auto SNAKokkos::evaluate_beta_scaled(const int& j1, const int& j2, const int& j, +auto SNAKokkos::evaluate_beta_scaled(const int& j1, const int& j2, const int& j, const int& iatom, const int& elem1, const int& elem2, const int& elem3) const { int itriple_jjb = 0; @@ -1390,10 +1393,10 @@ auto SNAKokkos::evaluate_beta_scaled(const ------------------------------------------------------------------------- */ // Version of the code that exposes additional parallelism by threading over `j_bend` values -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const +void SNAKokkos::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; // get shared memory offset @@ -1422,21 +1425,21 @@ void SNAKokkos::compute_fused_deidrj_small }); // compute the contribution to dedr_full_sum for one "bend" location - const Kokkos::Array dedr_full_sum = evaluate_duidrj_jbend(ulist_wrapper, a, b, sfac, dulist_wrapper, da, db, dsfacu, + const Kokkos::Array dedr_full_sum = evaluate_duidrj_jbend(ulist_wrapper, a, b, sfac, 
dulist_wrapper, da, db, dsfacu, jelem, iatom, j_bend); // dedr gets zeroed out at the start of each iteration in compute_cayley_klein register_loop([&] (int d) -> void { - Kokkos::atomic_add(&(dedr(iatom, jnbor, start + d)), static_cast(2.0) * dedr_full_sum[d]); + Kokkos::atomic_add(&(dedr(iatom, jnbor, start + d)), static_cast(2.0) * dedr_full_sum[d]); }); } // Version of the code that loops over all `j_bend` values which reduces integer arithmetic // and some amount of load imbalance, at the expense of reducing parallelism -template +template template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const +void SNAKokkos::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const { const int iatom = iatom_mod + vector_length * iatom_div; // get shared memory offset @@ -1465,13 +1468,13 @@ void SNAKokkos::compute_fused_deidrj_large }); // compute the contributions to dedr_full_sum for all "bend" locations - Kokkos::Array dedr_full_sum; + Kokkos::Array dedr_full_sum; register_loop([&] (int d) -> void { dedr_full_sum[d] = 0; }); for (int j_bend = 0; j_bend <= twojmax; j_bend++) { - const Kokkos::Array accum = evaluate_duidrj_jbend(ulist_wrapper, a, b, sfac, dulist_wrapper, da, db, dsfacu, + const Kokkos::Array accum = evaluate_duidrj_jbend(ulist_wrapper, a, b, sfac, dulist_wrapper, da, db, dsfacu, jelem, iatom, j_bend); register_loop([&] (int d) -> void { dedr_full_sum[d] += accum[d]; @@ -1480,22 +1483,22 @@ void SNAKokkos::compute_fused_deidrj_large // there's one thread per atom, neighbor pair, so no need to make this atomic register_loop([&] (int d) -> void { - dedr(iatom, jnbor, start + d) = static_cast(2.0) * dedr_full_sum[d]; + dedr(iatom, jnbor, start + d) = static_cast(2.0) * dedr_full_sum[d]; }); } // Core "evaluation" kernel that gets reused 
in `compute_fused_deidrj_small` and // `compute_fused_deidrj_large` -template +template template KOKKOS_INLINE_FUNCTION -auto SNAKokkos::evaluate_duidrj_jbend(const WignerWrapper& ulist_wrapper, +auto SNAKokkos::evaluate_duidrj_jbend(const WignerWrapper& ulist_wrapper, const complex& a, const complex& b, const real_type& sfac, const MultiWignerWrapper& dulist_wrapper, const Kokkos::Array& da, const Kokkos::Array& db, const Kokkos::Array& dsfacu, const int& jelem, const int& iatom, const int& j_bend) const { - Kokkos::Array dedr_full_sum; + Kokkos::Array dedr_full_sum; register_loop([&] (int d) -> void { dedr_full_sum[d] = 0; }); @@ -1600,7 +1603,7 @@ auto SNAKokkos::evaluate_duidrj_jbend(cons // Directly accumulate deidrj into sum_tmp register_loop([&] (int d) -> void { const complex du_prod = (dsfacu[d] * ulist_prev) + (sfac * dulist_prev[d]); - dedr_full_sum[d] += du_prod.re * y_local.re + du_prod.im * y_local.im; + dedr_full_sum[d] += static_cast(du_prod.re * y_local.re + du_prod.im * y_local.im); }); // next value @@ -1640,7 +1643,7 @@ auto SNAKokkos::evaluate_duidrj_jbend(cons // Directly accumulate deidrj into sum_tmp register_loop([&] (int d) -> void { const complex du_prod = (dsfacu[d] * ulist_prev) + (sfac * dulist_prev[d]); - dedr_full_sum[d] += du_prod.re * y_local.re + du_prod.im * y_local.im; + dedr_full_sum[d] += static_cast(du_prod.re * y_local.re + du_prod.im * y_local.im); }); } @@ -1653,9 +1656,9 @@ auto SNAKokkos::evaluate_duidrj_jbend(cons data layout ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duidrj_cpu(const int& iatom, const int& jnbor) const +void SNAKokkos::compute_duidrj_cpu(const int& iatom, const int& jnbor) const { complex da[3], db[3]; real_type u[3], dz0[3], dr0inv[3]; @@ -1674,12 +1677,12 @@ void SNAKokkos::compute_duidrj_cpu(const i // begin what was compute_duarray_cpu - real_type rinv = 1.0 / r; + real_type rinv = static_cast(1.0) 
/ r; u[0] = x * rinv; u[1] = y * rinv; u[2] = z * rinv; - real_type r0inv = 1.0 / sqrt(r * r + z0 * z0); + real_type r0inv = static_cast(1.0) / sqrt(r * r + z0 * z0); complex a = { z0 * r0inv, -z * r0inv }; complex b = { y * r0inv, -x * r0inv }; @@ -1836,11 +1839,11 @@ void SNAKokkos::compute_duidrj_cpu(const i dulist only uses the "half" data layout part of that structure. ------------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_deidrj_cpu(const int& iatom, const int& jnbor) const +void SNAKokkos::compute_deidrj_cpu(const int& iatom, const int& jnbor) const { - real_type force_sum[3] = { 0, 0, 0 }; + accum_type force_sum[3] = { 0, 0, 0 }; const int jelem = element(iatom, jnbor); for (int j = 0; j <= twojmax; j++) { @@ -1851,8 +1854,8 @@ void SNAKokkos::compute_deidrj_cpu(const i for (int ma = 0; ma <= j; ma++) { complex y = { ylist_re(iatom, jelem, jju_half), ylist_im(iatom, jelem, jju_half) }; for (int k = 0; k < 3; k++) - force_sum[k] += dulist_cpu(iatom, jnbor, jju_cache, k).re * y.re + - dulist_cpu(iatom, jnbor, jju_cache, k).im * y.im; + force_sum[k] += static_cast(dulist_cpu(iatom, jnbor, jju_cache, k).re * y.re + + dulist_cpu(iatom, jnbor, jju_cache, k).im * y.im); jju_half++; jju_cache++; } } //end loop over ma mb @@ -1864,8 +1867,8 @@ void SNAKokkos::compute_deidrj_cpu(const i for (int ma = 0; ma <= j; ma++) { complex y = { ylist_re(iatom, jelem, jju_half), ylist_im(iatom, jelem, jju_half) }; for (int k = 0; k < 3; k++) - force_sum[k] += static_cast(0.5) * (dulist_cpu(iatom, jnbor, jju_cache, k).re * y.re + - dulist_cpu(iatom, jnbor, jju_cache, k).im * y.im); + force_sum[k] += static_cast(static_cast(0.5) * (dulist_cpu(iatom, jnbor, jju_cache, k).re * y.re + + dulist_cpu(iatom, jnbor, jju_cache, k).im * y.im)); jju_half++; jju_cache++; } } // end if jeven @@ -1873,16 +1876,16 @@ void SNAKokkos::compute_deidrj_cpu(const i } for (int k = 0; k < 3; k++) - dedr(iatom, 
jnbor, k) = 2 * force_sum[k]; + dedr(iatom, jnbor, k) = static_cast(2 * force_sum[k]); } /* ---------------------------------------------------------------------- factorial n, wrapper for precomputed table ------------------------------------------------------------------------- */ -template +template inline -double SNAKokkos::factorial(int n) +double SNAKokkos::factorial(int n) { //if (n < 0 || n > nmaxfactorial) { // char str[128]; @@ -1897,8 +1900,8 @@ double SNAKokkos::factorial(int n) factorial n table, size SNA::nmaxfactorial+1 ------------------------------------------------------------------------- */ -template -const double SNAKokkos::nfac_table[] = { +template +const double SNAKokkos::nfac_table[] = { 1, 1, 2, @@ -2073,9 +2076,9 @@ const double SNAKokkos::nfac_table[] = { the function delta given by VMK Eq. 8.2(1) ------------------------------------------------------------------------- */ -template +template inline -double SNAKokkos::deltacg(int j1, int j2, int j) +double SNAKokkos::deltacg(int j1, int j2, int j) { double sfaccg = factorial((j1 + j2 + j) / 2 + 1); return sqrt(factorial((j1 + j2 - j) / 2) * @@ -2088,9 +2091,9 @@ double SNAKokkos::deltacg(int j1, int j2, the quasi-binomial formula VMK 8.2.1(3) ------------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init_clebsch_gordan() +void SNAKokkos::init_clebsch_gordan() { auto h_cglist = Kokkos::create_mirror_view(cglist); @@ -2113,7 +2116,7 @@ void SNAKokkos::init_clebsch_gordan() m = (aa2 + bb2 + j) / 2; if (m < 0 || m > j) { - h_cglist[idxcg_count] = 0.0; + h_cglist[idxcg_count] = 0; idxcg_count++; continue; } @@ -2145,7 +2148,7 @@ void SNAKokkos::init_clebsch_gordan() factorial((j - cc2) / 2) * (j + 1)); - h_cglist[idxcg_count] = sum * dcg * sfaccg; + h_cglist[idxcg_count] = static_cast(sum * dcg * sfaccg); idxcg_count++; } } @@ -2158,9 +2161,9 @@ void SNAKokkos::init_clebsch_gordan() the p = 0, q = 0 entries are allocated and 
skipped for convenience. ------------------------------------------------------------------------- */ -template +template inline -void SNAKokkos::init_rootpqarray() +void SNAKokkos::init_rootpqarray() { auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray); for (int p = 1; p <= twojmax; p++) @@ -2171,9 +2174,9 @@ void SNAKokkos::init_rootpqarray() /* ---------------------------------------------------------------------- */ -template +template inline -int SNAKokkos::compute_ncoeff() +int SNAKokkos::compute_ncoeff() { int ncount; @@ -2194,9 +2197,9 @@ int SNAKokkos::compute_ncoeff() /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -real_type SNAKokkos::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const +real_type SNAKokkos::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const { real_type sfac_outer; constexpr real_type one = static_cast(1.0); @@ -2227,9 +2230,9 @@ real_type SNAKokkos::compute_sfac(real_typ /* ---------------------------------------------------------------------- */ -template +template KOKKOS_INLINE_FUNCTION -real_type SNAKokkos::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const +real_type SNAKokkos::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const { real_type sfac_outer, dsfac_outer, sfac_inner, dsfac_inner; constexpr real_type one = static_cast(1.0); @@ -2275,9 +2278,9 @@ real_type SNAKokkos::compute_dsfac(real_ty return zero; // dummy return } -template +template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) const { +void SNAKokkos::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) const { real_type sfac_outer, 
dsfac_outer, sfac_inner, dsfac_inner; constexpr real_type one = static_cast(1.0); @@ -2317,8 +2320,8 @@ void SNAKokkos::compute_s_dsfac(const real memory usage of arrays ------------------------------------------------------------------------- */ -template -double SNAKokkos::memory_usage() +template +double SNAKokkos::memory_usage() { double bytes = 0; From 28c32c5a4a430dfb57b756e7daa2a9b882fe4136 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 27 Oct 2025 18:56:40 -0400 Subject: [PATCH 110/604] must not overwrite maxexchange value from atom style. --- src/create_box.cpp | 2 +- src/read_data.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/create_box.cpp b/src/create_box.cpp index c103991b32e..3a8b458e86c 100644 --- a/src/create_box.cpp +++ b/src/create_box.cpp @@ -193,7 +193,7 @@ void CreateBox::command(int narg, char **arg) // process optional args that can overwrite default settings - int maxexchange = 0; + int maxexchange = atom->avec->maxexchange; while (iarg < narg) { if (strcmp(arg[iarg], "bond/types") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "create_box bond/type", error); diff --git a/src/read_data.cpp b/src/read_data.cpp index 391891a13c3..b09e5a984a8 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -710,8 +710,6 @@ void ReadData::command(int narg, char **arg) if (firstpass) { delete lmap; lmap = new LabelMap(lmp, ntypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes); - // reset maxexchange if this not an added data file - if (addflag == NONE) atom->avec->maxexchange = 0; } // ------------------------------------------------------------------------------------- From 07d7b0c29f2c5d20407760d2ad9465b29f9a55c2 Mon Sep 17 00:00:00 2001 From: Michael Lostica Date: Tue, 28 Oct 2025 02:38:52 +0000 Subject: [PATCH 111/604] Fix #4038: Support >2 billion rigid bodies in fix rigid/small Changed nbody from int to bigint and fixed MPI type mismatch. 
Added unit tests for bigint type capacity and MPI communication. --- src/RIGID/fix_rigid_small.cpp | 4 +- src/RIGID/fix_rigid_small.h | 2 +- unittest/utils/CMakeLists.txt | 13 ++ unittest/utils/test_nbody_bigint.cpp | 131 ++++++++++++++++++ unittest/utils/test_nbody_mpi.cpp | 199 +++++++++++++++++++++++++++ 5 files changed, 346 insertions(+), 3 deletions(-) create mode 100644 unittest/utils/test_nbody_bigint.cpp create mode 100644 unittest/utils/test_nbody_mpi.cpp diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 0df71543954..33a2f06cd9d 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -438,13 +438,13 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // print statistics - int one = 0; + bigint one = 0; bigint atomone = 0; for (i = 0; i < nlocal; i++) { if (bodyown[i] >= 0) one++; if (bodytag[i] > 0) atomone++; } - MPI_Allreduce(&one,&nbody,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&one,&nbody,1,MPI_LMP_BIGINT,MPI_SUM,world); bigint atomall; MPI_Allreduce(&atomone,&atomall,1,MPI_LMP_BIGINT,MPI_SUM,world); diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index c1769129988..938446b9891 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -78,7 +78,7 @@ class FixRigidSmall : public Fix { int earlyflag; // 1 if forces/torques are computed at post_force() int commflag; // various modes of forward/reverse comm int customflag; // 1 if custom property/variable define bodies - int nbody; // total # of rigid bodies + bigint nbody; // total # of rigid bodies (supports >2^31) int nlinear; // total # of linear rigid bodies tagint maxmol; // max mol-ID double maxextent; // furthest distance from body owner to body atom diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt index 143eb8edb32..b9ad41365f6 100644 --- a/unittest/utils/CMakeLists.txt +++ b/unittest/utils/CMakeLists.txt @@ -12,6 +12,19 @@ add_executable(test_lmptype 
test_lmptype.cpp) target_link_libraries(test_lmptype PRIVATE lammps GTest::GMockMain) add_test(NAME LmpType COMMAND test_lmptype) +# Test nbody bigint type capacity (>INT_MAX values) +add_executable(test_nbody_bigint test_nbody_bigint.cpp) +target_link_libraries(test_nbody_bigint PRIVATE lammps GTest::GMockMain) +add_test(NAME NbodyBigint COMMAND test_nbody_bigint) + +# Test nbody MPI communication with large values (requires MPI) +if(BUILD_MPI) + add_executable(test_nbody_mpi test_nbody_mpi.cpp) + target_link_libraries(test_nbody_mpi PRIVATE lammps GTest::GTest MPI::MPI_CXX) + add_mpi_test(NAME NbodyMPI NUM_PROCS 4 COMMAND $) + set_tests_properties(NbodyMPI PROPERTIES LABELS "mpi") +endif() + add_executable(test_argutils test_argutils.cpp) target_link_libraries(test_argutils PRIVATE lammps GTest::GMockMain) add_test(NAME ArgUtils COMMAND test_argutils) diff --git a/unittest/utils/test_nbody_bigint.cpp b/unittest/utils/test_nbody_bigint.cpp new file mode 100644 index 00000000000..dcb69ec199d --- /dev/null +++ b/unittest/utils/test_nbody_bigint.cpp @@ -0,0 +1,131 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +// Unit tests for nbody bigint type capacity +// Tests that bigint can handle values > INT_MAX without allocating actual rigid bodies + +#include "lmptype.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace LAMMPS_NS; + +namespace { + +// Test that bigint can store values > INT_MAX +TEST(NbodyBigint, CanStoreLargeValues) +{ + // INT_MAX = 2,147,483,647 + bigint nbody = static_cast(INT_MAX) + 1; + EXPECT_GT(nbody, INT_MAX); + EXPECT_EQ(nbody, 2147483648LL); + + // Test much larger value (3 billion) + nbody = 3000000000LL; + EXPECT_GT(nbody, INT_MAX); + EXPECT_EQ(nbody, 3000000000LL); +} + +// Test arithmetic operations used in fix_rigid_small.cpp:3640 +// double tfactor = force->mvv2e / ((6.0*nbody - nlinear) * force->boltz); +TEST(NbodyBigint, ArithmeticOperations) +{ + bigint nbody = static_cast(INT_MAX) + 1000; + int nlinear = 5; + + // Test the actual expression from the code + double result = 6.0 * nbody - nlinear; + + // Expected: 6 * 2147484647 - 5 = 12,884,907,877 + double expected = 6.0 * (static_cast(INT_MAX) + 1000) - 5.0; + EXPECT_DOUBLE_EQ(result, expected); + EXPECT_GT(result, 6.0 * INT_MAX); // Verify it exceeds int range +} + +// Test increment operation (used in fix_rigid_small.cpp:2853) +TEST(NbodyBigint, IncrementOperation) +{ + bigint nbody = static_cast(INT_MAX) + 100; + bigint original = nbody; + + nbody++; + EXPECT_EQ(nbody, original + 1); + EXPECT_EQ(nbody, static_cast(INT_MAX) + 101); +} + +// Test comparison operations +TEST(NbodyBigint, ComparisonOperations) +{ + bigint nbody_large = static_cast(INT_MAX) + 1; + bigint nbody_zero = 0; + int int_max_value = INT_MAX; + + EXPECT_GT(nbody_large, 0); + EXPECT_GT(nbody_large, int_max_value); + EXPECT_NE(nbody_large, int_max_value); + EXPECT_EQ(nbody_zero, 0); +} + +// Test boundary values +TEST(NbodyBigint, BoundaryValues) +{ + bigint at_limit = INT_MAX; + bigint over_limit = static_cast(INT_MAX) + 
1; + bigint way_over = static_cast(INT_MAX) * 2; + + EXPECT_EQ(at_limit, 2147483647); + EXPECT_EQ(over_limit, 2147483648LL); + EXPECT_EQ(way_over, 4294967294LL); + + // Verify they're all different + EXPECT_NE(at_limit, over_limit); + EXPECT_NE(over_limit, way_over); + EXPECT_LT(at_limit, over_limit); + EXPECT_LT(over_limit, way_over); +} + +// Test that calculations don't overflow when converting to double +TEST(NbodyBigint, DoubleConversion) +{ + bigint nbody = 3000000000LL; // 3 billion + + // This conversion happens in compute_scalar() for temperature calculation + double as_double = static_cast(nbody); + EXPECT_DOUBLE_EQ(as_double, 3000000000.0); + + // Test arithmetic after conversion + double result = 6.0 * as_double; + EXPECT_DOUBLE_EQ(result, 18000000000.0); // 18 billion +} + +// Test realistic scenario: 2.5 billion rigid bodies +TEST(NbodyBigint, RealisticLargeScale) +{ + bigint nbody = 2500000000LL; // 2.5 billion bodies + int nlinear = 1000; // Some linear bodies + + // Simulate the DOF calculation from compute_scalar() + double dof_removed = 6.0 * nbody - nlinear; + + EXPECT_GT(dof_removed, 0.0); + EXPECT_DOUBLE_EQ(dof_removed, 15000000000.0 - 1000.0); + + // Verify we can do division (for temperature calculation) + double temperature_factor = 1.0 / dof_removed; + EXPECT_GT(temperature_factor, 0.0); + EXPECT_LT(temperature_factor, 1.0); +} + +} // namespace diff --git a/unittest/utils/test_nbody_mpi.cpp b/unittest/utils/test_nbody_mpi.cpp new file mode 100644 index 00000000000..74a0555ae40 --- /dev/null +++ b/unittest/utils/test_nbody_mpi.cpp @@ -0,0 +1,199 @@ +// Unit tests for nbody MPI communication with large values +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +// Unit tests for nbody MPI communication with large values +// Tests that MPI_Allreduce with MPI_LMP_BIGINT works correctly for values > INT_MAX + +#include "lmptype.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace LAMMPS_NS; + +// MPI environment setup +class MPIEnvironment : public ::testing::Environment { +public: + void SetUp() override { + int flag; + MPI_Initialized(&flag); + if (!flag) { + int argc = 0; + char **argv = nullptr; + MPI_Init(&argc, &argv); + } + } + + void TearDown() override { + int flag; + MPI_Finalized(&flag); + if (!flag) { + MPI_Finalize(); + } + } +}; + +namespace { + +// Test MPI_Allreduce with MPI_LMP_BIGINT (validates fix_rigid_small.cpp:447) +TEST(NbodyMPI, AllreduceSumBasic) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Each rank contributes 1 (simulates counting local bodies) + bigint local_count = 1; + bigint global_count = 0; + + MPI_Allreduce(&local_count, &global_count, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + EXPECT_EQ(global_count, nprocs); +} + +// Test with large values that would overflow int +TEST(NbodyMPI, AllreduceLargeValues) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Each rank contributes a value that when summed exceeds INT_MAX + // Use INT_MAX/nprocs + 1 to ensure total > INT_MAX + bigint local_nbody = static_cast(INT_MAX) / nprocs + 1; + bigint global_nbody = 0; + + MPI_Allreduce(&local_nbody, &global_nbody, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + // Verify sum exceeds INT_MAX + EXPECT_GT(global_nbody, 
INT_MAX); + + // Verify sum is correct + bigint expected = (static_cast(INT_MAX) / nprocs + 1) * nprocs; + EXPECT_EQ(global_nbody, expected); +} + +// Test boundary case at INT_MAX +TEST(NbodyMPI, AllreduceBoundary) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Rank 0 contributes INT_MAX, others contribute 1 + bigint local_nbody = (rank == 0) ? INT_MAX : 1; + bigint global_nbody = 0; + + MPI_Allreduce(&local_nbody, &global_nbody, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + bigint expected = static_cast(INT_MAX) + (nprocs - 1); + EXPECT_EQ(global_nbody, expected); + EXPECT_GT(global_nbody, INT_MAX); +} + +// Test very large values (billions) +TEST(NbodyMPI, AllreduceVeryLarge) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Each rank contributes 1 billion + bigint local_nbody = 1000000000LL; + bigint global_nbody = 0; + + MPI_Allreduce(&local_nbody, &global_nbody, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + bigint expected = 1000000000LL * nprocs; + EXPECT_EQ(global_nbody, expected); + + // With 4 ranks, total should be 4 billion + if (nprocs >= 4) { + EXPECT_GE(global_nbody, 4000000000LL); + } +} + +// Test that the actual pattern from fix_rigid_small.cpp works +// This mimics lines 441-447 of the fixed code +TEST(NbodyMPI, ActualCodePattern) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Simulate the actual code pattern + bigint one = 0; // Changed from int to bigint (our fix!) + + // Simulate counting bodies (each rank has some) + int nlocal_body = rank + 1; // Rank 0 has 1, rank 1 has 2, etc. 
+ for (int i = 0; i < nlocal_body; i++) { + one++; + } + + bigint nbody = 0; + MPI_Allreduce(&one, &nbody, 1, MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + // Total should be sum of 1+2+3+...+nprocs = nprocs*(nprocs+1)/2 + bigint expected = static_cast(nprocs) * (nprocs + 1) / 2; + EXPECT_EQ(nbody, expected); +} + +// Test zero values (edge case) +TEST(NbodyMPI, AllreduceZero) +{ + bigint local_nbody = 0; + bigint global_nbody = 0; + + MPI_Allreduce(&local_nbody, &global_nbody, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + EXPECT_EQ(global_nbody, 0); +} + +// Test asymmetric distribution (realistic scenario) +TEST(NbodyMPI, AllreduceAsymmetric) +{ + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Rank 0 has most bodies, others have few + bigint local_nbody; + if (rank == 0) { + local_nbody = static_cast(INT_MAX) + 1000; + } else { + local_nbody = 100; + } + + bigint global_nbody = 0; + MPI_Allreduce(&local_nbody, &global_nbody, 1, + MPI_LMP_BIGINT, MPI_SUM, MPI_COMM_WORLD); + + bigint expected = static_cast(INT_MAX) + 1000 + 100 * (nprocs - 1); + EXPECT_EQ(global_nbody, expected); + EXPECT_GT(global_nbody, INT_MAX); +} + +} // namespace + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + ::testing::AddGlobalTestEnvironment(new MPIEnvironment); + return RUN_ALL_TESTS(); +} From d300cb84ec6e11436debdb0fdad00c087ef7e18f Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 28 Oct 2025 08:46:39 -0600 Subject: [PATCH 112/604] Remove unused atom masks and make atom masks 64 bits --- src/KOKKOS/atom_kokkos.cpp | 6 +- src/KOKKOS/atom_kokkos.h | 6 +- src/KOKKOS/atom_vec_angle_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_angle_kokkos.h | 6 +- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_atomic_kokkos.h | 6 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_bond_kokkos.h | 6 +- src/KOKKOS/atom_vec_charge_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_charge_kokkos.h 
| 6 +- src/KOKKOS/atom_vec_dipole_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_dipole_kokkos.h | 6 +- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_dpd_kokkos.h | 6 +- src/KOKKOS/atom_vec_full_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_full_kokkos.h | 6 +- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 66 ++++++------- src/KOKKOS/atom_vec_hybrid_kokkos.h | 6 +- src/KOKKOS/atom_vec_kokkos.cpp | 53 ++++++++++- src/KOKKOS/atom_vec_kokkos.h | 8 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_molecular_kokkos.h | 6 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_sphere_kokkos.h | 6 +- src/KOKKOS/atom_vec_spin_kokkos.cpp | 6 +- src/KOKKOS/atom_vec_spin_kokkos.h | 6 +- src/KOKKOS/dynamical_matrix_kokkos.cpp | 2 +- src/KOKKOS/fix_property_atom_kokkos.cpp | 6 +- src/KOKKOS/fix_property_atom_kokkos.h | 6 +- src/KOKKOS/mliap_data_kokkos.cpp | 4 +- src/KOKKOS/mliap_data_kokkos.h | 4 +- src/KOKKOS/neigh_bond_kokkos.h | 2 +- src/KOKKOS/third_order_kokkos.cpp | 2 +- src/KOKKOS/verlet_kokkos.cpp | 4 +- src/atom_masks.h | 112 ++++++++--------------- 35 files changed, 211 insertions(+), 196 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index ca50f382cb3..d9d85e2a3e7 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -152,7 +152,7 @@ void AtomKokkos::update_property_atom() /* ---------------------------------------------------------------------- */ -void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) +void AtomKokkos::sync(const ExecutionSpace space, uint64_t mask) { if ((space == Device || space == HostKK) && lmp->kokkos->auto_sync) { @@ -171,7 +171,7 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) +void AtomKokkos::modified(const ExecutionSpace space, uint64_t mask) { avecKK->modified(space, 
mask); for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask); @@ -184,7 +184,7 @@ void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomKokkos::sync_pinned(const ExecutionSpace space, unsigned int mask, int async_flag) +void AtomKokkos::sync_pinned(const ExecutionSpace space, uint64_t mask, int async_flag) { avecKK->sync_pinned(space, mask, async_flag); for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_pinned(space, mask, async_flag); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 92c56b32ed3..47587521619 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -161,9 +161,9 @@ class AtomKokkos : public Atom { void init() override; void update_property_atom(); void allocate_type_arrays() override; - void sync(const ExecutionSpace space, unsigned int mask); - void modified(const ExecutionSpace space, unsigned int mask); - void sync_pinned(const ExecutionSpace space, unsigned int mask, int async_flag = 0); + void sync(const ExecutionSpace space, uint64_t mask); + void modified(const ExecutionSpace space, uint64_t mask); + void sync_pinned(const ExecutionSpace space, uint64_t mask, int async_flag = 0); void sort() override; int add_custom(const char *, int, int, int border = 0) override; void remove_custom(int, int, int) override; diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 6047996046f..15a66b579bb 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -662,7 +662,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, i /* ---------------------------------------------------------------------- */ -void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecAngleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & 
X_MASK) atomKK->k_x.sync_device(); @@ -744,7 +744,7 @@ void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -837,7 +837,7 @@ void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, in /* ---------------------------------------------------------------------- */ -void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecAngleKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 73fb5770a9c..783cd2c9a4a 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -50,9 +50,9 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: tagint *molecule; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 20625a7e461..7917c019a3b 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -474,7 +474,7 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, 
/* ---------------------------------------------------------------------- */ -void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecAtomicKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -505,7 +505,7 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -542,7 +542,7 @@ void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i /* ---------------------------------------------------------------------- */ -void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecAtomicKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 172c84cf0d4..551aa5364ce 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -51,9 +51,9 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: DAT::t_tagint_1d d_tag; diff --git 
a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 91a3ff4a4a1..5c85292f0f3 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -602,7 +602,7 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, in /* ---------------------------------------------------------------------- */ -void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecBondKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -663,7 +663,7 @@ void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -732,7 +732,7 @@ void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecBondKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 1f0582e9bbc..f15bc4d859d 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -50,9 +50,9 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void 
sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: tagint *molecule; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 25eb08b76f2..723ae149c0b 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -551,7 +551,7 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, /* ---------------------------------------------------------------------- */ -void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecChargeKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -585,7 +585,7 @@ void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecChargeKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -619,7 +619,7 @@ void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecChargeKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecChargeKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index a9ee88fede7..5fbdda24869 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -51,9 +51,9 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { ExecutionSpace space, 
DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index c476139e93b..72c6c62754d 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -578,7 +578,7 @@ int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, /* ---------------------------------------------------------------------- */ -void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecDipoleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -615,7 +615,7 @@ void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecDipoleKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -652,7 +652,7 @@ void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecDipoleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecDipoleKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h 
b/src/KOKKOS/atom_vec_dipole_kokkos.h index 0a4306d375e..bfae1ca6081 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -51,9 +51,9 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index a6725a006d0..2c56af049d0 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -943,7 +943,7 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int /* ---------------------------------------------------------------------- */ -void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecDPDKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -998,7 +998,7 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1067,7 +1067,7 @@ void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void 
AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecDPDKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 42caa92d0bc..a60c4676da9 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -59,9 +59,9 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; double *duChem; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index f9d72042cd1..606569be3d4 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -895,7 +895,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, in /* ---------------------------------------------------------------------- */ -void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecFullKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -1028,7 +1028,7 @@ void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && 
atomKK->k_x.need_sync_device()) @@ -1179,7 +1179,7 @@ void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecFullKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index f7a552beb3c..f6039bdcb60 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -50,9 +50,9 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 3f6373a97e3..b1e30a1c0fb 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -70,7 +70,7 @@ struct AtomVecHybridKokkos_PackComm { typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackComm( const AtomKokkos* atomKK, @@ -78,7 +78,7 @@ struct AtomVecHybridKokkos_PackComm { const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, - const unsigned int &datamask): + const uint64_t &datamask): 
_x(atomKK->k_x.view()), _mu(atomKK->k_mu.view()), _sp(atomKK->k_sp.view()), @@ -234,7 +234,7 @@ struct AtomVecHybridKokkos_PackCommSelf { typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackCommSelf( const AtomKokkos* atomKK, @@ -242,7 +242,7 @@ struct AtomVecHybridKokkos_PackCommSelf { const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, - const unsigned int datamask): + const uint64_t datamask): _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), @@ -390,7 +390,7 @@ struct AtomVecHybridKokkos_PackCommSelfFused { typename AT::t_int_1d_const _sendnum_scan; typename AT::t_int_1d_const _g2l; double _xprd,_yprd,_zprd,_xy,_xz,_yz; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackCommSelfFused( const AtomKokkos* atomKK, @@ -402,7 +402,7 @@ struct AtomVecHybridKokkos_PackCommSelfFused { const typename DAT::tdual_int_1d &g2l, const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, - const unsigned int datamask): + const uint64_t datamask): _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), @@ -529,12 +529,12 @@ struct AtomVecHybridKokkos_UnpackComm { typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnpackComm( const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int &first, const unsigned int &datamask): + const int &first, const uint64_t &datamask): _x(atomKK->k_x.view()), _mu(atomKK->k_mu.view()), 
_sp(atomKK->k_sp.view()), @@ -620,7 +620,7 @@ struct AtomVecHybridKokkos_PackCommVel { double _pbc[6]; double _h_rate[6]; const int _deform_vremap; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackCommVel( const AtomKokkos* atomKK, @@ -630,7 +630,7 @@ struct AtomVecHybridKokkos_PackCommVel { const double &xy, const double &xz, const double &yz, const int* const pbc, const double * const h_rate, const int &deform_vremap, - const unsigned int &datamask): + const uint64_t &datamask): _x(atomKK->k_x.view()), _mask(atomKK->k_mask.view()), _v(atomKK->k_v.view()), @@ -873,7 +873,7 @@ struct AtomVecHybridKokkos_UnpackCommVel { typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnpackCommVel( const AtomKokkos* atomKK, @@ -966,12 +966,12 @@ struct AtomVecHybridKokkos_PackReverse { typename AT::t_kkfloat_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackReverse( const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int &first, const unsigned int &datamask): + const int &first, const uint64_t &datamask): _f(atomKK->k_f.view()), _fm(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()), @@ -1039,13 +1039,13 @@ struct AtomVecHybridKokkos_UnPackReverseSelf { typename AT::t_kkfloat_1d_3 _torquew; typename AT::t_int_1d_const _list; int _nfirst; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnPackReverseSelf( const AtomKokkos* atomKK, const int &nfirst, const typename DAT::tdual_int_1d &list, - const unsigned int &datamask): + const uint64_t &datamask): _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), _fm(atomKK->k_fm.view()),_fmw(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()),_fm_longw(atomKK->k_fm_long.view()), @@ -1110,13 +1110,13 @@ struct AtomVecHybridKokkos_UnPackReverse { 
typename AT::t_kkfloat_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnPackReverse( const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, - const unsigned int datamask): + const uint64_t datamask): _f(atomKK->k_f.view()), _fm(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()), @@ -1198,14 +1198,14 @@ struct AtomVecHybridKokkos_PackBorder { typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackBorder( const AtomKokkos* atomKK, const typename AT::t_double_2d_lr &buf, const typename AT::t_int_1d_const &list, const double &dx, const double &dy, const double &dz, - const unsigned int &datamask): + const uint64_t &datamask): _buf(buf),_list(list), _x(atomKK->k_x.view()), _tag(atomKK->k_tag.view()), @@ -1357,12 +1357,12 @@ struct AtomVecHybridKokkos_UnpackBorder { typename AT::t_kkfloat_1d _radius,_rmass; typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; int _first; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnpackBorder( const AtomKokkos* atomKK, const typename AT::t_double_2d_lr_const &buf, - const int &first, const unsigned int &datamask): + const int &first, const uint64_t &datamask): _buf(buf), _x(atomKK->k_x.view()), _tag(atomKK->k_tag.view()), @@ -1482,7 +1482,7 @@ struct AtomVecHybridKokkos_PackBorderVel { typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz, _dvx, _dvy, _dvz; const int _deform_groupbit; - const unsigned int _datamask; + const uint64_t _datamask; AtomVecHybridKokkos_PackBorderVel( const AtomKokkos* atomKK, @@ -1491,7 +1491,7 @@ struct AtomVecHybridKokkos_PackBorderVel { const double &dx, const double &dy, const double &dz, const double &dvx, const 
double &dvy, const double &dvz, const int &deform_groupbit, - const unsigned int &datamask): + const uint64_t &datamask): _buf(buf),_list(list),_datamask(datamask), _x(atomKK->k_x.view()), _tag(atomKK->k_tag.view()), @@ -1692,13 +1692,13 @@ struct AtomVecHybridKokkos_UnpackBorderVel { typename AT::t_kkfloat_1d_3 _omega; typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; int _first; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnpackBorderVel( const AtomKokkos* atomKK, const typename AT::t_double_2d_lr_const &buf, const int &first, - const unsigned int &datamask): + const uint64_t &datamask): _buf(buf), _x(atomKK->k_x.view()), _tag(atomKK->k_tag.view()), @@ -1878,14 +1878,14 @@ struct AtomVecHybridKokkos_PackExchangeFunctor { typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; int _size_exchange; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_PackExchangeFunctor( const AtomKokkos* atomKK, const DAT::tdual_double_2d_lr buf, DAT::tdual_int_1d sendlist, DAT::tdual_int_1d copylist, - const unsigned int datamask): + const uint64_t datamask): _x(atomKK->k_x.view()), _v(atomKK->k_v.view()), _tag(atomKK->k_tag.view()), @@ -2216,6 +2216,8 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double DAT::tdual_int_1d k_copylist, ExecutionSpace space) { + set_size_exchange(); + if (nsend > (int) (k_buf.view_host().extent(0)* k_buf.view_host().extent(1))/size_exchange) { int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; @@ -2277,7 +2279,7 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { int _dim; double _lo,_hi; int _size_exchange; - unsigned int _datamask; + uint64_t _datamask; AtomVecHybridKokkos_UnpackExchangeFunctor( const AtomKokkos* atomKK, @@ -2285,7 +2287,7 @@ struct AtomVecHybridKokkos_UnpackExchangeFunctor { DAT::tdual_int_1d nlocal, DAT::tdual_int_1d indices, int dim, double lo, double hi, - unsigned int datamask): + uint64_t datamask): 
_x(atomKK->k_x.view()), _v(atomKK->k_v.view()), _tag(atomKK->k_tag.view()), @@ -2507,21 +2509,21 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::sync(ExecutionSpace space, unsigned int h_mask) +void AtomVecHybridKokkos::sync(ExecutionSpace space, uint64_t h_mask) { for (int k = 0; k < nstyles; k++) (dynamic_cast<AtomVecKokkos*>(styles[k]))->sync(space,h_mask); } /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::sync_pinned(ExecutionSpace space, unsigned int h_mask, int async_flag) +void AtomVecHybridKokkos::sync_pinned(ExecutionSpace space, uint64_t h_mask, int async_flag) { for (int k = 0; k < nstyles; k++) (dynamic_cast<AtomVecKokkos*>(styles[k]))->sync_pinned(space,h_mask,async_flag); } /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::modified(ExecutionSpace space, unsigned int h_mask) +void AtomVecHybridKokkos::modified(ExecutionSpace space, uint64_t h_mask) { for (int k = 0; k < nstyles; k++) (dynamic_cast<AtomVecKokkos*>(styles[k]))->modified(space,h_mask); } diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 5de73296e5b..72c03810403 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -96,9 +96,9 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: DAT::t_tagint_1d
d_tag; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 40cecacb3e6..94be45e4032 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -891,6 +891,8 @@ int AtomVecKokkos::field2mask(std::string field) return Q_MASK; else if (field == "mu") return MU_MASK; + else if (field == "mu3") + return MU_MASK; else if (field == "radius") return RADIUS_MASK; else if (field == "omega") @@ -899,7 +901,7 @@ int AtomVecKokkos::field2mask(std::string field) return TORQUE_MASK; else if (field == "molecule") return MOLECULE_MASK; - else if (field == "special") + else if (field == "nspecial") return SPECIAL_MASK; else if (field == "num_bond") return BOND_MASK; @@ -937,6 +939,44 @@ int AtomVecKokkos::field2mask(std::string field) /* ---------------------------------------------------------------------- */ +int AtomVecKokkos::field2size(std::string field) /* per-field contribution summed into size_exchange by set_size_exchange(); 0 for unrecognized names */ +{ + if (field == "id") return 1; + else if (field == "type") return 1; + else if (field == "mask") return 1; + else if (field == "image") return 1; + else if (field == "x") return 3; + else if (field == "v") return 3; + else if (field == "f") return 3; + else if (field == "rmass") return 1; + else if (field == "q") return 1; + else if (field == "mu") return 4; + else if (field == "mu3") return 3; + else if (field == "radius") return 1; + else if (field == "omega") return 3; + else if (field == "torque") return 3; + else if (field == "molecule") return 1; + else if (field == "special") return 3+atom->maxspecial; + else if (field == "num_bond") return 1+2*atom->bond_per_atom; + else if (field == "num_angle") return 1+4*atom->angle_per_atom; + else if (field == "num_dihedral") return 1+5*atom->dihedral_per_atom; + else if (field == "num_improper") return 1+5*atom->improper_per_atom; /* sized by improper_per_atom, not dihedral_per_atom */ + else if (field == "sp") return 4; + else if (field == "fm") return 3; + else if (field == "fm_long") return 3; + else if (field == "rho") return 1; + else if (field == "dpdTheta") return 1; +
else if (field == "uCond") return 1; + else if (field == "uMech") return 1; + else if (field == "uChem") return 1; + else if (field == "uCG") return 1; + else if (field == "uCGnew") return 1; + else if (field == "duChem") return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- */ + void AtomVecKokkos::set_atom_masks() { datamask_grow = EMPTY_MASK; @@ -981,3 +1021,14 @@ void AtomVecKokkos::set_atom_masks() for (int i = 0; i < nexchange; i++) datamask_exchange |= field2mask(fields_exchange[i]); } + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::set_size_exchange() +{ + size_exchange = 1; // 1 to store buffer length + for (int i = 0; i < default_exchange.size(); i++) + size_exchange += field2size(default_exchange[i]); + for (int i = 0; i < nexchange; i++) + size_exchange += field2size(fields_exchange[i]); +} diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index d9532f2c987..8d6e829f01a 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -34,9 +34,9 @@ class AtomVecKokkos : virtual public AtomVec { virtual void sort_kokkos(Kokkos::BinSort &Sorter) = 0; - virtual void sync(ExecutionSpace space, unsigned int mask) = 0; - virtual void modified(ExecutionSpace space, unsigned int mask) = 0; - virtual void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) = 0; + virtual void sync(ExecutionSpace space, uint64_t mask) = 0; + virtual void modified(ExecutionSpace space, uint64_t mask) = 0; + virtual void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) = 0; virtual int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, @@ -137,7 +137,9 @@ class AtomVecKokkos : virtual public AtomVec { void setup_fields() override; int field2mask(std::string); + int field2size(std::string); void set_atom_masks(); + void set_size_exchange(); public: diff --git
a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 4b90ad273ee..8ed3346845f 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -869,7 +869,7 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_bu /* ---------------------------------------------------------------------- */ -void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecMolecularKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -997,7 +997,7 @@ void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) } } -void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1144,7 +1144,7 @@ void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, unsigned int mask /* ---------------------------------------------------------------------- */ -void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecMolecularKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index bf1021eee2a..b4c6e5cd13f 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -50,9 +50,9 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + 
void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: tagint *molecule; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 3b875286025..9e1d1bf3bd8 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -1653,7 +1653,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, /* ---------------------------------------------------------------------- */ -void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecSphereKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -1696,7 +1696,7 @@ void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1749,7 +1749,7 @@ void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i /* ---------------------------------------------------------------------- */ -void AtomVecSphereKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecSphereKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index c9c3221a054..dce220f5a5e 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -70,9 +70,9 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { int nlocal, 
int dim, double lo, double hi, ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: double **torque; diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 5b79e803669..101a48be94d 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -617,7 +617,7 @@ void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes) /* ---------------------------------------------------------------------- */ -void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecSpinKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -657,7 +657,7 @@ void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecSpinKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -695,7 +695,7 @@ void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) } } -void AtomVecSpinKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecSpinKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 1b20614a768..4153b22c534 100644 --- 
a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -51,9 +51,9 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: DAT::t_tagint_1d d_tag; diff --git a/src/KOKKOS/dynamical_matrix_kokkos.cpp b/src/KOKKOS/dynamical_matrix_kokkos.cpp index 57aca408921..83a5015dd82 100644 --- a/src/KOKKOS/dynamical_matrix_kokkos.cpp +++ b/src/KOKKOS/dynamical_matrix_kokkos.cpp @@ -165,7 +165,7 @@ void DynamicalMatrixKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_host = 0; + uint64_t datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 3d1e672d74f..82dbdc3aae6 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -129,7 +129,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) /* ---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) +void FixPropertyAtomKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync_device(); @@ -151,7 +151,7 @@ void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace 
space, unsigned int mask, int async_flag) +void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync_device()) @@ -176,7 +176,7 @@ void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, /* ---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask) +void FixPropertyAtomKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify_device(); diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h index 25d4001b419..887e09d5d98 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.h +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -33,9 +33,9 @@ class FixPropertyAtomKokkos : public FixPropertyAtom { ~FixPropertyAtomKokkos() override; void grow_arrays(int) override; - void sync(ExecutionSpace space, unsigned int mask); - void modified(ExecutionSpace space, unsigned int mask); - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0); + void sync(ExecutionSpace space, uint64_t mask); + void modified(ExecutionSpace space, uint64_t mask); + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0); private: int dvector_flag; diff --git a/src/KOKKOS/mliap_data_kokkos.cpp b/src/KOKKOS/mliap_data_kokkos.cpp index 5de8cc7418c..06c133dcf7e 100644 --- a/src/KOKKOS/mliap_data_kokkos.cpp +++ b/src/KOKKOS/mliap_data_kokkos.cpp @@ -270,7 +270,7 @@ void MLIAPDataKokkos::grow_neigharrays() { /* ---------------------------------------------------------------------- */ template -void MLIAPDataKokkos::modified(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync) { +void MLIAPDataKokkos::modified(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync) { if (space == Device) { if (mask & 
IATOMS_MASK ) k_iatoms .modify_device(); if (mask & IELEMS_MASK ) k_ielems .modify_device(); @@ -316,7 +316,7 @@ void MLIAPDataKokkos::modified(ExecutionSpace space, unsigned int ma /* ---------------------------------------------------------------------- */ template -void MLIAPDataKokkos::sync(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync) { +void MLIAPDataKokkos::sync(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync) { if (space == Device) { if (lmp->kokkos->auto_sync && !ignore_auto_sync) modified(Host, mask, true); diff --git a/src/KOKKOS/mliap_data_kokkos.h b/src/KOKKOS/mliap_data_kokkos.h index 5ca22fc1705..b34d8e099e1 100644 --- a/src/KOKKOS/mliap_data_kokkos.h +++ b/src/KOKKOS/mliap_data_kokkos.h @@ -66,9 +66,9 @@ template class MLIAPDataKokkos : public MLIAPData { void generate_neighdata(class NeighList *, int = 0, int = 0) override; void grow_neigharrays() override; - void modified(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync = false); + void modified(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync = false); - void sync(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync = false); + void sync(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync = false); PairMLIAPKokkos *k_pairmliap; diff --git a/src/KOKKOS/neigh_bond_kokkos.h b/src/KOKKOS/neigh_bond_kokkos.h index 852dd984053..650df4b9579 100644 --- a/src/KOKKOS/neigh_bond_kokkos.h +++ b/src/KOKKOS/neigh_bond_kokkos.h @@ -75,7 +75,7 @@ class NeighBondKokkos : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read,datamask_modify; + uint64_t datamask_read,datamask_modify; int maxbond,maxangle,maxdihedral,maximproper; // size of bond lists int me,nprocs; diff --git a/src/KOKKOS/third_order_kokkos.cpp b/src/KOKKOS/third_order_kokkos.cpp index 06d68934b38..b52dd07fee8 100644 --- a/src/KOKKOS/third_order_kokkos.cpp +++ b/src/KOKKOS/third_order_kokkos.cpp @@ -165,7 
+165,7 @@ void ThirdOrderKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_host = 0; + uint64_t datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index e68a3cb0e75..e40604aecf8 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -381,8 +381,8 @@ void VerletKokkos::run(int n) } bool execute_on_host = false; - unsigned int datamask_read_host = 0; - unsigned int datamask_exclude = 0; + uint64_t datamask_read_host = 0; + uint64_t datamask_exclude = 0; int allow_overlap = lmp->kokkos->allow_overlap; if (allow_overlap && atomKK->k_f.view_hostkk().data() != atomKK->k_f.view_device().data()) { diff --git a/src/atom_masks.h b/src/atom_masks.h index 0058c3f57f7..acbab8938f1 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -17,93 +17,53 @@ // per-atom data masks -#define EMPTY_MASK 0x00000000 -#define ALL_MASK 0xffffffff -#define SAMETAG_MASK 0x40000000 -#define EXTENDED_MASK 0x80000000 +#define EMPTY_MASK 0x0000000000000000 +#define ALL_MASK 0xffffffffffffffff // standard -#define X_MASK 0x00000001 -#define V_MASK 0x00000002 -#define F_MASK 0x00000004 -#define TAG_MASK 0x00000008 -#define TYPE_MASK 0x00000010 -#define MASK_MASK 0x00000020 -#define IMAGE_MASK 0x00000040 -#define Q_MASK 0x00000080 -#define MOLECULE_MASK 0x00000100 -#define RMASS_MASK 0x00000200 -#define BOND_MASK 0x00000400 -#define ANGLE_MASK 0x00000800 -#define DIHEDRAL_MASK 0x00001000 -#define IMPROPER_MASK 0x00002000 -#define SPECIAL_MASK 0x00004000 -#define MAP_MASK 0x00008000 -#define ENERGY_MASK 0x00010000 -#define VIRIAL_MASK 0x00020000 -#define MU_MASK 0x00040000 +#define X_MASK 0x0000000000000001 +#define V_MASK 0x0000000000000002 +#define F_MASK 0x0000000000000004 +#define TAG_MASK 0x0000000000000008 +#define TYPE_MASK 0x0000000000000010 +#define MASK_MASK 0x0000000000000020 +#define IMAGE_MASK 0x0000000000000040 
+#define Q_MASK 0x0000000000000080 +#define MOLECULE_MASK 0x0000000000000100 +#define RMASS_MASK 0x0000000000000200 +#define BOND_MASK 0x0000000000000400 +#define ANGLE_MASK 0x0000000000000800 +#define DIHEDRAL_MASK 0x0000000000001000 +#define IMPROPER_MASK 0x0000000000002000 +#define SPECIAL_MASK 0x0000000000004000 +#define ENERGY_MASK 0x0000000000008000 +#define VIRIAL_MASK 0x0000000000010000 +#define MU_MASK 0x0000000000020000 // SPIN -#define SP_MASK 0x00000001 -#define FM_MASK 0x00000002 -#define FML_MASK 0x00000004 +#define SP_MASK 0x0000000000040000 +#define FM_MASK 0x0000000000080000 +#define FML_MASK 0x0000000000100000 // DPD -#define DPDRHO_MASK 0x00040000 -#define DPDTHETA_MASK 0x00080000 -#define UCOND_MASK 0x00100000 -#define UMECH_MASK 0x00200000 -#define UCHEM_MASK 0x00400000 -#define UCG_MASK 0x00800000 -#define UCGNEW_MASK 0x01000000 -#define DUCHEM_MASK 0x02000000 -#define DVECTOR_MASK 0x04000000 +#define DPDRHO_MASK 0x0000000000200000 +#define DPDTHETA_MASK 0x0000000000400000 +#define UCOND_MASK 0x0000000000800000 +#define UMECH_MASK 0x0000000001000000 +#define UCHEM_MASK 0x0000000002000000 +#define UCG_MASK 0x0000000004000000 +#define UCGNEW_MASK 0x0000000008000000 +#define DUCHEM_MASK 0x0000000010000000 +#define DVECTOR_MASK 0x0000000020000000 // granular -#define RADIUS_MASK 0x00100000 -#define DENSITY_MASK 0x00200000 -#define OMEGA_MASK 0x00400000 -#define TORQUE_MASK 0x00800000 -#define ANGMOM_MASK 0x01000000 -#define GRANULAR_MASK 0x01f00000 - -// peridynamics - -#define VFRAC_MASK 0x00000001 -#define S0_MASK 0x00000002 -#define X0_MASK 0x00000004 -#define PERI_MASK 0x00000007 - -#define ELLIPSOID_MASK 0x00000008 -#define LINE_MASK 0x00000010 -#define TRI_MASK 0x00000020 - -// electron - -#define SPIN_MASK 0x00000100 -#define ERADIUS_MASK 0x00000200 -#define ERVEL_MASK 0x00000400 -#define ERFORCE_MASK 0x00000800 -#define ERVELFORCE_MASK 0x00001000 - -#define CS_MASK 0x00002000 -#define CSFORCE_MASK 0x00004000 -#define VFORCE_MASK 0x00008000 
- -#define ELECTRON_MASK 0x0000ff00 - -// SPH - -#define ETAG_MASK 0x00010000 -#define RHO_MASK 0x00020000 -#define DRHO_MASK 0x00040000 -#define E_MASK 0x00080000 -#define DE_MASK 0x00100000 -#define VEST_MASK 0x00200000 -#define CV_MASK 0x00400000 +#define RADIUS_MASK 0x0000000040000000 +#define OMEGA_MASK 0x0000000080000000 +#define TORQUE_MASK 0x0000000100000000 +#define ANGMOM_MASK 0x0000000200000000 #endif From 806616bf0d3dfa677169ebded8873b4581f92c83 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 28 Oct 2025 10:35:57 -0600 Subject: [PATCH 113/604] Refactor Kokkos atom_vecs, move pack/unpack to parent --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 476 ----- src/KOKKOS/atom_vec_angle_kokkos.h | 52 - src/KOKKOS/atom_vec_atomic_kokkos.cpp | 356 ---- src/KOKKOS/atom_vec_atomic_kokkos.h | 27 - src/KOKKOS/atom_vec_bond_kokkos.cpp | 452 ---- src/KOKKOS/atom_vec_bond_kokkos.h | 41 - src/KOKKOS/atom_vec_charge_kokkos.cpp | 426 ---- src/KOKKOS/atom_vec_charge_kokkos.h | 32 - src/KOKKOS/atom_vec_dipole_kokkos.cpp | 450 ---- src/KOKKOS/atom_vec_dipole_kokkos.h | 33 - src/KOKKOS/atom_vec_dpd_kokkos.cpp | 782 ------- src/KOKKOS/atom_vec_dpd_kokkos.h | 38 - src/KOKKOS/atom_vec_full_kokkos.cpp | 628 ------ src/KOKKOS/atom_vec_full_kokkos.h | 70 - src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 2450 ---------------------- src/KOKKOS/atom_vec_hybrid_kokkos.h | 141 -- src/KOKKOS/atom_vec_kokkos.cpp | 2141 ++++++++++++++++--- src/KOKKOS/atom_vec_kokkos.h | 188 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 610 ------ src/KOKKOS/atom_vec_molecular_kokkos.h | 70 - src/KOKKOS/atom_vec_sphere_kokkos.cpp | 1512 ------------- src/KOKKOS/atom_vec_sphere_kokkos.h | 53 - src/KOKKOS/atom_vec_spin_kokkos.cpp | 437 ---- src/KOKKOS/comm_kokkos.cpp | 33 +- src/KOKKOS/comm_tiled_kokkos.cpp | 5 - 25 files changed, 2022 insertions(+), 9481 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 15a66b579bb..776eb3bbd9d 100644 --- 
a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -186,482 +186,6 @@ void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort &Sorter /* ---------------------------------------------------------------------- */ -template -struct AtomVecAngleKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecAngleKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - 
- if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecAngleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAngleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecAngleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAngleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecAngleKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) 
d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecAngleKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAngleKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - 
typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecAngleKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template 
view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < 
_num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 to store buffer length - - size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecAngleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecAngleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; 
- typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecAngleKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - 
_image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecAngleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAngleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - -/* ---------------------------------------------------------------------- */ - void AtomVecAngleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git 
a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 783cd2c9a4a..90cadf3aab4 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -35,21 +35,6 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; @@ -59,43 +44,6 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { tagint **special; tagint **bond_atom; tagint **angle_atom1,**angle_atom2,**angle_atom3; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; 
- HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 7917c019a3b..3ee66be1529 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -29,7 +29,6 @@ using namespace LAMMPS_NS; AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecAtomic(lmp) { - unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -119,361 +118,6 @@ void AtomVecAtomicKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecAtomicKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - double _dx,_dy,_dz; - - AtomVecAtomicKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - 
_buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecAtomicKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAtomicKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecAtomicKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAtomicKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*6; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - int _first; - - - AtomVecAtomicKokkos_UnpackBorder( - const typename 
AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecAtomicKokkos_UnpackBorder f(buf.view_host(),h_x,h_tag,h_type,h_mask,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_UnpackBorder f(buf.view_device(),d_x,d_tag,d_type,d_mask,first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - 
typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecAtomicKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) -{ - size_exchange = 11; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { 
- int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecAtomicKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()),_dim(dim), - _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = 
_buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecAtomicKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 
551aa5364ce..7ab96b87c62 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -36,36 +36,9 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; - - protected: - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; }; } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 5c85292f0f3..cb49a05e03b 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -29,7 +29,6 @@ using namespace LAMMPS_NS; AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecBond(lmp) { - unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -151,457 +150,6 @@ void AtomVecBondKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* 
---------------------------------------------------------------------- */ -template -struct AtomVecBondKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecBondKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = 
pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecBondKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecBondKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecBondKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecBondKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecBondKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* 
---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecBondKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename 
AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecBondKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = 
d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 to store buffer length - - size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecBondKokkos_PackExchangeFunctor - 
f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecBondKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - 
int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - 
k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecBondKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index f15bc4d859d..3978e652c1a 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -35,21 +35,6 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; @@ -58,32 +43,6 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { tagint *molecule; tagint **special; tagint **bond_atom; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - 
HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; }; } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 723ae149c0b..f9176006bcc 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -29,7 +29,6 @@ using namespace LAMMPS_NS; AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) { - unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -126,431 +125,6 @@ void AtomVecChargeKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecChargeKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecChargeKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = 
(buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - double _dx,_dy,_dz; - - AtomVecChargeKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) 
= d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecChargeKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecChargeKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecChargeKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecChargeKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - int _first; - - - AtomVecChargeKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename 
AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - if (first+n >= nmax) { - grow(first+n+100); - } - if (space==Host) { - struct AtomVecChargeKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,first); - Kokkos::parallel_for(n,f); - } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - - typename 
AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecChargeKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _q[i]; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - 
DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 12; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecChargeKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()),_dim(dim), - _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) 
const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _q[i] = _buf(myrecv,11); - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* 
---------------------------------------------------------------------- */ - void AtomVecChargeKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index 5fbdda24869..df3416bd11d 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -36,44 +36,12 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - - HAT::t_kkfloat_1d h_q; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index 72c6c62754d..dafdf5fdc18 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -128,456 +128,6 @@ void AtomVecDipoleKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecDipoleKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDipoleKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_4 &mu, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _mu(mu.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t elements = 7; // size_forward - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + 
_pbc[2]*_zprd; - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_kkfloat_1d_4_randomread _mu; - double _dx,_dy,_dz; - - AtomVecDipoleKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const typename AT::t_kkfloat_1d_4_randomread &mu, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = _mu(j,0); - _buf(i,8) = _mu(j,1); - _buf(i,9) = _mu(j,2); - _buf(i,10) = _mu(j,3); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = _mu(j,0); - _buf(i,8) = _mu(j,1); - _buf(i,9) = _mu(j,2); - _buf(i,10) = _mu(j,3); - } - } -}; - -/* 
---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecDipoleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDipoleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecDipoleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDipoleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - typename AT::t_kkfloat_1d_4 _mu; - int _first; - - - AtomVecDipoleKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - typename AT::t_kkfloat_1d_4 &mu, - const int& first): - 
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - _mu(i+_first,0) = _buf(i,7); - _mu(i+_first,1) = _buf(i,8); - _mu(i+_first,2) = _buf(i,9); - _mu(i+_first,3) = _buf(i,10); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDipoleKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); - if (space==Host) { - struct AtomVecDipoleKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_mu,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDipoleKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_mu,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - 
typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_kkfloat_1d_4 _muw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecDipoleKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _mu(atom->k_mu.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _muw(atom->k_mu.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _q[i]; - _buf(mysend,12) = _mu(i,0); - _buf(mysend,13) = _mu(i,1); - _buf(mysend,14) = _mu(i,2); - _buf(mysend,15) = _mu(i,3); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - 
_maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - _muw(i,0) = _mu(j,0); - _muw(i,1) = _mu(j,1); - _muw(i,2) = _mu(j,2); - _muw(i,3) = _mu(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 16; // # of elements packed - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/12) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecDipoleKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - 
_mu(atom->k_mu.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _q[i] = _buf(myrecv,11); - _mu(i,0) = _buf(myrecv,12); - _mu(i,1) = _buf(myrecv,13); - _mu(i,2) = _buf(myrecv,14); - _mu(i,3) = _buf(myrecv,15); - } - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecDipoleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - -/* ---------------------------------------------------------------------- */ - void AtomVecDipoleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git 
a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index bfae1ca6081..ab5fcd11755 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -36,45 +36,12 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - HAT::t_kkfloat_1d h_q; - DAT::t_kkfloat_1d_4 d_mu; - HAT::t_kkfloat_1d_4 h_mu; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 2c56af049d0..c8833dfeb64 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -31,7 +31,6 @@ using namespace LAMMPS_NS; AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVec(lmp), 
AtomVecKokkos(lmp), AtomVecDPD(lmp) { - no_comm_vel_flag = 1; } /* ---------------------------------------------------------------------- @@ -162,787 +161,6 @@ void AtomVecDPDKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* ---------------------------------------------------------------------- */ -template -struct AtomVecDPDKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDPDKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) 
= _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _buf(i,3) = _dpdTheta(j); - _buf(i,4) = _uCond(j); - _buf(i,5) = _uMech(j); - _buf(i,6) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct 
AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackCommSelf { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - int _nfirst; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDPDKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double 
&yz, const int* const pbc): - _x(x.view()),_xw(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _dpdTheta(i+_nfirst) = _dpdTheta(j); - _uCond(i+_nfirst) = _uCond(j); - _uMech(i+_nfirst) = _uMech(j); - _uChem(i+_nfirst) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* 
---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_const _buf; - int _first; - - AtomVecDPDKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _dpdTheta(i+_first) = _buf(i,3); - _uCond(i+_first) = _buf(i,4); - _uMech(i+_first) = _buf(i,5); - _uChem(i+_first) = _buf(i,6); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - 
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - double _dx,_dy,_dz; - - AtomVecDPDKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &dpdTheta, - const typename AT::t_kkfloat_1d &uCond, - const typename AT::t_kkfloat_1d &uMech, - const typename AT::t_kkfloat_1d &uChem, - const typename AT::t_kkfloat_1d &uCG, - const typename AT::t_kkfloat_1d &uCGnew, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), - _uCGnew(uCGnew), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _dpdTheta(j); - _buf(i,7) = _uCond(j); - _buf(i,8) = _uMech(j); - _buf(i,9) = _uChem(j); - _buf(i,10) = _uCG(j); - _buf(i,11) = _uCGnew(j); - } -}; - -/* 
---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDPDKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDPDKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*6; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - int _first; - - - AtomVecDPDKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - 
typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &dpdTheta, - const typename AT::t_kkfloat_1d &uCond, - const typename AT::t_kkfloat_1d &uMech, - const typename AT::t_kkfloat_1d &uChem, - const typename AT::t_kkfloat_1d &uCG, - const typename AT::t_kkfloat_1d &uCGnew, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), - _uCGnew(uCGnew), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _dpdTheta(i+_first) = _buf(i,6); - _uCond(i+_first) = _buf(i,7); - _uMech(i+_first) = _buf(i,8); - _uChem(i+_first) = _buf(i,9); - _uCG(i+_first) = _buf(i,10); - _uCGnew(i+_first) = _buf(i,11); -// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); - if (space==Host) { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view_device(), - 
d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecDPDKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _dpdTheta(atom->k_dpdTheta.view()), - _uCond(atom->k_uCond.view()), - _uMech(atom->k_uMech.view()), - _uChem(atom->k_uChem.view()), - _uCG(atom->k_uCG.view()), - _uCGnew(atom->k_uCGnew.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _dpdThetaw(atom->k_dpdTheta.view()), - _uCondw(atom->k_uCond.view()), - _uMechw(atom->k_uMech.view()), - _uChemw(atom->k_uChem.view()), - _uCGw(atom->k_uCG.view()), - 
_uCGneww(atom->k_uCGnew.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = _tag[i]; - _buf(mysend,8) = _type[i]; - _buf(mysend,9) = _mask[i]; - _buf(mysend,10) = _image[i]; - _buf(mysend,11) = _dpdTheta[i]; - _buf(mysend,12) = _uCond[i]; - _buf(mysend,13) = _uMech[i]; - _buf(mysend,14) = _uChem[i]; - _buf(mysend,15) = _uCG[i]; - _buf(mysend,16) = _uCGnew[i]; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - _dpdThetaw[i] = _dpdTheta(j); - _uCondw[i] = _uCond(j); - _uMechw[i] = _uMech(j); - _uChemw[i] = _uChem(j); - _uCGw[i] = _uCG(j); - _uCGneww[i] = _uCGnew(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) -{ - size_exchange = 17; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | 
UCOND_MASK | - UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | - DVECTOR_MASK); - if (space == HostKK) { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } else { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } - return nsend*size_exchange; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _dpdTheta; - typename AT::t_kkfloat_1d _uCond; - typename AT::t_kkfloat_1d _uMech; - typename AT::t_kkfloat_1d _uChem; - typename AT::t_kkfloat_1d _uCG; - typename AT::t_kkfloat_1d _uCGnew; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecDPDKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = 
_buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = _buf(myrecv,7); - _type[i] = _buf(myrecv,8); - _mask[i] = _buf(myrecv,9); - _image[i] = _buf(myrecv,10); - _dpdTheta[i] = _buf(myrecv,11); - _uCond[i] = _buf(myrecv,12); - _uMech[i] = _buf(myrecv,13); - _uChem[i] = _buf(myrecv,14); - _uCG[i] = _buf(myrecv,15); - _uCGnew[i] = _buf(myrecv,16); - } - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - - atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | - UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | - DVECTOR_MASK); - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecDPDKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index a60c4676da9..1f6db4ef234 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -36,49 +36,11 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) 
override; - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; double *duChem; - - protected: - DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; - HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 606569be3d4..17bc931f424 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -29,7 +29,6 @@ using 
namespace LAMMPS_NS; AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecFull(lmp) { - unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -268,633 +267,6 @@ void AtomVecFullKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* ---------------------------------------------------------------------- */ -template -struct AtomVecFullKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecFullKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; 
- } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecFullKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecFullKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecFullKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecFullKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecFullKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - typename AT::t_tagint_1d &molecule, - 
const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); - - while (first+n >= nmax) grow(0); - - if (space==Host) { - struct AtomVecFullKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread 
_bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - _dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; - typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecFullKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - 
_tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - 
_dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = _q(i); - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - 
_buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; - } - - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _num_dihedralw(i) = _num_dihedral(j); - for (k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); - } - _num_improperw(i) = _num_improper(j); - for (k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - 
_improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 charge - // 1 to store buffer length - - size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom+5*atom->improper_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename 
AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d _num_dihedral; - typename AT::t_int_2d _dihedral_type; - typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d _num_improper; - typename AT::t_int_2d _improper_type; - typename AT::t_tagint_2d _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecFullKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - 
_dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _q(i) = _buf(myrecv,m++); - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) 
d_ubuf(_buf(myrecv,m++)).i; - } - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - 
k_count.modify_host(); - k_count.sync_device(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecFullKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index f6039bdcb60..af46e8d48d6 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -35,21 +35,6 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; @@ -62,61 +47,6 @@ class 
AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { tagint **angle_atom1,**angle_atom2,**angle_atom3; tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4; tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - HAT::t_kkfloat_1d h_q; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; - - DAT::t_int_1d d_num_dihedral; - DAT::t_int_2d d_dihedral_type; - DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, - d_dihedral_atom3,d_dihedral_atom4; - DAT::t_int_1d d_num_improper; - DAT::t_int_2d d_improper_type; - DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, - d_improper_atom3,d_improper_atom4; - - HAT::t_int_1d h_num_dihedral; - HAT::t_int_2d h_dihedral_type; - HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, - h_dihedral_atom3,h_dihedral_atom4; - HAT::t_int_1d h_num_improper; - HAT::t_int_2d h_improper_type; - HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, - h_improper_atom3,h_improper_atom4; }; } diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index b1e30a1c0fb..9912cd41604 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -55,2456 +55,6 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort &Sorte (dynamic_cast(styles[k]))->sort_kokkos(Sorter); } -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - uint64_t _datamask; - - AtomVecHybridKokkos_PackComm( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const uint64_t &datamask): - _x(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { - const int size_forward = atomKK->avecKK->size_forward; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; - const size_t elements = size_forward; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - int m = 0; - if (PBC_FLAG == 0) { - _buf(i,m++) = _x(j,0); - _buf(i,m++) = _x(j,1); - _buf(i,m++) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,m++) = _x(j,0) + 
_pbc[0]*_xprd; - _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - } - } - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } - - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } - - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) - _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _buf(i,m++) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,datamask_comm); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackCommSelf { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; - int _nfirst; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - uint64_t _datamask; - - AtomVecHybridKokkos_PackCommSelf( - const AtomKokkos* atomKK, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const 
double &xz, const double &yz, const int* const pbc, - const uint64_t datamask): - _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } - - if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); - } - - if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); - - if (_datamask & UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int 
AtomVecHybridKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } - atomKK->modified(HostKK,datamask_comm); - } else { - atomKK->sync(Device,datamask_comm); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); - } - } - atomKK->modified(Device,datamask_comm); - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackCommSelfFused { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; - typename AT::t_int_2d_lr_const _list; - typename AT::t_int_2d_const _pbc; - typename AT::t_int_1d_const _pbc_flag; - typename AT::t_int_1d_const _firstrecv; - typename AT::t_int_1d_const _sendnum_scan; - typename AT::t_int_1d_const _g2l; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - uint64_t _datamask; - - AtomVecHybridKokkos_PackCommSelfFused( - const AtomKokkos* atomKK, - const typename DAT::tdual_int_2d_lr &list, - const typename DAT::tdual_int_2d &pbc, - const typename DAT::tdual_int_1d &pbc_flag, - const typename DAT::tdual_int_1d &firstrecv, - const typename DAT::tdual_int_1d &sendnum_scan, - const typename DAT::tdual_int_1d &g2l, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, - const uint64_t datamask): - _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), - _list(list.view()), - 
_pbc(pbc.view()), - _pbc_flag(pbc_flag.view()), - _firstrecv(firstrecv.view()), - _sendnum_scan(sendnum_scan.view()), - _g2l(g2l.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - - int iswap = 0; - while (ii >= _sendnum_scan[iswap]) iswap++; - int i = ii; - if (iswap > 0) - i = ii - _sendnum_scan[iswap-1]; - - const int _nfirst = _firstrecv[iswap]; - const int nlocal = _firstrecv[0]; - - int j = _list(iswap,i); - if (j >= nlocal) - j = _g2l(j-nlocal); - - if (_pbc_flag(ii) == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - } - } - - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } - - if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); - } - - if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); - - if (_datamask & UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, - const 
DAT::tdual_int_1d &g2l) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm); - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); - } - atomKK->modified(HostKK,datamask_comm); - } else { - atomKK->sync(Device,datamask_comm); - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); - } - atomKK->modified(Device,datamask_comm); - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnpackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_const _buf; - int _first; - uint64_t _datamask; - - AtomVecHybridKokkos_UnpackComm( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const int &first, const uint64_t &datamask): - _x(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - 
_uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _first(first),_datamask(datamask) { - const int size_forward = atomKK->avecKK->size_forward; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; - const size_t elements = size_forward; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - _x(i+_first,0) = _buf(i,m++); - _x(i+_first,1) = _buf(i,m++); - _x(i+_first,2) = _buf(i,m++); - - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - } - - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } - - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); - - if (_datamask & UCOND_MASK) - _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) - _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) - _uChem(i+_first) = _buf(i,m++); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecHybridKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm); - struct AtomVecHybridKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,datamask_comm); - } else { - atomKK->sync(Device,datamask_comm); - struct AtomVecHybridKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,datamask_comm); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - 
typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - double _h_rate[6]; - const int _deform_vremap; - uint64_t _datamask; - - AtomVecHybridKokkos_PackCommVel( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const double * const h_rate, - const int &deform_vremap, - const uint64_t &datamask): - _x(atomKK->k_x.view()), - _mask(atomKK->k_mask.view()), - _v(atomKK->k_v.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap), - _datamask(datamask) - { - const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; - _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,m++) = _x(j,0); - _buf(i,m++) = _x(j,1); - _buf(i,m++) = _x(j,2); - _buf(i,m++) = _v(j,0); - _buf(i,m++) = _v(j,1); - _buf(i,m++) = _v(j,2); - } 
else { - if (TRICLINIC == 0) { - _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; - } - - if (DEFORM_VREMAP == 0) { - _buf(i,m++) = _v(j,0); - _buf(i,m++) = _v(j,1); - _buf(i,m++) = _v(j,2); - } else { - if (_mask(i) & _deform_vremap) { - _buf(i,m++) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,m++) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,m++) = _v(j,2) + _pbc[2]*_h_rate[2]; - } else { - _buf(i,m++) = _v(j,0); - _buf(i,m++) = _v(j,1); - _buf(i,m++) = _v(j,2); - } - } - } - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } - - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } - - if (_datamask & OMEGA_MASK) { - _buf(i,m++) = _omega(j,0); - _buf(i,m++) = _omega(j,1); - _buf(i,m++) = _omega(j,2); - } - - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) - _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _buf(i,m++) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_comm_vel_kokkos( - const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm_vel); - if (pbc_flag) { - if (deform_vremap) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, 
- datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,datamask_comm_vel); - if (pbc_flag) { - if (deform_vremap) { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_PackCommVel f( - atomKK, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, - datamask_comm_vel); - Kokkos::parallel_for(n,f); - } - } - } - - return n*(size_forward + size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnpackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_const _buf; - int _first; - uint64_t _datamask; - - AtomVecHybridKokkos_UnpackCommVel( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const int &first, const int &datamask): - _x(atomKK->k_x.view()), - _v(atomKK->k_v.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _first(first),_datamask(datamask) - { - const size_t elements = atomKK->avecKK->size_forward + 
atomKK->avecKK->size_velocity; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - _x(i+_first,0) = _buf(i,m++); - _x(i+_first,1) = _buf(i,m++); - _x(i+_first,2) = _buf(i,m++); - _v(i+_first,0) = _buf(i,m++); - _v(i+_first,1) = _buf(i,m++); - _v(i+_first,2) = _buf(i,m++); - - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - } - - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } - - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } - - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); - - if (_datamask & UCOND_MASK) - _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) - _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) - _uChem(i+_first) = _buf(i,m++); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecHybridKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm_vel); - struct AtomVecHybridKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,datamask_comm_vel); - } else { - atomKK->sync(Device,datamask_comm_vel); - struct AtomVecHybridKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,datamask_comm_vel); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackReverse { - typedef DeviceType 
device_type; - typedef ArrayTypes AT; - - typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3_randomread _torque; - typename AT::t_double_2d_lr _buf; - int _first; - uint64_t _datamask; - - AtomVecHybridKokkos_PackReverse( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const int &first, const uint64_t &datamask): - _f(atomKK->k_f.view()), - _fm(atomKK->k_fm.view()), - _fm_long(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()), - _first(first),_datamask(datamask) { - const size_t elements = atomKK->avecKK->size_reverse; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - _buf(i,m++) = _f(i+_first,0); - _buf(i,m++) = _f(i+_first,1); - _buf(i,m++) = _f(i+_first,2); - - if (_datamask & FM_MASK) { - _buf(i,m++) = _fm(i+_first,0); - _buf(i,m++) = _fm(i+_first,1); - _buf(i,m++) = _fm(i+_first,2); - } - - if (_datamask & FML_MASK) { - _buf(i,m++) = _fm_long(i+_first,0); - _buf(i,m++) = _fm_long(i+_first,1); - _buf(i,m++) = _fm_long(i+_first,2); - } - - if (_datamask & TORQUE_MASK) { - _buf(i,m++) = _torque(i+_first,0); - _buf(i,m++) = _torque(i+_first,1); - _buf(i,m++) = _torque(i+_first,2); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_reverse_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,datamask_reverse); - struct AtomVecHybridKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); - Kokkos::parallel_for(n,f); - } else { - atomKK->sync(Device,datamask_reverse); - struct AtomVecHybridKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); - Kokkos::parallel_for(n,f); - } - - return n*size_reverse; -} - -/* 
---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnPackReverseSelf { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3_randomread _torque; - typename AT::t_kkacc_1d_3 _fw,_fmw,_fm_longw; - typename AT::t_kkfloat_1d_3 _torquew; - typename AT::t_int_1d_const _list; - int _nfirst; - uint64_t _datamask; - - AtomVecHybridKokkos_UnPackReverseSelf( - const AtomKokkos* atomKK, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const uint64_t &datamask): - _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), - _fm(atomKK->k_fm.view()),_fmw(atomKK->k_fm.view()), - _fm_long(atomKK->k_fm_long.view()),_fm_longw(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()),_torquew(atomKK->k_torque.view()), - _nfirst(nfirst),_list(list.view()), - _datamask(datamask) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - _fw(j,0) += _f(i+_nfirst,0); - _fw(j,1) += _f(i+_nfirst,1); - _fw(j,2) += _f(i+_nfirst,2); - - if (_datamask & FM_MASK) { - _fmw(j,0) += _fm(i+_nfirst,0); - _fmw(j,1) += _fm(i+_nfirst,1); - _fmw(j,2) += _fm(i+_nfirst,2); - } - - if (_datamask & FML_MASK) { - _fm_longw(j,0) += _fm_long(i+_nfirst,0); - _fm_longw(j,1) += _fm_long(i+_nfirst,1); - _fm_longw(j,2) += _fm_long(i+_nfirst,2); - } - - if (_datamask & TORQUE_MASK) { - _torquew(j,0) += _torque(i+_nfirst,0); - _torquew(j,1) += _torque(i+_nfirst,1); - _torquew(j,2) += _torque(i+_nfirst,2); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst) { - if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,datamask_reverse); - struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); - Kokkos::parallel_for(n,f); - 
atomKK->modified(HostKK,datamask_reverse); - } else { - atomKK->sync(Device,datamask_reverse); - struct AtomVecHybridKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,datamask_reverse); - } - - return n*size_reverse; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnPackReverse { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3 _torque; - typename AT::t_double_2d_lr_const _buf; - typename AT::t_int_1d_const _list; - uint64_t _datamask; - - AtomVecHybridKokkos_UnPackReverse( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const uint64_t datamask): - _f(atomKK->k_f.view()), - _fm(atomKK->k_fm.view()), - _fm_long(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()), - _list(list.view()), - _datamask(datamask) { - const size_t elements = atomKK->avecKK->size_reverse; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - const int j = _list(i); - _f(j,0) += _buf(i,m++); - _f(j,1) += _buf(i,m++); - _f(j,2) += _buf(i,m++); - - if (_datamask & FM_MASK) { - _fm(j,0) += _buf(i,m++); - _fm(j,1) += _buf(i,m++); - _fm(j,2) += _buf(i,m++); - } - - if (_datamask & FML_MASK) { - _fm_long(j,0) += _buf(i,m++); - _fm_long(j,1) += _buf(i,m++); - _fm_long(j,2) += _buf(i,m++); - } - - if (_datamask & TORQUE_MASK) { - _torque(j,0) += _buf(i,m++); - _torque(j,1) += _buf(i,m++); - _torque(j,2) += _buf(i,m++); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecHybridKokkos::unpack_reverse_kokkos(const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr 
&buf) -{ - // Check whether to always run reverse communication on the host - // Choose correct reverse UnPackReverse kernel - - if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,datamask_reverse); - struct AtomVecHybridKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,datamask_reverse); - } else { - atomKK->sync(Device,datamask_reverse); - struct AtomVecHybridKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,datamask_reverse); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_kkfloat_1d_4 _mu; - const typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - double _dx,_dy,_dz; - uint64_t _datamask; - - AtomVecHybridKokkos_PackBorder( - const AtomKokkos* atomKK, - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const double &dx, const double &dy, const double &dz, - const uint64_t &datamask): - _buf(buf),_list(list), - _x(atomKK->k_x.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _molecule(atomKK->k_molecule.view()), - _q(atomKK->k_q.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - 
_uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - int m = 0; - if (PBC_FLAG == 0) { - _buf(i,m++) = _x(j,0); - _buf(i,m++) = _x(j,1); - _buf(i,m++) = _x(j,2); - } else { - _buf(i,m++) = _x(j,0) + _dx; - _buf(i,m++) = _x(j,1) + _dy; - _buf(i,m++) = _x(j,2) + _dz; - } - - _buf(i,m++) = d_ubuf(_tag(j)).d; - _buf(i,m++) = d_ubuf(_type(j)).d; - _buf(i,m++) = d_ubuf(_mask(j)).d; - - if (_datamask & MOLECULE_MASK) - _buf(i,m++) = d_ubuf(_molecule(j)).d; - - if (_datamask & Q_MASK) - _buf(i,m++) = _q(j); - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - _buf(i,m++) = _mu(j,3); - } - - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } - - if (_datamask & RADIUS_MASK) - _buf(i,m++) = _radius(j); - - if (_datamask & RMASS_MASK) - _buf(i,m++) = _rmass(j); - - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) - _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _buf(i,m++) = _uChem(j); - - if (_datamask & UCG_MASK) - _buf(i,m++) = _uCG(j); - - if (_datamask & UCGNEW_MASK) - _buf(i,m++) = _uCGnew(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - atomKK->sync(space,datamask_border); - - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } 
- if (space == Host) { - AtomVecHybridKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); - } else { - AtomVecHybridKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space == Host) { - AtomVecHybridKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); - } else { - AtomVecHybridKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - typename AT::t_kkfloat_1d _q; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - int _first; - uint64_t _datamask; - - AtomVecHybridKokkos_UnpackBorder( - const AtomKokkos* atomKK, - const typename AT::t_double_2d_lr_const &buf, - const int &first, const uint64_t &datamask): - _buf(buf), - _x(atomKK->k_x.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _molecule(atomKK->k_molecule.view()), - _q(atomKK->k_q.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - 
_uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - _first(first),_datamask(datamask) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - _x(i+_first,0) = _buf(i,m++); - _x(i+_first,1) = _buf(i,m++); - _x(i+_first,2) = _buf(i,m++); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; - - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - - if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,m++); - - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } - - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } - - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); - - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); - - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); - - if (_datamask & UCOND_MASK) - _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) - _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) - _uChem(i+_first) = _buf(i,m++); - - if (_datamask & UCG_MASK) - _uCG(i+_first) = _buf(i,m++); - - if (_datamask & UCGNEW_MASK) - _uCGnew(i+_first) = _buf(i,m++); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->sync(space,datamask_border); - - while (first+n >= nmax) grow(0); - - if (space == Host) { - struct AtomVecHybridKokkos_UnpackBorder - f(atomKK,buf.view_host(),first,datamask_border); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_UnpackBorder - 
f(atomKK,buf.view_device(),first,datamask_border); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,datamask_border); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackBorderVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_um _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3 _v; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d_4 _mu; - const typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - double _dx,_dy,_dz, _dvx, _dvy, _dvz; - const int _deform_groupbit; - const uint64_t _datamask; - - AtomVecHybridKokkos_PackBorderVel( - const AtomKokkos* atomKK, - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const double &dx, const double &dy, const double &dz, - const double &dvx, const double &dvy, const double &dvz, - const int &deform_groupbit, - const uint64_t &datamask): - _buf(buf),_list(list),_datamask(datamask), - _x(atomKK->k_x.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _v(atomKK->k_v.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - _dx(dx),_dy(dy),_dz(dz), - _dvx(dvx),_dvy(dvy),_dvz(dvz), - _deform_groupbit(deform_groupbit) { - const size_t elements = atomKK->avecKK->size_border + 
atomKK->avecKK->size_velocity; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,m++) = _x(j,0); - _buf(i,m++) = _x(j,1); - _buf(i,m++) = _x(j,2); - } else { - _buf(i,m++) = _x(j,0) + _dx; - _buf(i,m++) = _x(j,1) + _dy; - _buf(i,m++) = _x(j,2) + _dz; - } - _buf(i,m++) = d_ubuf(_tag(j)).d; - _buf(i,m++) = d_ubuf(_type(j)).d; - _buf(i,m++) = d_ubuf(_mask(j)).d; - - if (DEFORM_VREMAP) { - if (_mask(i) & _deform_groupbit) { - _buf(i,m++) = _v(j,0) + _dvx; - _buf(i,m++) = _v(j,1) + _dvy; - _buf(i,m++) = _v(j,2) + _dvz; - } - } else { - _buf(i,m++) = _v(j,0); - _buf(i,m++) = _v(j,1); - _buf(i,m++) = _v(j,2); - } - - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - _buf(i,m++) = _mu(j,3); - } - - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } - - if (_datamask & RADIUS_MASK) - _buf(i,m++) = _radius(j); - - if (_datamask & RMASS_MASK) - _buf(i,m++) = _rmass(j); - - if (_datamask & OMEGA_MASK) { - _buf(i,m++) = _omega(j,0); - _buf(i,m++) = _omega(j,1); - _buf(i,m++) = _omega(j,2); - } - - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) - _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _buf(i,m++) = _uChem(j); - - if (_datamask & UCG_MASK) - _buf(i,m++) = _uCG(j); - - if (_datamask & UCGNEW_MASK) - _buf(i,m++) = _uCGnew(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_border_vel_kokkos( - int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx = 0, dy 
= 0, dz = 0; - double dvx = 0, dvy = 0, dvz = 0; - - atomKK->sync(space,datamask_border_vel); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (!deform_vremap) { - if (space == Host) { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } else { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } - } - else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - if (space == Host) { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } else { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } - } - } else { - if (space == Host) { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } else { - AtomVecHybridKokkos_PackBorderVel f( - atomKK, - buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit,datamask_border_vel); - Kokkos::parallel_for(n,f); - } - } - - atomKK->modified(space,datamask_border_vel); - - return n*(size_border + size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnpackBorderVel { - typedef DeviceType device_type; - typedef 
ArrayTypes AT; - - typename AT::t_double_2d_lr_const_um _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - int _first; - uint64_t _datamask; - - AtomVecHybridKokkos_UnpackBorderVel( - const AtomKokkos* atomKK, - const typename AT::t_double_2d_lr_const &buf, - const int &first, - const uint64_t &datamask): - _buf(buf), - _x(atomKK->k_x.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _v(atomKK->k_v.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - _first(first),_datamask(datamask) - { - const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - int m = 0; - _x(i+_first,0) = _buf(i,m++); - _x(i+_first,1) = _buf(i,m++); - _x(i+_first,2) = _buf(i,m++); - _tag(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); - _type(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); - _mask(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); - _radius(i+_first) = _buf(i,m++); - _rmass(i+_first) = _buf(i,m++); - _v(i+_first,0) = _buf(i,m++); - _v(i+_first,1) = _buf(i,m++); - _v(i+_first,2) = _buf(i,m++); - - if (_datamask & MU_MASK) { - 
_mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } - - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } - - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); - - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); - - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } - - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); - - if (_datamask & UCOND_MASK) - _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) - _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) - _uChem(i+_first) = _buf(i,m++); - - if (_datamask & UCG_MASK) - _uCG(i+_first) = _buf(i,m++); - - if (_datamask & UCGNEW_MASK) - _uCGnew(i+_first) = _buf(i,m++); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecHybridKokkos::unpack_border_vel_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - while (first+n >= nmax) grow(0); - if (space == Host) { - struct AtomVecHybridKokkos_UnpackBorderVel f( - atomKK, - buf.view_host(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecHybridKokkos_UnpackBorderVel f( - atomKK, - buf.view_device(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,datamask_border_vel); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; 
- typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - 
_dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; - typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d_3 _omegaw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - uint64_t _datamask; - - AtomVecHybridKokkos_PackExchangeFunctor( - const AtomKokkos* atomKK, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist, - const uint64_t datamask): - _x(atomKK->k_x.view()), - _v(atomKK->k_v.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _image(atomKK->k_image.view()), - _q(atomKK->k_q.view()), - _molecule(atomKK->k_molecule.view()), - _nspecial(atomKK->k_nspecial.view()), - _special(atomKK->k_special.view()), - _num_bond(atomKK->k_num_bond.view()), - _bond_type(atomKK->k_bond_type.view()), - _bond_atom(atomKK->k_bond_atom.view()), - _num_angle(atomKK->k_num_angle.view()), - _angle_type(atomKK->k_angle_type.view()), - _angle_atom1(atomKK->k_angle_atom1.view()), - _angle_atom2(atomKK->k_angle_atom2.view()), - _angle_atom3(atomKK->k_angle_atom3.view()), - _num_dihedral(atomKK->k_num_dihedral.view()), - _dihedral_type(atomKK->k_dihedral_type.view()), - _dihedral_atom1(atomKK->k_dihedral_atom1.view()), - _dihedral_atom2(atomKK->k_dihedral_atom2.view()), - _dihedral_atom3(atomKK->k_dihedral_atom3.view()), - _dihedral_atom4(atomKK->k_dihedral_atom4.view()), - _num_improper(atomKK->k_num_improper.view()), - _improper_type(atomKK->k_improper_type.view()), - _improper_atom1(atomKK->k_improper_atom1.view()), - _improper_atom2(atomKK->k_improper_atom2.view()), - 
_improper_atom3(atomKK->k_improper_atom3.view()), - _improper_atom4(atomKK->k_improper_atom4.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - - _xw(atomKK->k_x.view()), - _vw(atomKK->k_v.view()), - _tagw(atomKK->k_tag.view()), - _typew(atomKK->k_type.view()), - _maskw(atomKK->k_mask.view()), - _imagew(atomKK->k_image.view()), - _qw(atomKK->k_q.view()), - _moleculew(atomKK->k_molecule.view()), - _nspecialw(atomKK->k_nspecial.view()), - _specialw(atomKK->k_special.view()), - _num_bondw(atomKK->k_num_bond.view()), - _bond_typew(atomKK->k_bond_type.view()), - _bond_atomw(atomKK->k_bond_atom.view()), - _num_anglew(atomKK->k_num_angle.view()), - _angle_typew(atomKK->k_angle_type.view()), - _angle_atom1w(atomKK->k_angle_atom1.view()), - _angle_atom2w(atomKK->k_angle_atom2.view()), - _angle_atom3w(atomKK->k_angle_atom3.view()), - _num_dihedralw(atomKK->k_num_dihedral.view()), - _dihedral_typew(atomKK->k_dihedral_type.view()), - _dihedral_atom1w(atomKK->k_dihedral_atom1.view()), - _dihedral_atom2w(atomKK->k_dihedral_atom2.view()), - _dihedral_atom3w(atomKK->k_dihedral_atom3.view()), - _dihedral_atom4w(atomKK->k_dihedral_atom4.view()), - _num_improperw(atomKK->k_num_improper.view()), - _improper_typew(atomKK->k_improper_type.view()), - _improper_atom1w(atomKK->k_improper_atom1.view()), - _improper_atom2w(atomKK->k_improper_atom2.view()), - _improper_atom3w(atomKK->k_improper_atom3.view()), - _improper_atom4w(atomKK->k_improper_atom4.view()), - _muw(atomKK->k_mu.view()), - _spw(atomKK->k_sp.view()), - _radiusw(atomKK->k_radius.view()), - _rmassw(atomKK->k_rmass.view()), - _omegaw(atomKK->k_omega.view()), - _dpdThetaw(atomKK->k_dpdTheta.view()), - 
_uCondw(atomKK->k_uCond.view()), - _uMechw(atomKK->k_uMech.view()), - _uChemw(atomKK->k_uChem.view()), - _uCGw(atomKK->k_uCG.view()), - _uCGneww(atomKK->k_uCGnew.view()), - - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atomKK->avecKK->size_exchange), - _datamask(datamask) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - - if (_datamask & Q_MASK) - _buf(mysend,m++) = _q(i); - - if (_datamask & MOLECULE_MASK) - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - - if (_datamask & BOND_MASK) { - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (int k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - } - - if (_datamask & ANGLE_MASK) { - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (int k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - } - - if (_datamask & DIHEDRAL_MASK) { - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (int k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = 
d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; - } - } - - if (_datamask & IMPROPER_MASK) { - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (int k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; - } - } - - if (_datamask & SPECIAL_MASK) { - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (int k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - } - - if (_datamask & MU_MASK) { - _buf(mysend,m++) = _mu(i,0); - _buf(mysend,m++) = _mu(i,1); - _buf(mysend,m++) = _mu(i,2); - _buf(mysend,m++) = _mu(i,3); - } - - if (_datamask & SP_MASK) { - _buf(mysend,m++) = _sp(i,0); - _buf(mysend,m++) = _sp(i,1); - _buf(mysend,m++) = _sp(i,2); - _buf(mysend,m++) = _sp(i,3); - } - - if (_datamask & RADIUS_MASK) - _buf(mysend,m++) = _radius(i); - - if (_datamask & RMASS_MASK) - _buf(mysend,m++) = _rmass(i); - - if (_datamask & OMEGA_MASK) { - _buf(mysend,m++) = _omega(i,0); - _buf(mysend,m++) = _omega(i,1); - _buf(mysend,m++) = _omega(i,2); - } - - if (_datamask & DPDTHETA_MASK) - _buf(mysend,m++) = _dpdTheta(i); - - if (_datamask & UCOND_MASK) - _buf(mysend,m++) = _uCond(i); - - if (_datamask & UMECH_MASK) - _buf(mysend,m++) = _uMech(i); - - if (_datamask & UCHEM_MASK) - _buf(mysend,m++) = _uChem(i); - - if (_datamask & UCG_MASK) - _buf(mysend,m++) = _uCG(i); - - if (_datamask & UCGNEW_MASK) - _buf(mysend,m++) = _uCGnew(i); - - const int j = _copylist(mysend); - - if (j > -1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = 
_mask(j); - _imagew(i) = _image(j); - - if (_datamask & Q_MASK) - _qw(i) = _q(j); - - if (_datamask & MOLECULE_MASK) - _moleculew(i) = _molecule(j); - - if (_datamask & BOND_MASK) { - _num_bondw(i) = _num_bond(j); - for (int k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - } - - if (_datamask & ANGLE_MASK) { - _num_anglew(i) = _num_angle(j); - for (int k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - } - - if (_datamask & DIHEDRAL_MASK) { - _num_dihedralw(i) = _num_dihedral(j); - for (int k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); - } - } - - if (_datamask & IMPROPER_MASK) { - _num_improperw(i) = _num_improper(j); - for (int k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - } - - if (_datamask & SPECIAL_MASK) { - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (int k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - - if (_datamask & MU_MASK) { - _muw(i,0) = _mu(j,0); - _muw(i,1) = _mu(j,1); - _muw(i,2) = _mu(j,2); - _muw(i,3) = _mu(j,3); - } - - if (_datamask & SP_MASK) { - _spw(i,0) = _sp(j,0); - _spw(i,1) = _sp(j,1); - _spw(i,2) = _sp(j,2); - _spw(i,3) = _sp(j,3); - } - - if (_datamask & RADIUS_MASK) - _radiusw(i) = _radius(j); - - if (_datamask & RMASS_MASK) - _rmassw(i) = _rmass(j); - - if (_datamask & 
OMEGA_MASK) { - _omegaw(i,0) = _omega(j,0); - _omegaw(i,1) = _omega(j,1); - _omegaw(i,2) = _omega(j,2); - } - - if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i) = _dpdTheta(j); - - if (_datamask & UCOND_MASK) - _uCondw(i) = _uCond(j); - - if (_datamask & UMECH_MASK) - _uMechw(i) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _uChemw(i) = _uChem(j); - - if (_datamask & UCG_MASK) - _uCGw(i) = _uCG(j); - - if (_datamask & UCGNEW_MASK) - _uCGneww(i) = _uCGnew(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - set_size_exchange(); - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecHybridKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecHybridKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecHybridKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename 
AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d _num_dihedral; - typename AT::t_int_2d _dihedral_type; - typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d _num_improper; - typename AT::t_int_2d _improper_type; - typename AT::t_tagint_2d _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - uint64_t _datamask; - - AtomVecHybridKokkos_UnpackExchangeFunctor( - const AtomKokkos* atomKK, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi, - uint64_t datamask): - _x(atomKK->k_x.view()), - _v(atomKK->k_v.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _image(atomKK->k_image.view()), - _q(atomKK->k_q.view()), - _molecule(atomKK->k_molecule.view()), - _nspecial(atomKK->k_nspecial.view()), - _special(atomKK->k_special.view()), - _num_bond(atomKK->k_num_bond.view()), - _bond_type(atomKK->k_bond_type.view()), - _bond_atom(atomKK->k_bond_atom.view()), - _num_angle(atomKK->k_num_angle.view()), - _angle_type(atomKK->k_angle_type.view()), - _angle_atom1(atomKK->k_angle_atom1.view()), - _angle_atom2(atomKK->k_angle_atom2.view()), - _angle_atom3(atomKK->k_angle_atom3.view()), - _num_dihedral(atomKK->k_num_dihedral.view()), - _dihedral_type(atomKK->k_dihedral_type.view()), - _dihedral_atom1(atomKK->k_dihedral_atom1.view()), - 
_dihedral_atom2(atomKK->k_dihedral_atom2.view()), - _dihedral_atom3(atomKK->k_dihedral_atom3.view()), - _dihedral_atom4(atomKK->k_dihedral_atom4.view()), - _num_improper(atomKK->k_num_improper.view()), - _improper_type(atomKK->k_improper_type.view()), - _improper_atom1(atomKK->k_improper_atom1.view()), - _improper_atom2(atomKK->k_improper_atom2.view()), - _improper_atom3(atomKK->k_improper_atom3.view()), - _improper_atom4(atomKK->k_improper_atom4.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), - - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atomKK->avecKK->size_exchange), - _datamask(datamask) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - if (_datamask & Q_MASK) - _q(i) = _buf(myrecv,m++); - - if (_datamask & MOLECULE_MASK) - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - - if (_datamask & BOND_MASK) { - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k 
< _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } - - if (_datamask & ANGLE_MASK) { - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } - - if (_datamask & DIHEDRAL_MASK) { - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } - - if (_datamask & IMPROPER_MASK) { - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } - - if (_datamask & SPECIAL_MASK) { - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - - if (_datamask & MU_MASK) { - _mu(i,0) = _buf(myrecv,m++); - _mu(i,1) = _buf(myrecv,m++); - _mu(i,2) = _buf(myrecv,m++); - _mu(i,3) = _buf(myrecv,m++); - } - - if (_datamask & SP_MASK) { - _sp(i,0) = _buf(myrecv,m++); - _sp(i,1) = _buf(myrecv,m++); - _sp(i,2) = _buf(myrecv,m++); - 
_sp(i,3) = _buf(myrecv,m++); - } - - if (_datamask & RADIUS_MASK) - _radius(i) = _buf(myrecv,m++); - - if (_datamask & RMASS_MASK) - _rmass(i) = _buf(myrecv,m++); - - if (_datamask & OMEGA_MASK) { - _omega(i,0) = _buf(myrecv,m++); - _omega(i,1) = _buf(myrecv,m++); - _omega(i,2) = _buf(myrecv,m++); - } - - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i) = _buf(myrecv,m++); - - if (_datamask & UCOND_MASK) - _uCond(i) = _buf(myrecv,m++); - - if (_datamask & UMECH_MASK) - _uMech(i) = _buf(myrecv,m++); - - if (_datamask & UCHEM_MASK) - _uChem(i) = _buf(myrecv,m++); - - if (_datamask & UCG_MASK) - _uCG(i) = _buf(myrecv,m++); - - if (_datamask & UCGNEW_MASK) - _uCGnew(i) = _buf(myrecv,m++); - } - - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - 
AtomVecHybridKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - // TODO: move dynamic_cast into init /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 72c03810403..12f8dbbb8f6 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -35,150 +35,9 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { void grow(int) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - - int pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, - const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, - const DAT::tdual_int_1d &pbc_flag, - const DAT::tdual_int_2d &pbc, - const DAT::tdual_int_1d &g2l) override; - - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - - int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - - void unpack_comm_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - - int pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst) override; - - int pack_reverse_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - - void unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, - const 
DAT::tdual_double_2d_lr &buf) override; - - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - - int pack_border_vel_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - - void unpack_border_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; - - private: - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_kkfloat_1d_3 d_omega, d_angmom; - HAT::t_kkfloat_1d_3 h_omega, h_angmom; - - // FULL - - DAT::t_kkfloat_1d d_q; - HAT::t_kkfloat_1d h_q; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - 
HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; - - DAT::t_int_1d d_num_dihedral; - DAT::t_int_2d d_dihedral_type; - DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, - d_dihedral_atom3,d_dihedral_atom4; - DAT::t_int_1d d_num_improper; - DAT::t_int_2d d_improper_type; - DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, - d_improper_atom3,d_improper_atom4; - - HAT::t_int_1d h_num_dihedral; - HAT::t_int_2d h_dihedral_type; - HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, - h_dihedral_atom3,h_dihedral_atom4; - - DAT::t_kkfloat_1d_4 d_mu; - HAT::t_kkfloat_1d_4 h_mu; - - DAT::t_kkfloat_1d_4 d_sp; - DAT::t_kkacc_1d_3 d_fm; - DAT::t_kkacc_1d_3 d_fm_long; - HAT::t_kkfloat_1d_4 h_sp; - HAT::t_kkacc_1d_3 h_fm; - HAT::t_kkacc_1d_3 h_fm_long; - - DAT::t_kkfloat_1d d_radius; - HAT::t_kkfloat_1d h_radius; - DAT::t_kkfloat_1d d_rmass; - HAT::t_kkfloat_1d h_rmass; - DAT::t_kkfloat_1d_3 d_torque; - HAT::t_kkfloat_1d_3 h_torque; - - DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; - HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 94be45e4032..a22cf55f2d2 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -33,10 +33,6 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) kokkosable = 1; buffer = nullptr; buffer_size = 0; - - no_comm_vel_flag = 0; - no_border_vel_flag = 1; - unpack_exchange_indices_flag = 0; size_exchange = 0; k_count = DAT::tdual_int_1d("atom:k_count",1); @@ -64,28 +60,43 @@ void AtomVecKokkos::setup_fields() /* ---------------------------------------------------------------------- 
*/ +/* ---------------------------------------------------------------------- */ + template struct AtomVecKokkos_PackComm { typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + uint64_t _datamask; AtomVecKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_list(list.view()), + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; @@ -93,22 +104,48 @@ struct AtomVecKokkos_PackComm { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = 
_x(j,1); - _buf(i,2) = _x(j,2); + const int j = _list(i); + int m = 0; + if (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } + } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); } }; @@ -124,56 +161,56 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, + struct AtomVecKokkos_PackComm f(atomKK,buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); 
Kokkos::parallel_for(n,f); } } @@ -190,44 +227,84 @@ struct AtomVecKokkos_PackCommSelf { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d_4 _spw; + typename AT::t_kkfloat_1d _radiusw,_rmassw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + uint64_t _datamask; AtomVecKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const int &nfirst, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()), + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t datamask): + _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), + _nfirst(nfirst),_list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = 
_x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + const int j = _list(i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; } + } + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); + } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); + + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); } }; @@ -236,67 +313,66 @@ struct AtomVecKokkos_PackCommSelf { int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst, const int &pbc_flag, const int* const pbc) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); Kokkos::parallel_for(n,f); } } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } - /* ---------------------------------------------------------------------- */ template @@ -305,7 +381,13 @@ struct AtomVecKokkos_PackCommSelfFused { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d_4 _spw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -313,9 +395,10 @@ struct AtomVecKokkos_PackCommSelfFused { typename AT::t_int_1d_const _sendnum_scan; typename AT::t_int_1d_const _g2l; double _xprd,_yprd,_zprd,_xy,_xz,_yz; + uint64_t _datamask; AtomVecKokkos_PackCommSelfFused( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_int_2d_lr &list, const typename DAT::tdual_int_2d &pbc, const typename DAT::tdual_int_1d &pbc_flag, @@ -323,8 +406,15 @@ struct AtomVecKokkos_PackCommSelfFused { const typename DAT::tdual_int_1d &sendnum_scan, const typename DAT::tdual_int_1d &g2l, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz): - _x(x.view()),_xw(x.view()), + const double &xy, 
const double &xz, const double &yz, + const uint64_t datamask): + _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -332,7 +422,7 @@ struct AtomVecKokkos_PackCommSelfFused { _sendnum_scan(sendnum_scan.view()), _g2l(g2l.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) {}; + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& ii) const { @@ -365,6 +455,31 @@ struct AtomVecKokkos_PackCommSelfFused { _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } } + + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } + + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); + } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); + + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); } }; @@ -374,36 +489,36 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, const DAT::tdual_int_1d &g2l) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused 
f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); + domain->xy,domain->xz,domain->yz,datamask_comm); Kokkos::parallel_for(n,f); } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } /* ---------------------------------------------------------------------- */ @@ -414,24 +529,62 @@ struct AtomVecKokkos_UnpackComm { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d 
_dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; + uint64_t _datamask; AtomVecKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_x(x.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _first(first),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); } }; @@ -440,15 +593,15 @@ struct AtomVecKokkos_UnpackComm { void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if 
(lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); - struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); + atomKK->sync(HostKK,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); - struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); + atomKK->sync(Device,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } } @@ -462,32 +615,42 @@ struct AtomVecKokkos_PackCommVel { typename AT::t_kkfloat_1d_3_lr_randomread _x; typename AT::t_int_1d _mask; typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; double _h_rate[6]; const int _deform_vremap; + uint64_t _datamask; AtomVecKokkos_PackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_int_1d &mask, - const typename DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, const double * const h_rate, - const int &deform_vremap): - _x(x.view()), - _mask(mask.view()), - _v(v.view()), + const int &deform_vremap, + const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + 
_uChem(atomKK->k_uChem.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap) + _deform_vremap(deform_vremap), + _datamask(datamask) { - const size_t elements = 6; + const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -498,41 +661,73 @@ struct AtomVecKokkos_PackCommVel { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } if (DEFORM_VREMAP == 0) { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (_mask(i) & _deform_vremap) { - _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + 
_buf(i,m++) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,m++) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,m++) = _v(j,2) + _pbc[2]*_h_rate[2]; } else { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } } } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); } }; @@ -546,126 +741,126 @@ int AtomVecKokkos::pack_comm_vel_kokkos( const int* const pbc) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); + atomKK->sync(HostKK,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - 
atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { - atomKK->sync(Device,X_MASK|V_MASK); + atomKK->sync(Device,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + 
datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } - return n*6; + return n*(size_forward + size_velocity); } /* ---------------------------------------------------------------------- */ @@ -677,31 +872,74 @@ struct AtomVecKokkos_UnpackCommVel { typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; + uint64_t _datamask; AtomVecKokkos_UnpackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename 
DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _v(v.view()), - _first(first) + const int &first, const int &datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _first(first),_datamask(datamask) { - const size_t elements = 6; + const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _v(i+_first,0) = _buf(i,3); - _v(i+_first,1) = _buf(i,4); - _v(i+_first,2) = _buf(i,5); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); } }; @@ -710,15 +948,15 @@ struct 
AtomVecKokkos_UnpackCommVel { void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); - struct AtomVecKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); + atomKK->sync(HostKK,datamask_comm_vel); + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK|V_MASK); + atomKK->modified(HostKK,datamask_comm_vel); } else { - atomKK->sync(Device,X_MASK|V_MASK); - struct AtomVecKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); + atomKK->sync(Device,datamask_comm_vel); + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK|V_MASK); + atomKK->modified(Device,datamask_comm_vel); } } @@ -729,25 +967,50 @@ struct AtomVecKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; + typename AT::t_kkfloat_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; + uint64_t _datamask; AtomVecKokkos_PackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_f(f.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const uint64_t &datamask): + _f(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _torque(atomKK->k_torque.view()), + _first(first),_datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() 
(const int& i) const { - _buf(i,0) = _f(i+_first,0); - _buf(i,1) = _f(i+_first,1); - _buf(i,2) = _f(i+_first,2); + int m = 0; + _buf(i,m++) = _f(i+_first,0); + _buf(i,m++) = _f(i+_first,1); + _buf(i,m++) = _f(i+_first,2); + + if (_datamask & FM_MASK) { + _buf(i,m++) = _fm(i+_first,0); + _buf(i,m++) = _fm(i+_first,1); + _buf(i,m++) = _fm(i+_first,2); + } + + if (_datamask & FML_MASK) { + _buf(i,m++) = _fm_long(i+_first,0); + _buf(i,m++) = _fm_long(i+_first,1); + _buf(i,m++) = _fm_long(i+_first,2); + } + + if (_datamask & TORQUE_MASK) { + _buf(i,m++) = _torque(i+_first,0); + _buf(i,m++) = _torque(i+_first,1); + _buf(i,m++) = _torque(i+_first,2); + } } }; @@ -756,12 +1019,12 @@ struct AtomVecKokkos_PackReverse { int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); Kokkos::parallel_for(n,f); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); + atomKK->sync(Device,datamask_reverse); + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); Kokkos::parallel_for(n,f); } @@ -775,17 +1038,25 @@ struct AtomVecKokkos_UnPackReverseSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; - typename AT::t_kkacc_1d_3 _fw; - int _nfirst; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; + typename AT::t_kkfloat_1d_3_randomread _torque; + typename AT::t_kkacc_1d_3 _fw,_fmw,_fm_longw; + typename AT::t_kkfloat_1d_3 _torquew; typename AT::t_int_1d_const _list; + int _nfirst; + uint64_t _datamask; AtomVecKokkos_UnPackReverseSelf( - const typename DAT::ttransform_kkacc_1d_3 &f, - const int &nfirst, - const typename DAT::tdual_int_1d &list): - 
_f(f.view()),_fw(f.view()),_nfirst(nfirst),_list(list.view()) { - }; + const AtomKokkos* atomKK, + const int &nfirst, + const typename DAT::tdual_int_1d &list, + const uint64_t &datamask): + _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()),_fmw(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()),_fm_longw(atomKK->k_fm_long.view()), + _torque(atomKK->k_torque.view()),_torquew(atomKK->k_torque.view()), + _nfirst(nfirst),_list(list.view()), + _datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { @@ -793,6 +1064,24 @@ struct AtomVecKokkos_UnPackReverseSelf { _fw(j,0) += _f(i+_nfirst,0); _fw(j,1) += _f(i+_nfirst,1); _fw(j,2) += _f(i+_nfirst,2); + + if (_datamask & FM_MASK) { + _fmw(j,0) += _fm(i+_nfirst,0); + _fmw(j,1) += _fm(i+_nfirst,1); + _fmw(j,2) += _fm(i+_nfirst,2); + } + + if (_datamask & FML_MASK) { + _fm_longw(j,0) += _fm_long(i+_nfirst,0); + _fm_longw(j,1) += _fm_long(i+_nfirst,1); + _fm_longw(j,2) += _fm_long(i+_nfirst,2); + } + + if (_datamask & TORQUE_MASK) { + _torquew(j,0) += _torque(i+_nfirst,0); + _torquew(j,1) += _torque(i+_nfirst,1); + _torquew(j,2) += _torque(i+_nfirst,2); + } } }; @@ -801,18 +1090,18 @@ struct AtomVecKokkos_UnPackReverseSelf { int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); + atomKK->modified(HostKK,datamask_reverse); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); + atomKK->sync(Device,datamask_reverse); + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - 
atomKK->modified(Device,F_MASK); + atomKK->modified(Device,datamask_reverse); } - return n*3; + return n*size_reverse; } /* ---------------------------------------------------------------------- */ @@ -822,26 +1111,53 @@ struct AtomVecKokkos_UnPackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3 _f; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; + typename AT::t_kkfloat_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; + uint64_t _datamask; AtomVecKokkos_UnPackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list): - _f(f.view()),_list(list.view()) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const uint64_t datamask): + _f(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _torque(atomKK->k_torque.view()), + _list(list.view()), + _datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); - }; + }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); - _f(j,0) += _buf(i,0); - _f(j,1) += _buf(i,1); - _f(j,2) += _buf(i,2); + _f(j,0) += _buf(i,m++); + _f(j,1) += _buf(i,m++); + _f(j,2) += _buf(i,m++); + + if (_datamask & FM_MASK) { + _fm(j,0) += _buf(i,m++); + _fm(j,1) += _buf(i,m++); + _fm(j,2) += _buf(i,m++); + } + + if (_datamask & FML_MASK) { + _fm_long(j,0) += _buf(i,m++); + _fm_long(j,1) += _buf(i,m++); + _fm_long(j,2) += _buf(i,m++); + } + + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _buf(i,m++); + _torque(j,1) += _buf(i,m++); + _torque(j,2) += _buf(i,m++); + } } }; @@ -855,16 +1171,1343 @@ void 
AtomVecKokkos::unpack_reverse_kokkos(const int &n, // Choose correct reverse UnPackReverse kernel if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list); + atomKK->sync(HostKK,datamask_reverse); + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + atomKK->modified(HostKK,datamask_reverse); + } else { + atomKK->sync(Device,datamask_reverse); + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + atomKK->modified(Device,datamask_reverse); + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_tagint_1d _molecule; + const typename AT::t_kkfloat_1d _q; + const typename AT::t_kkfloat_1d_4 _mu; + const typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + double _dx,_dy,_dz; + uint64_t _datamask; + + AtomVecKokkos_PackBorder( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const uint64_t &datamask): + _buf(buf),_list(list), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + 
_uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + int m = 0; + if (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + } + + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; + + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); + + if (_datamask & UCG_MASK) + _buf(i,m++) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _buf(i,m++) = _uCGnew(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + atomKK->sync(space,datamask_border); + + double dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy 
= pbc[1]; + dz = pbc[2]; + } + if (space == Host) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } + + } else { + dx = dy = dz = 0; + if (space == Host) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + const typename AT::t_double_2d_lr_const _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + typename AT::t_kkfloat_1d _q; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + uint64_t _datamask; + + AtomVecKokkos_UnpackBorder( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, const uint64_t &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + 
_uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _first(first),_datamask(datamask) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); + + if (_datamask & UCG_MASK) + _uCG(i+_first) = _buf(i,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i+_first) = _buf(i,m++); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space) { + atomKK->sync(space,datamask_border); + + while (first+n >= nmax) grow(0); + + if (space == Host) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); } else { - 
atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list); + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); Kokkos::parallel_for(n,f); - atomKK->modified(Device,F_MASK); } + + atomKK->modified(space,datamask_border); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_um _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3 _v; + const typename AT::t_tagint_1d _tag; + const typename AT::t_int_1d _type; + const typename AT::t_int_1d _mask; + const typename AT::t_kkfloat_1d_4 _mu; + const typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + double _dx,_dy,_dz, _dvx, _dvy, _dvz; + const int _deform_groupbit; + const uint64_t _datamask; + + AtomVecKokkos_PackBorderVel( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const double &dvx, const double &dvy, const double &dvz, + const int &deform_groupbit, + const uint64_t &datamask): + _buf(buf),_list(list),_datamask(datamask), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + 
_dx(dx),_dy(dy),_dz(dz), + _dvx(dvx),_dvy(dvy),_dvz(dvz), + _deform_groupbit(deform_groupbit) { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + const int j = _list(i); + if (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + } + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if (DEFORM_VREMAP) { + if (_mask(i) & _deform_groupbit) { + _buf(i,m++) = _v(j,0) + _dvx; + _buf(i,m++) = _v(j,1) + _dvy; + _buf(i,m++) = _v(j,2) + _dvz; + } + } else { + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); + } + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } + + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); + + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); + + if (_datamask & UCG_MASK) + _buf(i,m++) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _buf(i,m++) = _uCGnew(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int 
AtomVecKokkos::pack_border_vel_kokkos( + int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + double dx = 0, dy = 0, dz = 0; + double dvx = 0, dvy = 0, dvz = 0; + + atomKK->sync(space,datamask_border_vel); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + if (space == Host) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + if (space == Host) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + } else { + if (space == Host) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + + atomKK->modified(space,datamask_border_vel); + + return n*(size_border + size_velocity); +} + +/* 
---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_const_um _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + uint64_t _datamask; + + AtomVecKokkos_UnpackBorderVel( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, + const uint64_t &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _first(first),_datamask(datamask) + { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _type(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _mask(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _radius(i+_first) = _buf(i,m++); + 
_rmass(i+_first) = _buf(i,m++); + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); + + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); + + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); + + if (_datamask & UCG_MASK) + _uCG(i+_first) = _buf(i,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i+_first) = _buf(i,m++); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_border_vel_kokkos( + const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { + while (first+n >= nmax) grow(0); + if (space == Host) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + + atomKK->modified(space,datamask_border_vel); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename 
AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_kkfloat_1d_randomread _q; + typename AT::t_tagint_1d_randomread _molecule; + typename AT::t_int_2d_randomread _nspecial; + typename AT::t_tagint_2d_randomread _special; + typename AT::t_int_1d_randomread _num_bond; + typename AT::t_int_2d_randomread _bond_type; + typename AT::t_tagint_2d_randomread _bond_atom; + typename AT::t_int_1d_randomread _num_angle; + typename AT::t_int_2d_randomread _angle_type; + typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d_randomread _num_dihedral; + typename AT::t_int_2d_randomread _dihedral_type; + typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d_randomread _num_improper; + typename AT::t_int_2d_randomread _improper_type; + typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + + typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_3 _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_kkfloat_1d _qw; + typename AT::t_tagint_1d _moleculew; + typename AT::t_int_2d _nspecialw; + typename AT::t_tagint_2d _specialw; + typename AT::t_int_1d _num_bondw; + typename AT::t_int_2d _bond_typew; + typename AT::t_tagint_2d _bond_atomw; + typename AT::t_int_1d _num_anglew; + typename AT::t_int_2d _angle_typew; + typename AT::t_tagint_2d 
_angle_atom1w,_angle_atom2w,_angle_atom3w; + typename AT::t_int_1d _num_dihedralw; + typename AT::t_int_2d _dihedral_typew; + typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, + _dihedral_atom3w,_dihedral_atom4w; + typename AT::t_int_1d _num_improperw; + typename AT::t_int_2d _improper_typew; + typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, + _improper_atom3w,_improper_atom4w; + typename AT::t_kkfloat_1d_4 _muw; + typename AT::t_kkfloat_1d_4 _spw; + typename AT::t_kkfloat_1d _radiusw,_rmassw; + typename AT::t_kkfloat_1d_3 _omegaw; + typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; + + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _size_exchange; + uint64_t _datamask; + + AtomVecKokkos_PackExchangeFunctor( + const AtomKokkos* atomKK, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d sendlist, + DAT::tdual_int_1d copylist, + const uint64_t datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + 
_num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + + _xw(atomKK->k_x.view()), + _vw(atomKK->k_v.view()), + _tagw(atomKK->k_tag.view()), + _typew(atomKK->k_type.view()), + _maskw(atomKK->k_mask.view()), + _imagew(atomKK->k_image.view()), + _qw(atomKK->k_q.view()), + _moleculew(atomKK->k_molecule.view()), + _nspecialw(atomKK->k_nspecial.view()), + _specialw(atomKK->k_special.view()), + _num_bondw(atomKK->k_num_bond.view()), + _bond_typew(atomKK->k_bond_type.view()), + _bond_atomw(atomKK->k_bond_atom.view()), + _num_anglew(atomKK->k_num_angle.view()), + _angle_typew(atomKK->k_angle_type.view()), + _angle_atom1w(atomKK->k_angle_atom1.view()), + _angle_atom2w(atomKK->k_angle_atom2.view()), + _angle_atom3w(atomKK->k_angle_atom3.view()), + _num_dihedralw(atomKK->k_num_dihedral.view()), + _dihedral_typew(atomKK->k_dihedral_type.view()), + _dihedral_atom1w(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2w(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3w(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4w(atomKK->k_dihedral_atom4.view()), + _num_improperw(atomKK->k_num_improper.view()), + _improper_typew(atomKK->k_improper_type.view()), + _improper_atom1w(atomKK->k_improper_atom1.view()), + _improper_atom2w(atomKK->k_improper_atom2.view()), + _improper_atom3w(atomKK->k_improper_atom3.view()), + _improper_atom4w(atomKK->k_improper_atom4.view()), + 
_muw(atomKK->k_mu.view()), + _spw(atomKK->k_sp.view()), + _radiusw(atomKK->k_radius.view()), + _rmassw(atomKK->k_rmass.view()), + _omegaw(atomKK->k_omega.view()), + _dpdThetaw(atomKK->k_dpdTheta.view()), + _uCondw(atomKK->k_uCond.view()), + _uMechw(atomKK->k_uMech.view()), + _uChemw(atomKK->k_uChem.view()), + _uCGw(atomKK->k_uCG.view()), + _uCGneww(atomKK->k_uCGnew.view()), + + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _size_exchange(atomKK->avecKK->size_exchange), + _datamask(datamask) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = _size_exchange; + int m = 1; + + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + _buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + + if (_datamask & Q_MASK) + _buf(mysend,m++) = _q(i); + + if (_datamask & MOLECULE_MASK) + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + + if (_datamask & BOND_MASK) { + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (int k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } + } + + if (_datamask & ANGLE_MASK) { + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (int k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } + } + + if (_datamask & DIHEDRAL_MASK) { + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (int k = 0; k < 
_num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } + } + + if (_datamask & IMPROPER_MASK) { + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (int k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } + } + + if (_datamask & SPECIAL_MASK) { + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (int k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + } + + if (_datamask & MU_MASK) { + _buf(mysend,m++) = _mu(i,0); + _buf(mysend,m++) = _mu(i,1); + _buf(mysend,m++) = _mu(i,2); + _buf(mysend,m++) = _mu(i,3); + } + + if (_datamask & SP_MASK) { + _buf(mysend,m++) = _sp(i,0); + _buf(mysend,m++) = _sp(i,1); + _buf(mysend,m++) = _sp(i,2); + _buf(mysend,m++) = _sp(i,3); + } + + if (_datamask & RADIUS_MASK) + _buf(mysend,m++) = _radius(i); + + if (_datamask & RMASS_MASK) + _buf(mysend,m++) = _rmass(i); + + if (_datamask & OMEGA_MASK) { + _buf(mysend,m++) = _omega(i,0); + _buf(mysend,m++) = _omega(i,1); + _buf(mysend,m++) = _omega(i,2); + } + + if (_datamask & DPDTHETA_MASK) + _buf(mysend,m++) = _dpdTheta(i); + + if (_datamask & UCOND_MASK) + _buf(mysend,m++) = _uCond(i); + + if (_datamask & UMECH_MASK) + _buf(mysend,m++) = _uMech(i); + + if (_datamask & UCHEM_MASK) + _buf(mysend,m++) = _uChem(i); + + if (_datamask & UCG_MASK) + _buf(mysend,m++) = _uCG(i); + + if (_datamask & UCGNEW_MASK) + _buf(mysend,m++) = _uCGnew(i); + + const int j = _copylist(mysend); + + if (j > 
-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw(i) = _tag(j); + _typew(i) = _type(j); + _maskw(i) = _mask(j); + _imagew(i) = _image(j); + + if (_datamask & Q_MASK) + _qw(i) = _q(j); + + if (_datamask & MOLECULE_MASK) + _moleculew(i) = _molecule(j); + + if (_datamask & BOND_MASK) { + _num_bondw(i) = _num_bond(j); + for (int k = 0; k < _num_bond(j); k++) { + _bond_typew(i,k) = _bond_type(j,k); + _bond_atomw(i,k) = _bond_atom(j,k); + } + } + + if (_datamask & ANGLE_MASK) { + _num_anglew(i) = _num_angle(j); + for (int k = 0; k < _num_angle(j); k++) { + _angle_typew(i,k) = _angle_type(j,k); + _angle_atom1w(i,k) = _angle_atom1(j,k); + _angle_atom2w(i,k) = _angle_atom2(j,k); + _angle_atom3w(i,k) = _angle_atom3(j,k); + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedralw(i) = _num_dihedral(j); + for (int k = 0; k < _num_dihedral(j); k++) { + _dihedral_typew(i,k) = _dihedral_type(j,k); + _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improperw(i) = _num_improper(j); + for (int k = 0; k < _num_improper(j); k++) { + _improper_typew(i,k) = _improper_type(j,k); + _improper_atom1w(i,k) = _improper_atom1(j,k); + _improper_atom2w(i,k) = _improper_atom2(j,k); + _improper_atom3w(i,k) = _improper_atom3(j,k); + _improper_atom4w(i,k) = _improper_atom4(j,k); + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecialw(i,0) = _nspecial(j,0); + _nspecialw(i,1) = _nspecial(j,1); + _nspecialw(i,2) = _nspecial(j,2); + for (int k = 0; k < _nspecial(j,2); k++) + _specialw(i,k) = _special(j,k); + } + + if (_datamask & MU_MASK) { + _muw(i,0) = _mu(j,0); + _muw(i,1) = _mu(j,1); + _muw(i,2) = _mu(j,2); + _muw(i,3) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _spw(i,0) = _sp(j,0); + _spw(i,1) = _sp(j,1); 
+ _spw(i,2) = _sp(j,2); + _spw(i,3) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _radiusw(i) = _radius(j); + + if (_datamask & RMASS_MASK) + _rmassw(i) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _omegaw(i,0) = _omega(j,0); + _omegaw(i,1) = _omega(j,1); + _omegaw(i,2) = _omega(j,2); + } + + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i) = _dpdTheta(j); + + if (_datamask & UCOND_MASK) + _uCondw(i) = _uCond(j); + + if (_datamask & UMECH_MASK) + _uMechw(i) = _uMech(j); + + if (_datamask & UCHEM_MASK) + _uChemw(i) = _uChem(j); + + if (_datamask & UCG_MASK) + _uCGw(i) = _uCG(j); + + if (_datamask & UCGNEW_MASK) + _uCGneww(i) = _uCGnew(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + set_size_exchange(); + + if (nsend > (int) (k_buf.view_host().extent(0)* + k_buf.view_host().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; + k_buf.resize(newsize,k_buf.view_host().extent(1)); + } + if (space == HostKK) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + return nsend*size_exchange; + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + return nsend*size_exchange; + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d 
_molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; + int _dim; + double _lo,_hi; + int _size_exchange; + uint64_t _datamask; + + AtomVecKokkos_UnpackExchangeFunctor( + const AtomKokkos* atomKK, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d nlocal, + DAT::tdual_int_1d indices, + int dim, double lo, double hi, + uint64_t datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + 
_num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atomKK->avecKK->size_exchange), + _datamask(datamask) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + double x = _buf(myrecv,_dim+1); + int i = -1; + if (x >= _lo && x < _hi) { + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & Q_MASK) + _q(i) = _buf(myrecv,m++); + + if (_datamask & MOLECULE_MASK) + 
_molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & BOND_MASK) { + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & ANGLE_MASK) { + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + + if (_datamask & MU_MASK) { + _mu(i,0) = _buf(myrecv,m++); + _mu(i,1) = _buf(myrecv,m++); + _mu(i,2) = _buf(myrecv,m++); + 
_mu(i,3) = _buf(myrecv,m++); + } + + if (_datamask & SP_MASK) { + _sp(i,0) = _buf(myrecv,m++); + _sp(i,1) = _buf(myrecv,m++); + _sp(i,2) = _buf(myrecv,m++); + _sp(i,3) = _buf(myrecv,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i) = _buf(myrecv,m++); + + if (_datamask & RMASS_MASK) + _rmass(i) = _buf(myrecv,m++); + + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _buf(myrecv,m++); + _omega(i,1) = _buf(myrecv,m++); + _omega(i,2) = _buf(myrecv,m++); + } + + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i) = _buf(myrecv,m++); + + if (_datamask & UCOND_MASK) + _uCond(i) = _buf(myrecv,m++); + + if (_datamask & UMECH_MASK) + _uMech(i) = _buf(myrecv,m++); + + if (_datamask & UCHEM_MASK) + _uChem(i) = _buf(myrecv,m++); + + if (_datamask & UCG_MASK) + _uCG(i) = _buf(myrecv,m++); + + if (_datamask & UCGNEW_MASK) + _uCGnew(i) = _buf(myrecv,m++); + } + + if (OUTPUT_INDICES) + _indices(myrecv) = i; + } +}; + +/* ---------------------------------------------------------------------- */ +int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, + int dim, double lo, double hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); + + if (space == HostKK) { + if (k_indices.view_host().data()) { + k_count.view_host()(0) = nlocal; + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + k_count.view_host()(0) = nlocal; + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } else { + if (k_indices.view_host().data()) { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify_device(); + 
k_count.sync_host(); + } else { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + k_count.modify_device(); + k_count.sync_host(); + } + } + + return k_count.view_host()(0); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 8d6e829f01a..3e6d21d3169 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -38,90 +38,148 @@ class AtomVecKokkos : virtual public AtomVec { virtual void modified(ExecutionSpace space, uint64_t mask) = 0; virtual void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) = 0; - virtual int - pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]); - - virtual int - pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, - const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, - const DAT::tdual_int_1d &pbc_flag, - const DAT::tdual_int_2d &pbc, - const DAT::tdual_int_1d &g2l); - - virtual int - pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, + int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst, const int &pbc_flag, const int pbc[]); - virtual void - unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + int pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l); - virtual int - pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const 
int pbc[]); + int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, const int pbc[]); - virtual void - unpack_comm_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf); - virtual int - pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst); + int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, const int pbc[]); - virtual int - pack_reverse_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + void unpack_comm_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf); - virtual void - unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, + int pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst); + + int pack_reverse_kokkos(const int &n, const int &nfirst, const DAT::tdual_double_2d_lr &buf); - virtual int - pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) = 0; + void unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf); - virtual void - unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) = 0; + int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space); - virtual int - pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_double_2d_lr /*buf*/, - int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/) { return 0; } + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space); + + int 
pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, + DAT::tdual_double_2d_lr /*buf*/, + int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/); + + void unpack_border_vel_kokkos(const int &/*n*/, const int & /*nfirst*/, + const DAT::tdual_double_2d_lr & /*buf*/, + ExecutionSpace /*space*/); + + int pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space); + + int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, + int nlocal, int dim, double lo, double hi, + ExecutionSpace space, + DAT::tdual_int_1d &k_indices); - virtual void - unpack_border_vel_kokkos(const int &/*n*/, const int & /*nfirst*/, - const DAT::tdual_double_2d_lr & /*buf*/, - ExecutionSpace /*space*/) {} - - virtual int - pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) = 0; - - virtual int - unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) = 0; - - int no_comm_vel_flag,no_border_vel_flag; - int unpack_exchange_indices_flag; int size_exchange; protected: + DAT::t_tagint_1d d_tag; + DAT::t_int_1d d_type, d_mask; + HAT::t_tagint_1d h_tag; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_kkfloat_1d_3_lr d_x; + DAT::t_kkfloat_1d_3 d_v; + DAT::t_kkacc_1d_3 d_f; HAT::t_kkfloat_1d_3_lr h_x; HAT::t_kkfloat_1d_3 h_v; HAT::t_kkacc_1d_3 h_f; + DAT::t_kkfloat_1d_3 d_omega, d_angmom; + HAT::t_kkfloat_1d_3 h_omega, h_angmom; + + // FULL + + DAT::t_kkfloat_1d d_q; + HAT::t_kkfloat_1d h_q; + + DAT::t_tagint_1d d_molecule; + DAT::t_int_2d d_nspecial; + DAT::t_tagint_2d d_special; + DAT::t_int_1d d_num_bond; + DAT::t_int_2d d_bond_type; + DAT::t_tagint_2d d_bond_atom; + + HAT::t_tagint_1d h_molecule; + 
HAT::t_int_2d h_nspecial; + HAT::t_tagint_2d h_special; + HAT::t_int_1d h_num_bond; + HAT::t_int_2d h_bond_type; + HAT::t_tagint_2d h_bond_atom; + + DAT::t_int_1d d_num_angle; + DAT::t_int_2d d_angle_type; + DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; + + HAT::t_int_1d h_num_angle; + HAT::t_int_2d h_angle_type; + HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; + + DAT::t_int_1d d_num_dihedral; + DAT::t_int_2d d_dihedral_type; + DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, + d_dihedral_atom3,d_dihedral_atom4; + DAT::t_int_1d d_num_improper; + DAT::t_int_2d d_improper_type; + DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, + d_improper_atom3,d_improper_atom4; + + HAT::t_int_1d h_num_dihedral; + HAT::t_int_2d h_dihedral_type; + HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, + h_dihedral_atom3,h_dihedral_atom4; + + DAT::t_kkfloat_1d_4 d_mu; + HAT::t_kkfloat_1d_4 h_mu; + + DAT::t_kkfloat_1d_4 d_sp; + DAT::t_kkacc_1d_3 d_fm; + DAT::t_kkacc_1d_3 d_fm_long; + HAT::t_kkfloat_1d_4 h_sp; + HAT::t_kkacc_1d_3 h_fm; + HAT::t_kkacc_1d_3 h_fm_long; + + DAT::t_kkfloat_1d d_radius; + HAT::t_kkfloat_1d h_radius; + DAT::t_kkfloat_1d d_rmass; + HAT::t_kkfloat_1d h_rmass; + DAT::t_kkfloat_1d_3 d_torque; + HAT::t_kkfloat_1d_3 h_torque; + + DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; + HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; + size_t buffer_size; void* buffer; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 8ed3346845f..cfaedf3804c 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -30,7 +30,6 @@ using namespace LAMMPS_NS; AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecMolecular(lmp) { - unpack_exchange_indices_flag = 1; } /* 
---------------------------------------------------------------------- @@ -260,615 +259,6 @@ void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort &So /* ---------------------------------------------------------------------- */ -template -struct AtomVecMolecularKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecMolecularKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - 
- if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecMolecularKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecMolecularKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecMolecularKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecMolecularKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecMolecularKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - 
_type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecMolecularKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecMolecularKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, - 
_dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - _dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; - typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecMolecularKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - 
_angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - 
_copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; - } - - 
_buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _num_dihedralw(i) = _num_dihedral(j); - for (k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); - } - _num_improperw(i) = _num_improper(j); - for (k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - 
DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 to store buffer length - - size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d _num_dihedral; 
- typename AT::t_int_2d _dihedral_type; - typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d _num_improper; - typename AT::t_int_2d _improper_type; - typename AT::t_tagint_2d _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecMolecularKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - 
_dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_dihedral(i) = d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < (int) _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - 
_improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* 
---------------------------------------------------------------------- */ - void AtomVecMolecularKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index b4c6e5cd13f..00c6b625e14 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -35,21 +35,6 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; @@ -61,61 +46,6 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { tagint **angle_atom1,**angle_atom2,**angle_atom3; tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4; tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - 
DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; - - DAT::t_int_1d d_num_dihedral; - DAT::t_int_2d d_dihedral_type; - DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, - d_dihedral_atom3,d_dihedral_atom4; - DAT::t_int_1d d_num_improper; - DAT::t_int_2d d_improper_type; - DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, - d_improper_atom3,d_improper_atom4; - - HAT::t_int_1d h_num_dihedral; - HAT::t_int_2d h_dihedral_type; - HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, - h_dihedral_atom3,h_dihedral_atom4; - HAT::t_int_1d h_num_improper; - HAT::t_int_2d h_improper_type; - HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, - h_improper_atom3,h_improper_atom4; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 9e1d1bf3bd8..ae8d26de020 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -33,8 +33,6 @@ using namespace MathConst; AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecSphere(lmp) { - no_border_vel_flag = 0; - unpack_exchange_indices_flag = 1; } /* ---------------------------------------------------------------------- @@ -143,1516 +141,6 @@ void AtomVecSphereKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* 
---------------------------------------------------------------------- */ -template -struct AtomVecSphereKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSphereKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t elements = 5; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.view().data(),maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _buf(i,3) = _radius(j); - _buf(i,4) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_kokkos( - const 
int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) - return AtomVecKokkos::pack_comm_kokkos(n,list,buf,pbc_flag,pbc); - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - 
Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - double _h_rate[6]; - const int _deform_vremap; - - AtomVecSphereKokkos_PackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_int_1d &mask, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::ttransform_kkfloat_1d_3 &v, - const typename DAT::ttransform_kkfloat_1d_3 &omega, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const double * const h_rate, - const int &deform_vremap): - _x(x.view()), - _mask(mask.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _v(v.view()), - _omega(omega.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap) - { - const size_t 
elements = 9 + 2 * RADVARY; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.view().data(),maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; - _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - if (DEFORM_VREMAP == 0) { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); - } else { - if (_mask(i) & _deform_vremap) { - _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; - } else { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); - } - } - _buf(i,6) = _omega(j,0); - _buf(i,7) = _omega(j,1); - _buf(i,8) = _omega(j,2); - if (RADVARY) { - _buf(i,9) = _radius(j); - _buf(i,10) = _rmass(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_vel_kokkos( - const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (pbc_flag) { - if (deform_vremap) { - if 
(domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - 
atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (pbc_flag) { - if (deform_vremap) { - if (domain->triclinic) { - if (radvary == 0) { - 
struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, 
- atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } - return n*(size_forward+size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackCommSelf { - typedef DeviceType device_type; - typedef ArrayTypes 
AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d _radius,_rmass; - int _nfirst; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSphereKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _radius(i+_nfirst) = _radius(j); - _rmass(i+_nfirst) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_self( - const int &n, const DAT::tdual_int_1d &list, - const int nfirst, const int &pbc_flag, const int* const pbc) { - // Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) - return AtomVecKokkos::pack_comm_self(n,list,nfirst,pbc_flag,pbc); - if 
(lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_double_2d_lr_const_um _buf; - int _first; - - AtomVecSphereKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _first(first) - { - const size_t elements = 5; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - _buf = typename AT::t_double_2d_lr_const_um(buf.view().data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _radius(i+_first) = _buf(i,3); - _rmass(i+_first) = _buf(i,4); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_comm_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - // Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) { - AtomVecKokkos::unpack_comm_kokkos(n,first,buf); - return; - } - if (lmp->kokkos->forward_comm_on_host) { - atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - struct AtomVecSphereKokkos_UnpackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,first); - 
Kokkos::parallel_for(n,f); - } else { - atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - struct AtomVecSphereKokkos_UnpackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - typename AT::t_double_2d_lr_const _buf; - int _first; - - AtomVecSphereKokkos_UnpackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::ttransform_kkfloat_1d_3 &v, - const typename DAT::ttransform_kkfloat_1d_3 &omega, - const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _v(v.view()), - _omega(omega.view()), - _first(first) - { - const size_t elements = 9 + 2 * RADVARY; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _v(i+_first,0) = _buf(i,3); - _v(i+_first,1) = _buf(i,4); - _v(i+_first,2) = _buf(i,5); - _omega(i+_first,0) = _buf(i,6); - _omega(i+_first,1) = _buf(i,7); - _omega(i+_first,2) = _buf(i,8); - if (RADVARY) { - _radius(i+_first) = _buf(i,9); - _rmass(i+_first) = _buf(i,10); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_comm_vel_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - 
atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (radvary == 0) { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } - } else { - atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (radvary == 0) { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_um _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - double _dx,_dy,_dz; - - AtomVecSphereKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _dx(dx),_dy(dy),_dz(dz) - { - const 
size_t elements = 8; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _radius(j); - _buf(i,7) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_border_kokkos( - int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecSphereKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecSphereKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - 
-/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackBorderVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_um _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - double _dx,_dy,_dz, _dvx, _dvy, _dvz; - const int _deform_groupbit; - - AtomVecSphereKokkos_PackBorderVel( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const typename AT::t_kkfloat_1d_3 &v, - const typename AT::t_kkfloat_1d_3 &omega, - const double &dx, const double &dy, const double &dz, - const double &dvx, const double &dvy, const double &dvz, - const int &deform_groupbit): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _v(v), _omega(omega), - _dx(dx),_dy(dy),_dz(dz), - _dvx(dvx),_dvy(dvy),_dvz(dvz), - _deform_groupbit(deform_groupbit) - { - const size_t elements = 14; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = 
_radius(j); - _buf(i,7) = _rmass(j); - if (DEFORM_VREMAP) { - if (_mask(i) & _deform_groupbit) { - _buf(i,8) = _v(j,0) + _dvx; - _buf(i,9) = _v(j,1) + _dvy; - _buf(i,10) = _v(j,2) + _dvz; - } - } - else { - _buf(i,8) = _v(j,0); - _buf(i,9) = _v(j,1); - _buf(i,10) = _v(j,2); - } - _buf(i,11) = _omega(j,0); - _buf(i,12) = _omega(j,1); - _buf(i,13) = _omega(j,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_border_vel_kokkos( - int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx=0,dy=0,dz=0; - double dvx=0,dvy=0,dvz=0; - - // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (!deform_vremap) { - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, 
- d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - } else { - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - - return n*(size_border + size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_const_um _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - int _first; - - AtomVecSphereKokkos_UnpackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _first(first) - { - const size_t elements = 8; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (d_ubuf(_buf(i,3)).i); - _type(i+_first) = static_cast (d_ubuf(_buf(i,4)).i); - _mask(i+_first) = 
static_cast (d_ubuf(_buf(i,5)).i); - _radius(i+_first) = _buf(i,6); - _rmass(i+_first) = _buf(i,7); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecSphereKokkos_UnpackBorder f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackBorder f(buf.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - RADIUS_MASK|RMASS_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackBorderVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_const_um _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_3 _omega; - int _first; - - AtomVecSphereKokkos_UnpackBorderVel( - const typename AT::t_double_2d_lr_const &buf, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const typename AT::t_kkfloat_1d_3 &v, - const typename AT::t_kkfloat_1d_3 &omega, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _v(v), _omega(omega), - _first(first) - { - const size_t elements = 14; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename 
AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (d_ubuf(_buf(i,3)).i); - _type(i+_first) = static_cast (d_ubuf(_buf(i,4)).i); - _mask(i+_first) = static_cast (d_ubuf(_buf(i,5)).i); - _radius(i+_first) = _buf(i,6); - _rmass(i+_first) = _buf(i,7); - _v(i+_first,0) = _buf(i,8); - _v(i+_first,1) = _buf(i,9); - _v(i+_first,2) = _buf(i,10); - _omega(i+_first,0) = _buf(i,11); - _omega(i+_first,1) = _buf(i,12); - _omega(i+_first,2) = _buf(i,13); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_border_vel_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecSphereKokkos_UnpackBorderVel f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackBorderVel f(buf.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _radius,_rmass; - typename AT::t_kkfloat_1d_3_randomread _omega; - typename 
AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d_3 _omegaw; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecSphereKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _radiusw(atom->k_radius.view()), - _rmassw(atom->k_rmass.view()), - _omegaw(atom->k_omega.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - _buf = typename AT::t_double_2d_lr_um(buf.template view().data(),maxsend,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _radius[i]; - _buf(mysend,12) = _rmass[i]; - 
_buf(mysend,13) = _omega(i,0); - _buf(mysend,14) = _omega(i,1); - _buf(mysend,15) = _omega(i,2); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - _radiusw[i] = _radius(j); - _rmassw[i] = _rmass(j); - _omegaw(i,0) = _omega(j,0); - _omegaw(i,1) = _omega(j,1); - _omegaw(i,2) = _omega(j,2); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_exchange_kokkos( - const int &nsend, - DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 16; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*17/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | - OMEGA_MASK); - - if (space == HostKK) { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } else { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } - return nsend*size_exchange; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _radius; - typename AT::t_kkfloat_1d _rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename 
AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecSphereKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const size_t size_exchange = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/size_exchange; - - buffer_view(_buf,buf,maxsendlist,size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _radius[i] = _buf(myrecv,11); - _rmass[i] = _buf(myrecv,12); - _omega(i,0) = _buf(myrecv,13); - _omega(i,1) = _buf(myrecv,14); - _omega(i,2) = _buf(myrecv,15); - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d 
&k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - if (k_indices.view_host().data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - if (k_indices.view_host().data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - k_count.modify_device(); - k_count.sync_host(); - } - - atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | - OMEGA_MASK); - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - void AtomVecSphereKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index dce220f5a5e..9381e68144a 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -36,65 +36,12 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_vel_kokkos(const int 
&n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_border_vel_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: double **torque; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - DAT::t_kkfloat_1d d_radius; - HAT::t_kkfloat_1d h_radius; - DAT::t_kkfloat_1d d_rmass; - HAT::t_kkfloat_1d h_rmass; - DAT::t_kkfloat_1d_3 d_omega; - HAT::t_kkfloat_1d_3 h_omega; - DAT::t_kkfloat_1d_3 
d_torque; - HAT::t_kkfloat_1d_3 h_torque; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 101a48be94d..eebbef84e1d 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -41,7 +41,6 @@ static constexpr int DELTA = 10; AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecSpin(lmp) { - } /* ---------------------------------------------------------------------- @@ -147,442 +146,6 @@ void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK); } -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSpinKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_4 &sp, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_sp(sp.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 7; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - 
_buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d_4_randomread _sp; - double _dx,_dy,_dz; - - AtomVecSpinKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d_4 &sp, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - 
_buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - _buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_4 _sp; - int _first; - - - 
AtomVecSpinKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d_4 &sp, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){ - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _sp(i+_first,0) = _buf(i,6); - _sp(i+_first,1) = _buf(i,7); - _sp(i+_first,2) = _buf(i,8); - _sp(i+_first,3) = _buf(i,9); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - if (first+n >= nmax) { - grow(first+n+100); - } - if(space==Host) { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_sp,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_sp,first); - Kokkos::parallel_for(n,f); - } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 
_vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d_4 _spw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecSpinKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _spw(atom->k_sp.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _sp(i,0); - _buf(mysend,12) = _sp(i,1); - _buf(mysend,13) = _sp(i,2); - _buf(mysend,14) = _sp(i,3); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - 
_maskw(i) = _mask(j); - _imagew(i) = _image(j); - _spw(i,0) = _sp(j,0); - _spw(i,1) = _sp(j,1); - _spw(i,2) = _sp(j,2); - _spw(i,3) = _sp(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 15; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecSpinKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()), - 
_dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _sp(i,0) = _buf(myrecv,11); - _sp(i,1) = _buf(myrecv,12); - _sp(i,2) = _buf(myrecv,13); - _sp(i,3) = _buf(myrecv,14); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if(space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecSpinKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - /* ---------------------------------------------------------------------- clear extra forces starting at atom N nbytes = # of bytes to clear for a per-atom vector diff --git a/src/KOKKOS/comm_kokkos.cpp 
b/src/KOKKOS/comm_kokkos.cpp index 20510ba2844..80a16bc5232 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -115,16 +115,6 @@ void CommKokkos::init() reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommBrick::init(); - - if (!comm_f_only) {// not all Kokkos atom_vec styles have reverse pack/unpack routines yet - reverse_comm_legacy = true; - lmp->kokkos->reverse_comm_legacy = 1; - } - - if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) { // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet - forward_comm_legacy = true; - lmp->kokkos->forward_comm_legacy = 1; - } } /* ---------------------------------------------------------------------- @@ -741,22 +731,14 @@ void CommKokkos::exchange() break; } } - - if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { - if (!atomKK->avecKK->unpack_exchange_indices_flag) { - if (comm->me == 0) { - error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " - "switching to legacy exchange/border communication"); - } - } else if (!flag) { - if (comm->me == 0) { - error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " - "switching to legacy exchange/border communication"); - } + if (!flag) { + if (comm->me == 0) { + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to legacy exchange/border communication"); } - exchange_comm_legacy = true; - lmp->kokkos->exchange_comm_legacy = 1; } + exchange_comm_legacy = true; + lmp->kokkos->exchange_comm_legacy = 1; } } @@ -1056,8 +1038,7 @@ void CommKokkos::borders() { if (!exchange_comm_legacy) { - if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || - (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { + if (atom->nextra_border || mode != Comm::SINGLE || bordergroup) { if (comm->me == 0) { error->warning(FLERR,"Required border comm not yet 
implemented in Kokkos communication, " diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp index 3a542681233..9664f750621 100644 --- a/src/KOKKOS/comm_tiled_kokkos.cpp +++ b/src/KOKKOS/comm_tiled_kokkos.cpp @@ -82,11 +82,6 @@ void CommTiledKokkos::init() reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommTiled::init(); - - if (!comm_f_only) { // not all Kokkos atom_vec styles have reverse pack/unpack routines yet - reverse_comm_legacy = true; - lmp->kokkos->reverse_comm_legacy = 1; - } } /* ---------------------------------------------------------------------- From b8d91f095ff7cdf3ec5bf054e95561bb389b6e96 Mon Sep 17 00:00:00 2001 From: Evan Weinberg Date: Tue, 28 Oct 2025 10:54:18 -0700 Subject: [PATCH 114/604] Bugfix in pppm_kokkos.cpp due to improperly setting boxlo_kk[...] for tricyclic/tilted systems --- src/KOKKOS/pppm_kokkos.cpp | 61 +++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 457fd0634b5..dae53c12d0b 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -340,18 +340,20 @@ void PPPMKokkos::setup() delvolinv = delxinv*delyinv*delzinv; - delxinv_kk = static_cast(delxinv); - delyinv_kk = static_cast(delyinv); - delzinv_kk = static_cast(delzinv); - delvolinv_kk = static_cast(delvolinv); - unitkx = (MY_2PI/xprd); unitky = (MY_2PI/yprd); unitkz = (MY_2PI/zprd_slab); + // ensure all relevant _kk values are up to date + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + delvolinv_kk = static_cast(delvolinv); unitkx_kk = static_cast(unitkx); unitky_kk = static_cast(unitky); unitkz_kk = static_cast(unitkz); + g_ewald_kk = static_cast(g_ewald); + g_ewald_inv_kk = static_cast(1.0 / g_ewald); // d_fkx,d_fky,d_fkz for my FFT grid pts @@ -466,10 +468,13 @@ void PPPMKokkos::setup_triclinic() delzinv = nz_pppm; delvolinv = 
delxinv*delyinv*delzinv/volume; + // ensure all relevant _kk values are up to date delxinv_kk = static_cast(delxinv); delyinv_kk = static_cast(delyinv); delzinv_kk = static_cast(delzinv); delvolinv_kk = static_cast(delvolinv); + g_ewald_kk = static_cast(g_ewald); + g_ewald_inv_kk = static_cast(1.0 / g_ewald); numz_fft = nzhi_fft-nzlo_fft + 1; numy_fft = nyhi_fft-nylo_fft + 1; @@ -600,9 +605,9 @@ void PPPMKokkos::compute(int eflag, int vflag) domain->x2lamda(atomKK->nlocal); } - boxlo_kk[0] = static_cast(domain->boxlo[0]); - boxlo_kk[1] = static_cast(domain->boxlo[1]); - boxlo_kk[2] = static_cast(domain->boxlo[2]); + boxlo_kk[0] = static_cast(boxlo[0]); + boxlo_kk[1] = static_cast(boxlo[1]); + boxlo_kk[2] = static_cast(boxlo[2]); // extend size of per-atom arrays if necessary @@ -684,6 +689,10 @@ void PPPMKokkos::compute(int eflag, int vflag) int nlocal = atomKK->nlocal; int ntotal = nlocal; + // ensure all relevant _kk values are up to date + g_ewald_kk = static_cast(g_ewald); + g_ewald_inv_kk = static_cast(1.0 / g_ewald); + if (eflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); @@ -935,9 +944,9 @@ void PPPMKokkos::set_grid_local() boxlo[0] = domain->boxlo[0]; boxlo[1] = domain->boxlo[1]; boxlo[2] = domain->boxlo[2]; - boxlo_kk[0] = static_cast(domain->boxlo[0]); - boxlo_kk[1] = static_cast(domain->boxlo[1]); - boxlo_kk[2] = static_cast(domain->boxlo[2]); + boxlo_kk[0] = static_cast(boxlo[0]); + boxlo_kk[1] = static_cast(boxlo[1]); + boxlo_kk[2] = static_cast(boxlo[2]); } /* ---------------------------------------------------------------------- @@ -1179,7 +1188,15 @@ template void PPPMKokkos::particle_map() { int nlocal = atomKK->nlocal; + + // ensure all relevant _kk values are up to date shift_kk = static_cast(shift); + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + boxlo_kk[0] = static_cast(boxlo[0]); + boxlo_kk[1] = static_cast(boxlo[1]); + boxlo_kk[2] = 
static_cast(boxlo[2]); k_flag.view_host()() = 0; k_flag.modify_host(); @@ -1239,6 +1256,16 @@ void PPPMKokkos::make_rho() numx_out = nxhi_out-nxlo_out + 1; const int inum_out = numz_out*numy_out*numx_out; + // ensure all relevant _kk values are up to date + shiftone_kk = static_cast(shiftone); + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + delvolinv_kk = static_cast(delvolinv); + boxlo_kk[0] = static_cast(boxlo[0]); + boxlo_kk[1] = static_cast(boxlo[1]); + boxlo_kk[2] = static_cast(boxlo[2]); + copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_out),*this); copymode = 0; @@ -2076,6 +2103,9 @@ void PPPMKokkos::fieldforce_ik() int nlocal = atomKK->nlocal; + // ensure all relevant _kk values are up to date + qscale_kk = static_cast(qscale); + copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; @@ -2136,6 +2166,15 @@ void PPPMKokkos::fieldforce_peratom() int nlocal = atomKK->nlocal; + // ensure all relevant _kk values are up to date + shiftone_kk = static_cast(shiftone); + delxinv_kk = static_cast(delxinv); + delyinv_kk = static_cast(delyinv); + delzinv_kk = static_cast(delzinv); + boxlo_kk[0] = static_cast(boxlo[0]); + boxlo_kk[1] = static_cast(boxlo[1]); + boxlo_kk[2] = static_cast(boxlo[2]); + copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; From c8c562a735386bab1444920e7c5647e52970fd08 Mon Sep 17 00:00:00 2001 From: Michael Lostica Date: Tue, 28 Oct 2025 18:07:15 +0000 Subject: [PATCH 115/604] Address PR review comments - Remove duplicate comment in test_nbody_mpi.cpp - Fix arithmetic comment formatting in test_nbody_bigint.cpp - Simplify test_nbody_mpi.cpp by using simple main() pattern instead of test_mpi_main.h --- unittest/utils/test_nbody_bigint.cpp | 2 +- unittest/utils/test_nbody_mpi.cpp | 34 ++++++---------------------- 2 files changed, 8 insertions(+), 28 deletions(-) diff --git 
a/unittest/utils/test_nbody_bigint.cpp b/unittest/utils/test_nbody_bigint.cpp index dcb69ec199d..fe1c1a68c1c 100644 --- a/unittest/utils/test_nbody_bigint.cpp +++ b/unittest/utils/test_nbody_bigint.cpp @@ -48,7 +48,7 @@ TEST(NbodyBigint, ArithmeticOperations) // Test the actual expression from the code double result = 6.0 * nbody - nlinear; - // Expected: 6 * 2147484647 - 5 = 12,884,907,877 + // Expected: 6 * 2,147,484,647 - 5 = 12,884,907,877 double expected = 6.0 * (static_cast(INT_MAX) + 1000) - 5.0; EXPECT_DOUBLE_EQ(result, expected); EXPECT_GT(result, 6.0 * INT_MAX); // Verify it exceeds int range diff --git a/unittest/utils/test_nbody_mpi.cpp b/unittest/utils/test_nbody_mpi.cpp index 74a0555ae40..d6f35c34799 100644 --- a/unittest/utils/test_nbody_mpi.cpp +++ b/unittest/utils/test_nbody_mpi.cpp @@ -1,4 +1,3 @@ -// Unit tests for nbody MPI communication with large values /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories @@ -23,28 +22,6 @@ using namespace LAMMPS_NS; -// MPI environment setup -class MPIEnvironment : public ::testing::Environment { -public: - void SetUp() override { - int flag; - MPI_Initialized(&flag); - if (!flag) { - int argc = 0; - char **argv = nullptr; - MPI_Init(&argc, &argv); - } - } - - void TearDown() override { - int flag; - MPI_Finalized(&flag); - if (!flag) { - MPI_Finalize(); - } - } -}; - namespace { // Test MPI_Allreduce with MPI_LMP_BIGINT (validates fix_rigid_small.cpp:447) @@ -192,8 +169,11 @@ TEST(NbodyMPI, AllreduceAsymmetric) } // namespace -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); ::testing::InitGoogleTest(&argc, argv); - ::testing::AddGlobalTestEnvironment(new MPIEnvironment); - return RUN_ALL_TESTS(); -} + int rv = RUN_ALL_TESTS(); + MPI_Finalize(); + return rv; +} \ No newline at end of file From 
6fc0d22241e625f6b3792e1490930d2971c51f9b Mon Sep 17 00:00:00 2001 From: Michael Lostica Date: Tue, 28 Oct 2025 18:33:01 +0000 Subject: [PATCH 116/604] Fix whitespace: add newline at end of test_nbody_mpi.cpp --- unittest/utils/test_nbody_mpi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/utils/test_nbody_mpi.cpp b/unittest/utils/test_nbody_mpi.cpp index d6f35c34799..171e4f78e2e 100644 --- a/unittest/utils/test_nbody_mpi.cpp +++ b/unittest/utils/test_nbody_mpi.cpp @@ -176,4 +176,4 @@ int main(int argc, char **argv) int rv = RUN_ALL_TESTS(); MPI_Finalize(); return rv; -} \ No newline at end of file +} From de1495c73cf97243f2e33817d1026ae21a94b11e Mon Sep 17 00:00:00 2001 From: Michael Lostica Date: Tue, 28 Oct 2025 18:40:06 +0000 Subject: [PATCH 117/604] Use GTest::GMockMain for test_nbody_mpi consistency - Matches pattern used by test_fft3d and other MPI tests - Custom main() function overrides the one from GMockMain - Addresses Copilot PR review comment about linking consistency --- unittest/utils/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt index b9ad41365f6..5d86bc592b1 100644 --- a/unittest/utils/CMakeLists.txt +++ b/unittest/utils/CMakeLists.txt @@ -20,7 +20,7 @@ add_test(NAME NbodyBigint COMMAND test_nbody_bigint) # Test nbody MPI communication with large values (requires MPI) if(BUILD_MPI) add_executable(test_nbody_mpi test_nbody_mpi.cpp) - target_link_libraries(test_nbody_mpi PRIVATE lammps GTest::GTest MPI::MPI_CXX) + target_link_libraries(test_nbody_mpi PRIVATE lammps GTest::GMockMain MPI::MPI_CXX) add_mpi_test(NAME NbodyMPI NUM_PROCS 4 COMMAND $) set_tests_properties(NbodyMPI PROPERTIES LABELS "mpi") endif() From f3ebfb96bee4df884ac20ef322e5aa10bb002958 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 28 Oct 2025 12:50:58 -0600 Subject: [PATCH 118/604] Fix compile issues --- src/KOKKOS/atom_vec_kokkos.cpp | 2 +- 
src/KOKKOS/atom_vec_kokkos.h | 18 +++++++++------ src/KOKKOS/atom_vec_spin_kokkos.h | 37 ------------------------------- src/angle.h | 2 +- src/bond.h | 2 +- src/compute.h | 2 +- src/dihedral.h | 2 +- src/fix.h | 2 +- src/improper.h | 2 +- src/kspace.h | 2 +- src/pair.h | 2 +- 11 files changed, 20 insertions(+), 53 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index a22cf55f2d2..dc71d58faab 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -883,7 +883,7 @@ struct AtomVecKokkos_UnpackCommVel { AtomVecKokkos_UnpackCommVel( const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int &first, const int &datamask): + const int &first, const uint64_t &datamask): _x(atomKK->k_x.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 3e6d21d3169..659cf4b6935 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -159,6 +159,10 @@ class AtomVecKokkos : virtual public AtomVec { HAT::t_int_2d h_dihedral_type; HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, h_dihedral_atom3,h_dihedral_atom4; + HAT::t_int_1d h_num_improper; + HAT::t_int_2d h_improper_type; + HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, + h_improper_atom3,h_improper_atom4; DAT::t_kkfloat_1d_4 d_mu; HAT::t_kkfloat_1d_4 h_mu; @@ -185,13 +189,13 @@ class AtomVecKokkos : virtual public AtomVec { DAT::tdual_int_1d k_count; - int datamask_grow; - int datamask_comm; - int datamask_comm_vel; - int datamask_reverse; - int datamask_border; - int datamask_border_vel; - int datamask_exchange; + uint64_t datamask_grow; + uint64_t datamask_comm; + uint64_t datamask_comm_vel; + uint64_t datamask_reverse; + uint64_t datamask_border; + uint64_t datamask_border_vel; + uint64_t datamask_exchange; void setup_fields() override; int field2mask(std::string); diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h 
b/src/KOKKOS/atom_vec_spin_kokkos.h index 4153b22c534..308a4d8ab37 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -36,46 +36,9 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { void grow_pointers() override; void force_clear(int, size_t) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - void sync(ExecutionSpace space, uint64_t mask) override; void modified(ExecutionSpace space, uint64_t mask) override; void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; - - protected: - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d_4 d_sp; - DAT::t_kkacc_1d_3 d_fm; - DAT::t_kkacc_1d_3 d_fm_long; - - HAT::t_kkfloat_1d_4 h_sp; - HAT::t_kkacc_1d_3 h_fm; - HAT::t_kkacc_1d_3 h_fm_long; }; } // namespace LAMMPS_NS diff --git a/src/angle.h b/src/angle.h index 58ca188df57..83ba6a236bc 100644 --- a/src/angle.h +++ b/src/angle.h @@ -43,7 +43,7 @@ class Angle : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t 
datamask_read, datamask_modify; int copymode, kokkosable; Angle(class LAMMPS *); diff --git a/src/bond.h b/src/bond.h index 3b1740b2d42..c844dc9acb0 100644 --- a/src/bond.h +++ b/src/bond.h @@ -48,7 +48,7 @@ class Bond : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Bond(class LAMMPS *); diff --git a/src/compute.h b/src/compute.h index aee4a676ecd..8b0d50946cf 100644 --- a/src/compute.h +++ b/src/compute.h @@ -106,7 +106,7 @@ class Compute : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; diff --git a/src/dihedral.h b/src/dihedral.h index 7ca3379e44c..72895f0b11f 100644 --- a/src/dihedral.h +++ b/src/dihedral.h @@ -43,7 +43,7 @@ class Dihedral : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Dihedral(class LAMMPS *); diff --git a/src/fix.h b/src/fix.h index ebf52241711..950cb378704 100644 --- a/src/fix.h +++ b/src/fix.h @@ -135,7 +135,7 @@ class Fix : protected Pointers { int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate int sort_device; // 1 if sort on Device ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; Fix(class LAMMPS *, int, char **); ~Fix() override; diff --git a/src/improper.h b/src/improper.h index 0f539bdecee..da0ae1276f8 100644 --- a/src/improper.h +++ b/src/improper.h @@ -48,7 +48,7 @@ class Improper : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, 
datamask_modify; int copymode, kokkosable; Improper(class LAMMPS *); diff --git a/src/kspace.h b/src/kspace.h index 0a3db05ee7a..f083b99a0fd 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -125,7 +125,7 @@ class KSpace : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode; int compute_flag; // 0 if skip compute() diff --git a/src/pair.h b/src/pair.h index 0a5d2bc3b38..8d50a1760d1 100644 --- a/src/pair.h +++ b/src/pair.h @@ -124,7 +124,7 @@ class Pair : protected Pointers { // KOKKOS flags and variables ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int kokkosable; // 1 if Kokkos pair int reverse_comm_device; // 1 if reverse comm on Device int fuse_force_clear_flag; // 1 if can fuse force clear with force compute From f8e66f2de5642dc861cd9e27a6f226c49bcf3b96 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 28 Oct 2025 13:08:44 -0600 Subject: [PATCH 119/604] Use correct type --- src/KOKKOS/atom_vec_kokkos.cpp | 2 +- src/KOKKOS/atom_vec_kokkos.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index dc71d58faab..9c223f288da 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -2512,7 +2512,7 @@ int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nr /* ---------------------------------------------------------------------- */ -int AtomVecKokkos::field2mask(std::string field) +uint64_t AtomVecKokkos::field2mask(std::string field) { if (field == "id") return TAG_MASK; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 659cf4b6935..8f779d6918e 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -198,7 +198,7 @@ class AtomVecKokkos : virtual public AtomVec { 
uint64_t datamask_exchange; void setup_fields() override; - int field2mask(std::string); + uint64_t field2mask(std::string); int field2size(std::string); void set_atom_masks(); void set_size_exchange(); From 92ecac12eacf9f82a823e2b1685a7c344e17f99e Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 28 Oct 2025 15:00:25 -0500 Subject: [PATCH 120/604] second attempt to fix the issue --- lib/gpu/lal_born_coul_long_cs.cu | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index 1a39fe7e65d..6e70268878d 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -44,16 +44,14 @@ _texture( q_tex,int2); #define EPSILON (acctyp)(1.0e-20) #define EPS_EWALD (acctyp)(1.0e-6) #define EPS_EWALD_SQR (acctyp)(1.0e-12) -#elif defined _SINGLE_DOUBLE -#define EPSILON (acctyp)(1.0e-8) -#define EPS_EWALD (acctyp)(1.0e-5) -#define EPS_EWALD_SQR (acctyp)(1.0e-8) #else #define EPSILON (numtyp)(1.0e-7) -#define EPS_EWALD (numtyp)(1.0e-4) +#define EPS_EWALD (numtyp)(1.0e-6) #define EPS_EWALD_SQR (numtyp)(1.0e-7) #endif +#define ucl_recip2(x) ((numtyp)1.0/(x)) + __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict coeff1, const __global numtyp4 *restrict coeff2, @@ -127,7 +125,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp; rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond; - numtyp r2inv = ucl_recip(rsq); + numtyp r2inv = ucl_recip2(rsq); if (rsq < cut_coulsq) { numtyp r = ucl_sqrt(rsq); @@ -139,21 +137,21 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = 
ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip(r+EPS_EWALD); + prefactor *= ucl_recip2(r+EPS_EWALD); forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent - r2inv = ucl_recip(rsq + EPS_EWALD_SQR); + r2inv = ucl_recip2(rsq + EPS_EWALD_SQR); } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip(r); + prefactor *= ucl_recip2(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } } else forcecoul = (numtyp)0.0; @@ -272,7 +270,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp; rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond; - numtyp r2inv = ucl_recip(rsq); + numtyp r2inv = ucl_recip2(rsq); if (rsq < cut_coulsq) { numtyp r = ucl_sqrt(rsq); @@ -285,23 +283,23 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip(r+EPS_EWALD); + prefactor *= ucl_recip2(r+EPS_EWALD); forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent 
- r2inv = ucl_recip(rsq + EPS_EWALD_SQR); + r2inv = ucl_recip2(rsq + EPS_EWALD_SQR); } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip(r); + prefactor *= ucl_recip2(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } From 24247aa2d81e8a39dc73d5570d597be99e0c9b12 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 29 Oct 2025 00:21:00 -0400 Subject: [PATCH 121/604] remove dead code in fix bond/react --- src/REACTION/fix_bond_react.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index a546f138b21..7dfad5e4cc4 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -1445,7 +1445,6 @@ void FixBondReact::make_a_guess(Superimpose &super, Reaction &rxn) { Superimpose::StatePoint &sp = super.sp; int &avail_guesses = super.avail_guesses; - std::vector &guess_branch = super.guess_branch; int *type = atom->type; int nfirst_neighs = rxn.reactant->nspecial[sp.pion][0]; @@ -1550,8 +1549,6 @@ void FixBondReact::make_a_guess(Superimpose &super, Reaction &rxn) void FixBondReact::neighbor_loop(Superimpose &super, Reaction &rxn) { Superimpose::StatePoint &sp = super.sp; - int &avail_guesses = super.avail_guesses; - std::vector &guess_branch = super.guess_branch; int nfirst_neighs = rxn.reactant->nspecial[sp.pion][0]; @@ -1576,8 +1573,6 @@ void FixBondReact::neighbor_loop(Superimpose &super, Reaction &rxn) void FixBondReact::check_a_neighbor(Superimpose &super, Reaction &rxn) { Superimpose::StatePoint &sp = super.sp; - int &avail_guesses = super.avail_guesses; - std::vector &guess_branch = super.guess_branch; int *type = atom->type; int nfirst_neighs = rxn.reactant->nspecial[sp.pion][0]; @@ -1687,7 +1682,6 @@ 
void FixBondReact::crosscheck_the_neighbor(Superimpose &super, Reaction &rxn) { Superimpose::StatePoint &sp = super.sp; int &avail_guesses = super.avail_guesses; - std::vector &guess_branch = super.guess_branch; int nfirst_neighs = rxn.reactant->nspecial[sp.pion][0]; From d565a77be582eff086d4ba84cb07cba853f3487f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 29 Oct 2025 00:21:38 -0400 Subject: [PATCH 122/604] should use utils::inumeric() for integer parameters --- src/REACTION/fix_bond_react.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 7dfad5e4cc4..1b61b0dbe77 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -348,7 +348,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : "used without stabilization keyword"); if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: " "'stabilize_steps' has too few arguments"); - rxn.limit_duration = utils::numeric(FLERR,arg[iarg+1],false,lmp); + rxn.limit_duration = utils::inumeric(FLERR,arg[iarg+1],false,lmp); rxn.stabilize_steps_flag = 1; iarg += 2; } else if (strcmp(arg[iarg],"custom_charges") == 0) { From b33627267c5a3e3854f13464e0d94946cb634a86 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Wed, 29 Oct 2025 16:17:32 -0400 Subject: [PATCH 123/604] Update fix_reaxff_species.rst --- doc/src/fix_reaxff_species.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/fix_reaxff_species.rst b/doc/src/fix_reaxff_species.rst index 90996b7106a..de35bcc066d 100644 --- a/doc/src/fix_reaxff_species.rst +++ b/doc/src/fix_reaxff_species.rst @@ -154,13 +154,13 @@ rate of molecule deletion. The number of deletion occurrences is limited to Nlimit within an interval of Nsteps timesteps. Nlimit can be specified with an equal-style :doc:`variable `. 
When using the *delete_rate_limit* keyword, no deletions are permitted to occur within the -first Nsteps timesteps of the first run (after reading a either a data or +first Nsteps timesteps of the first run (after reading either a data or restart file). The *delete* keyword can output information about the deleted molecules in either legacy format or JSON format. The latter is activated when the *filedel* argument has a '.json' extension. The legacy format lists how -many of each species is deleted, while the JSON format provides the atom ID, +many of each species are deleted, while the JSON format provides the atom ID, atom type, and coordinates of deleted atoms within each molecule. The format for legacy output changes depending on the keyword used. When using the *specieslist* keyword and legacy format, the *filedel* file has the From 9c68f0bce8b95e84b7dfc02d5b3eace53135a209 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Thu, 30 Oct 2025 13:04:49 -0400 Subject: [PATCH 124/604] Update dump-molecules-schema.json --- tools/json/dump-molecules-schema.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/json/dump-molecules-schema.json b/tools/json/dump-molecules-schema.json index 5fc24895032..a08ec0840b1 100644 --- a/tools/json/dump-molecules-schema.json +++ b/tools/json/dump-molecules-schema.json @@ -18,8 +18,7 @@ "const": "dump" }, "style": { - "type": "string", - "const": "molecules" + "enum": ["molecules"] }, "revision": { "type": "integer", From 4e868b3a9b8fb50efe244142c38707b339ac60f9 Mon Sep 17 00:00:00 2001 From: Kehan Cai Date: Thu, 30 Oct 2025 10:21:31 -0700 Subject: [PATCH 125/604] Add output log files --- .../pimd/langevin_reduced_units_group/log | 2 + .../pimd/langevin_reduced_units_group/log.0 | 104 ++++++++++++++++++ .../pimd/langevin_reduced_units_group/log.1 | 96 ++++++++++++++++ .../pimd/langevin_reduced_units_group/log.2 | 96 ++++++++++++++++ .../pimd/langevin_reduced_units_group/log.3 | 96 ++++++++++++++++ 5 files 
changed, 394 insertions(+) create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/log create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log b/examples/PACKAGES/pimd/langevin_reduced_units_group/log new file mode 100644 index 00000000000..6bbb1e127f0 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log @@ -0,0 +1,2 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 new file mode 100644 index 00000000000..ee04f26d22a --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 @@ -0,0 +1,104 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +Processor partition = 0 + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 4 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj01 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 200 atoms + read_data CPU = 0.002 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 +mass 2 1.0 + +group real_atom type 1 +190 atoms in group real_atom +group virtual_atom type 2 +10 atoms in group virtual_atom + +timestep 0.00044905847 + +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 1 + +thermo_style custom step temp f_1[*] vol press +thermo 10 +thermo_modify norm no + +dump dcd all custom 1 traj${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump dcd all custom 1 traj1.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump_modify dcd sort id #format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 100 +Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule +Initializing PI Langevin equation thermostat... + Bead ID | omega | tau | c1 | c2 + 0 0.00000000e+00 1.00000000e+00 9.99775496e-01 2.11886210e-02 + 1 1.86362182e+02 2.68294777e-03 9.19718608e-01 3.92578249e-01 + 2 2.63555925e+02 1.89713056e-03 8.88383268e-01 4.59102569e-01 + 3 1.86362182e+02 2.68294777e-03 9.19718608e-01 3.92578249e-01 + PILE_L thermostat successfully initialized! + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 0 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 0.038411443 11.465816 0 -875.32113 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 -1.3493346 + 20 0.081832749 24.427076 0 -874.86623 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 -1.3008247 + 30 0.13467173 40.199511 0 -876.21109 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 -1.3087995 + 40 0.18759421 55.996873 0 -876.90255 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 -1.2986964 + 50 0.24981124 74.568656 0 -876.85585 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 -1.2560492 + 60 0.30820757 91.999959 0 -878.23845 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 -1.2551723 + 70 0.37793499 112.81359 0 -881.46154 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 -1.2724164 + 80 0.43487408 129.80991 0 -884.65404 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 -1.2869644 + 90 0.50355318 150.31062 0 -887.38385 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 -1.2951988 + 100 0.57003618 170.1558 0 -891.05656 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 
341.38937 -1.3254094 +Loop time of 0.134782 on 1 procs for 100 steps with 200 atoms + +Performance: 28786.267 tau/day, 741.940 timesteps/s, 148.388 katom-step/s +95.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.016075 | 0.016075 | 0.016075 | 0.0 | 11.93 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00062061 | 0.00062061 | 0.00062061 | 0.0 | 0.46 +Output | 0.09909 | 0.09909 | 0.09909 | 0.0 | 73.52 +Modify | 0.018774 | 0.018774 | 0.018774 | 0.0 | 13.93 +Other | | 0.0002219 | | | 0.16 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1142 ave 1142 max 1142 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7503 ave 7503 max 7503 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7503 +Ave neighs/atom = 37.515 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 new file mode 100644 index 00000000000..c80a127b1ce --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 @@ -0,0 +1,96 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +Processor partition = 1 + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 4 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj01 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 200 atoms + read_data CPU = 0.003 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 +mass 2 1.0 + +group real_atom type 1 +190 atoms in group real_atom +group virtual_atom type 2 +10 atoms in group virtual_atom + +timestep 0.00044905847 + +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 2 + +thermo_style custom step temp f_1[*] vol press +thermo 10 +thermo_modify norm no + +dump dcd all custom 1 traj${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump dcd all custom 1 traj2.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump_modify dcd sort id #format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 100 +Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 4.6054235e-24 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 3.6888879 1101.1331 285.67287 -874.75939 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.78530277 + 20 3.4940756 1042.9816 790.68515 -876.17255 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.64433263 + 30 3.352259 1000.6493 988.97192 -876.91853 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.55573019 + 40 3.7408936 1116.6567 1007.0387 -877.86197 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.77082307 + 50 3.7919069 1131.8842 1072.1488 -879.36819 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.77700475 + 60 3.7086239 1107.0242 1050.5562 -881.73393 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.6957508 + 70 3.8176119 1139.5572 1054.2695 -882.29721 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.74600469 + 80 3.3021658 985.69649 1087.6174 -884.90368 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.38965467 + 90 3.8785892 1157.7589 1017.7424 -887.88222 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.67569569 + 100 3.7316631 1113.9014 996.70261 
-890.75765 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.51681218 +Loop time of 0.134755 on 1 procs for 100 steps with 200 atoms + +Performance: 28791.973 tau/day, 742.087 timesteps/s, 148.417 katom-step/s +95.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.015986 | 0.015986 | 0.015986 | 0.0 | 11.86 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00061376 | 0.00061376 | 0.00061376 | 0.0 | 0.46 +Output | 0.095962 | 0.095962 | 0.095962 | 0.0 | 71.21 +Modify | 0.021985 | 0.021985 | 0.021985 | 0.0 | 16.31 +Other | | 0.0002084 | | | 0.15 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1142 ave 1142 max 1142 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7503 ave 7503 max 7503 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7503 +Ave neighs/atom = 37.515 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 new file mode 100644 index 00000000000..f3937759c1b --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 @@ -0,0 +1,96 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +Processor partition = 2 + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 4 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj01 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 200 atoms + read_data CPU = 0.003 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 +mass 2 1.0 + +group real_atom type 1 +190 atoms in group real_atom +group virtual_atom type 2 +10 atoms in group virtual_atom + +timestep 0.00044905847 + +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 3 + +thermo_style custom step temp f_1[*] vol press +thermo 10 +thermo_modify norm no + +dump dcd all custom 1 traj${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump dcd all custom 1 traj3.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump_modify dcd sort id #format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 100 +Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 0 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 3.2100461 958.19875 487.33842 -874.71922 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.49377608 + 20 3.7003852 1104.565 863.96941 -872.98129 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.79081263 + 30 3.8240127 1141.4678 1032.9144 -873.28849 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.86681413 + 40 3.5116515 1048.228 1123.5034 -876.6311 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.65012938 + 50 3.6895722 1101.3373 1143.8268 -880.66063 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.68747857 + 60 4.0395555 1205.8073 1105.4001 -883.62527 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.84147104 + 70 4.0847536 1219.2989 1195.2014 -886.86715 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.80491283 + 80 3.6472645 1088.7085 1187.7184 -888.21493 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.51747741 + 90 3.6311826 1083.908 1173.2906 -889.91457 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.47096547 + 100 4.0525189 1209.6769 997.93435 -892.34535 
3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.6877229 +Loop time of 0.134911 on 1 procs for 100 steps with 200 atoms + +Performance: 28758.748 tau/day, 741.231 timesteps/s, 148.246 katom-step/s +92.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.015957 | 0.015957 | 0.015957 | 0.0 | 11.83 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00061673 | 0.00061673 | 0.00061673 | 0.0 | 0.46 +Output | 0.095947 | 0.095947 | 0.095947 | 0.0 | 71.12 +Modify | 0.022177 | 0.022177 | 0.022177 | 0.0 | 16.44 +Other | | 0.0002129 | | | 0.16 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1142 ave 1142 max 1142 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7503 ave 7503 max 7503 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7503 +Ave neighs/atom = 37.515 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 new file mode 100644 index 00000000000..f7c1d899f0c --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 @@ -0,0 +1,96 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +Processor partition = 3 + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 4 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj01 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 200 atoms + read_data CPU = 0.003 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 +mass 2 1.0 + +group real_atom type 1 +190 atoms in group real_atom +group virtual_atom type 2 +10 atoms in group virtual_atom + +timestep 0.00044905847 + +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 4 + +thermo_style custom step temp f_1[*] vol press +thermo 10 +thermo_modify norm no + +dump dcd all custom 1 traj${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump dcd all custom 1 traj4.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +dump_modify dcd sort id #format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 100 +Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 2.2103965e-23 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 3.2431068 968.06738 296.17847 -876.03789 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.51346639 + 20 3.2818989 979.64681 818.11437 -875.95192 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.54670855 + 30 3.6057557 1076.3181 977.15059 -876.80428 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.72247086 + 40 3.7618847 1122.9226 1118.9264 -877.96302 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.77694116 + 50 3.8979994 1163.5528 1156.2033 -880.32726 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.80424078 + 60 4.0856511 1219.5669 1111.0176 -882.56588 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.87289367 + 70 4.0217913 1200.5047 1198.0635 -884.65317 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.80584971 + 80 4.1454238 1237.409 1113.1635 -887.63937 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.83007064 + 90 3.8894042 1160.9871 1170.5358 -889.93317 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.63498319 + 100 3.4570929 1031.9422 1184.8761 
-891.18508 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.36734878 +Loop time of 0.134747 on 1 procs for 100 steps with 200 atoms + +Performance: 28793.701 tau/day, 742.131 timesteps/s, 148.426 katom-step/s +95.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.016175 | 0.016175 | 0.016175 | 0.0 | 12.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.0006406 | 0.0006406 | 0.0006406 | 0.0 | 0.48 +Output | 0.097484 | 0.097484 | 0.097484 | 0.0 | 72.35 +Modify | 0.020224 | 0.020224 | 0.020224 | 0.0 | 15.01 +Other | | 0.0002238 | | | 0.17 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1142 ave 1142 max 1142 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7503 ave 7503 max 7503 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7503 +Ave neighs/atom = 37.515 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 From 8ad507d9c28eb69708406275aba8b5280c9af607 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 14:17:45 -0400 Subject: [PATCH 126/604] update doc to clarify the usage of sp --- doc/src/fix_pimd.rst | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 912fe093404..19d0cebada5 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -46,6 +46,7 @@ Syntax *ensemble* value = *nvt* or *nve* or *nph* or *npt* *fmmode* value = *physical* or *normal* *fmass* value = scaling factor on mass + *sp* value = scaling factor on Planck constant *temp* value = temperature (temperature unit) temperature = target temperature of the thermostat *thermostat* values = style seed @@ -247,9 +248,19 @@ a value other than *physical*, due to the lack of support for bosonic normal mod (:math:`\sum_{i=1}^P \frac{1}{2}m\omega_P^2(q_i - 
q_{i+1})^2`, :math:`m` is always the actual mass of the particles). -The keyword *sp* is a scaling factor on Planck's constant, which can -be useful for debugging or other purposes. The default value of 1.0 -is appropriate for most situations. +The keyword *sp* is a scaling factor on Planck's constant. Scaling Planck's constant means modifying the "quantumness" of the PIMD simulation. Using the physical value of Planck's constant corresponds to a fully quantum simulation, and 0 corresponds to the classical limit. +For unit styles other than *lj*, the default value of 1.0 is appropriate for most situations. +For *lj* units, a fully quantum simulation translates into setting *sp* to the de Boer quantumness parameter :math:`\Lambda^{\ast}` (see :ref:`de Boer <de Boer>`): +.. math:: + + \Lambda^{\ast}=h/\sigma\sqrt{m\varepsilon} + +where :math:`h` is Planck's constant, :math:`\sigma` is the length scale, :math:`\varepsilon` is the energy scale, and :math:`m` is the mass of the particles. +For example, for Neon, :math:`m = 20.1797` Dalton, :math:`\varepsilon = 3.0747 \times 10^{-3}` eV and :math:`\sigma = 2.7616` Å. Then we have +.. math:: + \Lambda^{\ast} = 4.135667403e-3 eV * ps / (2.7616 Å * sqrt(20.1797 Dalton * 3.0747e-3 eV * 1.0364269e-4 eV / Dalton / Å^2 * ps^2)) = 0.600. +Thus for a fully quantum simulation of Neon using *lj* units, *sp* should be set to 0.600. +The modification of the quantumness should be done by scaling :math:`\Lambda^{\ast}`. The keyword *ensemble* for fix style *pimd/langevin* determines which ensemble is it going to sample. The value can be *nve* (microcanonical), *nvt* (canonical), *nph* (isoenthalpic), @@ -535,6 +546,10 @@ Path Integrals, McGraw-Hill, New York (1965). **(Cao2)** J. Cao and G. Voth, J Chem Phys, 100, 5093 (1994). +.. _de Boer: + +**(de Boer)** J. de Boer, "Quantum Effects and Exchange Effects on the Thermodynamic Properties of Liquid Helium," Progress in Low Temperature Physics, Volume 2, Pages 1-58 (1957). + .. 
_Hone: **(Hone)** T. Hone, P. Rossky, G. Voth, J Chem Phys, 124, From a7d508b23624338c575f5657fc2381969ebcfbd7 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 14:48:48 -0400 Subject: [PATCH 127/604] correct reference --- doc/src/fix_pimd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 19d0cebada5..3f71ba4b6fa 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -276,7 +276,7 @@ a positive floating-point number. for nve pimd, since the spring elastic frequency between the beads will be affected by the temperature. The keyword *thermostat* reads *style* and *seed* of thermostat for fix style *pimd/langevin*. -*style* can only be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti `), and *seed* should a positive integer number, which serves as the seed of the pseudo random number generator. +*style* can only be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti <Ceriotti3>`), and *seed* should be a positive integer number, which serves as the seed of the pseudo random number generator. .. 
note:: From c55680ad3c5943676114b0819be8d46f4a289cd4 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 14:49:43 -0400 Subject: [PATCH 128/604] discard calculating Planck's constant for the lj unit style --- src/REPLICA/fix_pimd_langevin.cpp | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index 39084765727..ee4f67e51e7 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -89,11 +89,6 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : integrator = OBABO; thermostat = PILE_L; barostat = BZP; - lj_epsilon = 1; - lj_sigma = 1; - lj_mass = 1; - other_planck = 1; - other_mvv2e = 1; fmass = 1.0; np = universe->nworlds; inverse_np = 1.0 / np; @@ -195,13 +190,6 @@ FixPIMDLangevin::FixPIMDLangevin(LAMMPS *lmp, int narg, char **arg) : temp = utils::numeric(FLERR, arg[i + 1], false, lmp); if (temp < 0.0) error->universe_all(FLERR, fmt::format("Invalid temp value for fix {}", style)); - } else if (strcmp(arg[i], "lj") == 0) { - lj_epsilon = utils::numeric(FLERR, arg[i + 1], false, lmp); - lj_sigma = utils::numeric(FLERR, arg[i + 2], false, lmp); - lj_mass = utils::numeric(FLERR, arg[i + 3], false, lmp); - other_planck = utils::numeric(FLERR, arg[i + 4], false, lmp); - other_mvv2e = utils::numeric(FLERR, arg[i + 5], false, lmp); - i += 4; } else if (strcmp(arg[i], "thermostat") == 0) { if (strcmp(arg[i + 1], "PILE_L") == 0) { thermostat = PILE_L; @@ -442,14 +430,7 @@ void FixPIMDLangevin::init() masstotal = group->mass(igroup); - double planck; - if (strcmp(update->unit_style, "lj") == 0) { - double planck_star = sqrt(lj_epsilon) * sqrt(lj_mass) * lj_sigma * sqrt(other_mvv2e); - planck = other_planck / planck_star; - } else { - planck = force->hplanck; - } - planck *= sp; + double planck = sp * force->hplanck; hbar = planck / (MY_2PI); beta = 1.0 / (force->boltz * temp); double _fbond = 1.0 * np * np 
/ (beta * beta * hbar * hbar); From f6e0921f3fb68deab4df7dde37b2f26c1ca869cb Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 14:49:58 -0400 Subject: [PATCH 129/604] clean commented line --- src/REPLICA/fix_pimd_langevin.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp index ee4f67e51e7..3ee6f8d7ea3 100644 --- a/src/REPLICA/fix_pimd_langevin.cpp +++ b/src/REPLICA/fix_pimd_langevin.cpp @@ -1214,11 +1214,8 @@ void FixPIMDLangevin::spring_force() int nlocal = atom->nlocal; tagint *tagtmp = atom->tag; - // printf("iworld = %d, x_last = %d, x_next = %d\n", universe->iworld, x_last, x_next); int *mask = atom->mask; - // int idx_tmp = atom->map(1); - for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { double delx1 = bufsortedall[x_last * nlocal + tagtmp[i] - 1][0] - x[i][0]; From 0b912432e91c24d7d9ee3181d48ff2fe6205c2ac Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 16:38:31 -0400 Subject: [PATCH 130/604] clean trailing whitespace --- doc/src/fix_pimd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 3f71ba4b6fa..9557022d7a2 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -260,7 +260,7 @@ For example, for Neon, :math:`m = 20.1797` Dalton, :math:`\varepsilon = 3.0747 \ .. math:: \Lambda^{\ast} = 4.135667403e-3 eV * ps / (2.7616 Å * sqrt(20.1797 Dalton * 3.0747e-3 eV * 1.0364269e-4 eV / Dalton / Å^2 * ps^2)) = 0.600. Thus for a fully quantum simulation of Neon using *lj* units, *sp* should be set to 0.600. -The modification of the quantumness should be done by scaling :math:`\Lambda^{\ast}`. +The modification of the quantumness should be done by scaling :math:`\Lambda^{\ast}`. The keyword *ensemble* for fix style *pimd/langevin* determines which ensemble is it going to sample. 
The value can be *nve* (microcanonical), *nvt* (canonical), *nph* (isoenthalpic), From 17df3301913670574938b8dd1cb06b498bdf88d8 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Thu, 30 Oct 2025 17:17:54 -0400 Subject: [PATCH 131/604] update example --- .../pimd/langevin_reduced_units/in.lmp | 2 +- .../log.10Sep25.langevin.reduced.g++ | 2 + .../log.10Sep25.langevin.reduced.g++.0 | 97 +++++++++++++++++++ .../log.10Sep25.langevin.reduced.g++.1 | 91 +++++++++++++++++ .../log.14Jun23.langevin.reduced.g++ | 2 - .../log.14Jun23.langevin.reduced.g++.0 | 97 ------------------- .../log.14Jun23.langevin.reduced.g++.1 | 97 ------------------- 7 files changed, 191 insertions(+), 197 deletions(-) create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++ create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.0 create mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.1 delete mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++ delete mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.0 delete mode 100644 examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.1 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units/in.lmp b/examples/PACKAGES/pimd/langevin_reduced_units/in.lmp index 80bfbe49565..ea9657f56c2 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units/in.lmp +++ b/examples/PACKAGES/pimd/langevin_reduced_units/in.lmp @@ -14,7 +14,7 @@ mass 1 1.0 timestep 0.00044905847 -fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} thermo_style custom step temp f_1[*] vol press thermo 100 diff --git 
a/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++ b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++ new file mode 100644 index 00000000000..b6846f7eea4 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++ @@ -0,0 +1,2 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-285-g0b912432e9-modified) +Running on 2 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.0 b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.0 new file mode 100644 index 00000000000..e4f5f8c3063 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.0 @@ -0,0 +1,97 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-285-g0b912432e9-modified) +Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 32 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj${ibead} +read_data data.lj01 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 200 atoms + read_data CPU = 0.001 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 + +timestep 0.00044905847 + +fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 01 + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +#dump dcd all custom 1 ${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +#dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Initializing PI Langevin equation thermostat... + Bead ID | omega | tau | c1 | c2 + 0 0.00000000e+00 1.00000000e+00 9.99775496e-01 2.11886210e-02 + 1 1.31787526e+02 3.79398579e-03 9.42536810e-01 3.34102322e-01 + PILE_L thermostat successfully initialized! + +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 0 -875.67022 841988.51 -421279.13 253.30998 382.47517 -1646.5336 -1.9663356 -0.82731217 341.38937 -1.3810467 + 100 14.817371 4422.9853 0 -534.92328 5337.0116 98.252811 7794.4231 306.80873 -0.79835015 13.250964 8.3267012 341.38937 12.562252 + 200 13.762897 4108.2246 0 -534.95517 4177.5291 334.44025 -8232.4092 308.68322 0.1241056 12.726742 8.0817572 341.38937 12.177521 + 300 12.567041 3751.2617 0 -493.05357 3999.6771 286.94793 8675.7287 
309.21515 -0.061380815 14.107894 8.6065735 341.38937 12.246489 + 400 11.844955 3535.7192 0 -515.1282 3784.41 290.67609 -85.218746 309.0205 -0.046820083 13.638883 8.2475712 341.38937 11.616699 + 500 11.101108 3313.6808 0 -521.46928 3556 274.62448 650.39076 309.45886 -0.1095114 12.806143 7.7267347 341.38937 10.99195 + 600 9.9620798 2973.6808 0 -462.66349 3219.3472 323.6578 3169.1089 309.48769 0.081993582 13.761585 8.0387709 341.38937 11.072758 + 700 9.3387158 2787.6067 0 -501.1952 2901.7138 343.18094 2002.8977 309.02634 0.15824332 12.74807 7.4599969 341.38937 10.131828 + 800 8.9074976 2658.888 0 -523.14785 2864.9389 287.94911 2331.5161 308.67674 -0.057470593 11.606039 6.8145291 341.38937 9.3523989 + 900 8.5049897 2538.7394 0 -543.6833 2597.7781 328.04213 599.92997 308.62574 0.099117065 10.499473 6.2153451 341.38937 8.6401113 + 1000 8.0728921 2409.7583 0 -571.68303 2533.6673 290.94669 -1268.3145 308.58653 -0.045763228 9.4221992 5.5934009 341.38937 7.8382485 +Loop time of 0.188222 on 1 procs for 1000 steps with 200 atoms + +Performance: 206132.768 tau/day, 5312.885 timesteps/s, 1.063 Matom-step/s +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.091306 | 0.091306 | 0.091306 | 0.0 | 48.51 +Neigh | 0.01581 | 0.01581 | 0.01581 | 0.0 | 8.40 +Comm | 0.0038515 | 0.0038515 | 0.0038515 | 0.0 | 2.05 +Output | 9.6502e-05 | 9.6502e-05 | 9.6502e-05 | 0.0 | 0.05 +Modify | 0.076181 | 0.076181 | 0.076181 | 0.0 | 40.47 +Other | | 0.0009761 | | | 0.52 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1117 ave 1117 max 1117 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7491 ave 7491 max 7491 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7491 +Ave neighs/atom = 37.455 +Neighbor list builds = 55 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git 
a/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.1 b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.1 new file mode 100644 index 00000000000..1f9fb3b8788 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_reduced_units/log.10Sep25.langevin.reduced.g++.1 @@ -0,0 +1,91 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-285-g0b912432e9-modified) +Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 32 pad + +units lj +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 2.8015 +read_data data.lj${ibead} +read_data data.lj02 +Reading data file ... + orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + read_data CPU = 0.001 seconds + +pair_coeff * * 1.0 1.0 +pair_modify shift yes + +mass 1 1.0 + +timestep 0.00044905847 + +fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 02 + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +#dump dcd all custom 1 ${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz +#dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 3.1015 + ghost atom cutoff = 3.1015 + binsize = 1.55075, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 843768.92 -904.74343 841988.51 -421279.13 253.30998 382.47517 -1646.5336 -1.9663356 -0.82731217 341.38937 -0.58528882 + 100 3.1989795 954.89538 1014.1504 -520.09611 5337.0116 98.252811 7794.4231 306.80873 -0.79835015 13.250964 8.3267012 341.38937 5.939687 + 200 2.051821 612.46858 541.7755 -549.98447 4177.5291 334.44025 -8232.4092 308.68322 0.1241056 12.726742 8.0817572 341.38937 5.1585163 + 300 2.0005528 597.165 636.76014 -492.45624 3999.6771 286.94793 8675.7287 309.21515 -0.061380815 14.107894 8.6065735 341.38937 6.1072177 + 400 2.1301684 635.85527 629.30382 -501.34009 3784.41 290.67609 -85.218746 309.0205 -0.046820083 13.638883 8.2475712 341.38937 6.0953183 + 500 2.1090566 629.5534 661.40704 -527.17197 3556 274.62448 650.39076 309.45886 -0.1095114 12.806143 7.7267347 341.38937 5.6643757 + 600 2.1073572 629.04613 563.34039 -484.05666 3219.3472 323.6578 3169.1089 309.48769 0.081993582 13.761585 8.0387709 341.38937 6.2065377 + 700 1.9719958 588.64076 524.29412 -497.63256 2901.7138 343.18094 2002.8977 309.02634 0.15824332 12.74807 7.4599969 341.38937 5.9128176 + 800 2.0469937 611.0276 634.75777 -516.58664 2864.9389 287.94911 2331.5161 308.67674 -0.057470593 11.606039 6.8145291 341.38937 5.4463933 + 900 1.9587768 584.69488 554.57173 -536.54463 2597.7781 328.04213 599.92997 308.62574 0.099117065 10.499473 6.2153451 341.38937 4.9090894 + 1000 2.0978183 626.19876 628.76262 -559.36931 2533.6673 290.94669 
-1268.3145 308.58653 -0.045763228 9.4221992 5.5934009 341.38937 4.5482659 +Loop time of 0.188222 on 1 procs for 1000 steps with 200 atoms + +Performance: 206132.206 tau/day, 5312.870 timesteps/s, 1.063 Matom-step/s +99.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.092279 | 0.092279 | 0.092279 | 0.0 | 49.03 +Neigh | 0.016436 | 0.016436 | 0.016436 | 0.0 | 8.73 +Comm | 0.0037922 | 0.0037922 | 0.0037922 | 0.0 | 2.01 +Output | 9.8e-05 | 9.8e-05 | 9.8e-05 | 0.0 | 0.05 +Modify | 0.074731 | 0.074731 | 0.074731 | 0.0 | 39.70 +Other | | 0.0008862 | | | 0.47 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1113 ave 1113 max 1113 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 7470 ave 7470 max 7470 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 7470 +Ave neighs/atom = 37.35 +Neighbor list builds = 57 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++ b/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++ deleted file mode 100644 index 4ed421dddb0..00000000000 --- a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++ +++ /dev/null @@ -1,2 +0,0 @@ -LAMMPS (28 Mar 2023) -Running on 2 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.0 b/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.0 deleted file mode 100644 index d219e208455..00000000000 --- a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.0 +++ /dev/null @@ -1,97 +0,0 @@ -LAMMPS (28 Mar 2023) -Processor partition = 0 - using 1 OpenMP thread(s) per MPI task -variable ibead uloop 32 pad - -units lj -atom_style atomic -atom_modify map yes -boundary p p p 
-pair_style lj/cut 2.8015 -read_data data.lj${ibead} -read_data data.lj01 -Reading data file ... - orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 200 atoms - read_data CPU = 0.001 seconds - -pair_coeff * * 1.0 1.0 -pair_modify shift yes - -mass 1 1.0 - -timestep 0.00044905847 - -fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 01 - -thermo_style custom step temp f_1[*] vol press -thermo 100 -thermo_modify norm no - -#dump dcd all custom 1 ${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz -#dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" - -run 1000 -Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99775496e-01 2.11886210e-02 - 1 1.31777963e+02 3.79426112e-03 9.42540858e-01 3.34090903e-01 -PILE_L thermostat successfully initialized! - -Neighbor list info ... 
- update: every = 1 steps, delay = 0 steps, check = yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 3.1015 - ghost atom cutoff = 3.1015 - binsize = 1.55075, bins = 5 5 5 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair lj/cut, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes - Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press - 0 0 0 0 -875.67022 841866.06 -843041.14 253.30998 382.47517 -1646.2945 -1.9663356 -0.82731217 341.38937 -1.3810467 - 100 14.815998 4422.5753 0 -534.91485 5336.9805 -409.03828 7790.9787 306.80845 -0.79877176 13.250868 8.3263811 341.38937 12.561531 - 200 13.762526 4108.114 0 -535.07896 4177.1707 63.548813 -8225.489 308.68383 0.1240984 12.723365 8.0800159 341.38937 12.1757 - 300 12.566448 3751.0847 0 -493.13907 3999.3337 -31.433066 8675.7059 309.21599 -0.061382631 14.105486 8.6052841 341.38937 12.245009 - 400 11.843976 3535.4267 0 -515.14836 3784.077 -23.971605 -81.467506 309.02093 -0.046811856 13.638064 8.2470184 341.38937 11.61585 - 500 11.100239 3313.4214 0 -521.49831 3555.6926 -56.076799 648.94757 309.45936 -0.10950702 12.805213 7.7261445 341.38937 10.991018 - 600 9.9616183 2973.5431 0 -462.72888 3219.061 41.992567 3171.6576 309.48724 0.082003271 13.759365 8.0375919 341.38937 11.071546 - 700 9.3388468 2787.6458 0 -501.33865 2901.4483 81.033913 2000.5159 309.02619 0.15824338 12.744135 7.4580508 341.38937 10.129991 - 800 8.9069211 2658.716 0 -523.1858 2864.6773 -29.435005 2329.1521 308.67617 -0.057480808 11.604242 6.8135454 341.38937 9.3513467 - 900 8.5046965 2538.6519 0 -543.75602 2597.5491 50.752591 601.47078 308.62547 0.099109884 10.497389 6.2142574 341.38937 8.6389792 - 1000 8.0725601 2409.6592 0 -571.72872 2533.47 -23.431499 -1267.4683 308.58765 -0.045757135 9.421094 5.5928021 341.38937 7.8375753 
-Loop time of 0.201181 on 1 procs for 1000 steps with 200 atoms - -Performance: 192854.150 tau/day, 4970.640 timesteps/s, 994.128 katom-step/s -98.8% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.11949 | 0.11949 | 0.11949 | 0.0 | 59.39 -Neigh | 0.011868 | 0.011868 | 0.011868 | 0.0 | 5.90 -Comm | 0.0041169 | 0.0041169 | 0.0041169 | 0.0 | 2.05 -Output | 0.00011916 | 0.00011916 | 0.00011916 | 0.0 | 0.06 -Modify | 0.064249 | 0.064249 | 0.064249 | 0.0 | 31.94 -Other | | 0.00134 | | | 0.67 - -Nlocal: 200 ave 200 max 200 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 1117 ave 1117 max 1117 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 7492 ave 7492 max 7492 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 7492 -Ave neighs/atom = 37.46 -Neighbor list builds = 55 -Dangerous builds = 0 -Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.1 b/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.1 deleted file mode 100644 index 0804a3488a5..00000000000 --- a/examples/PACKAGES/pimd/langevin_reduced_units/log.14Jun23.langevin.reduced.g++.1 +++ /dev/null @@ -1,97 +0,0 @@ -LAMMPS (28 Mar 2023) -Processor partition = 1 - using 1 OpenMP thread(s) per MPI task -variable ibead uloop 32 pad - -units lj -atom_style atomic -atom_modify map yes -boundary p p p -pair_style lj/cut 2.8015 -read_data data.lj${ibead} -read_data data.lj02 -Reading data file ... - orthogonal box = (-3.4945131 -3.4945131 -3.4945131) to (3.4945131 3.4945131 3.4945131) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 200 atoms - read_data CPU = 0.001 seconds - -pair_coeff * * 1.0 1.0 -pair_modify shift yes - -mass 1 1.0 - -timestep 0.00044905847 - -fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 all pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 02 - -thermo_style custom step temp f_1[*] vol press -thermo 100 -thermo_modify norm no - -#dump dcd all custom 1 ${ibead}.lammpstrj id type x y z vx vy vz ix iy iz fx fy fz -#dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" - -run 1000 -Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99775496e-01 2.11886210e-02 - 1 1.31777963e+02 3.79426112e-03 9.42540858e-01 3.34090903e-01 -PILE_L thermostat successfully initialized! - -Neighbor list info ... 
- update: every = 1 steps, delay = 0 steps, check = yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 3.1015 - ghost atom cutoff = 3.1015 - binsize = 1.55075, bins = 5 5 5 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair lj/cut, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes - Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press - 0 0 0 843646.47 -904.74343 841866.06 -843041.14 253.30998 382.47517 -1646.2945 -1.9663356 -0.82731217 341.38937 -0.58528882 - 100 3.1994696 955.04167 1014.3663 -520.08788 5336.9805 -409.03828 7790.9787 306.80845 -0.79877176 13.250868 8.3263811 341.38937 5.9400539 - 200 2.0518234 612.46927 541.77919 -550.11284 4177.1707 63.548813 -8225.489 308.68383 0.1240984 12.723365 8.0800159 341.38937 5.1568532 - 300 2.0005565 597.16612 636.76107 -492.53916 3999.3337 -31.433066 8675.7059 309.21599 -0.061382631 14.105486 8.6052841 341.38937 6.1061183 - 400 2.1301774 635.85796 629.2996 -501.35894 3784.077 -23.971605 -81.467506 309.02093 -0.046811856 13.638064 8.2470184 341.38937 6.0950651 - 500 2.1090509 629.55171 661.4048 -527.18699 3555.6926 -56.076799 648.94757 309.45936 -0.10950702 12.805213 7.7261445 341.38937 5.6641225 - 600 2.1073653 629.04855 563.33543 -484.13719 3219.061 41.992567 3171.6576 309.48724 0.082003271 13.759365 8.0375919 341.38937 6.2053973 - 700 1.9719777 588.63535 524.29409 -497.78822 2901.4483 81.033913 2000.5159 309.02619 0.15824338 12.744135 7.4580508 341.38937 5.9107525 - 800 2.0469949 611.02799 634.76301 -516.64387 2864.6773 -29.435005 2329.1521 308.67617 -0.057480808 11.604242 6.8135454 341.38937 5.445481 - 900 1.9587942 584.70008 554.57541 -536.62232 2597.5491 50.752591 601.47078 308.62547 0.099109884 10.497389 6.2142574 341.38937 4.9080572 - 1000 2.0978185 626.19882 628.7595 -559.41879 
2533.47 -23.431499 -1267.4683 308.58765 -0.045757135 9.421094 5.5928021 341.38937 4.5477372 -Loop time of 0.20118 on 1 procs for 1000 steps with 200 atoms - -Performance: 192855.450 tau/day, 4970.674 timesteps/s, 994.135 katom-step/s -99.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.12009 | 0.12009 | 0.12009 | 0.0 | 59.69 -Neigh | 0.012807 | 0.012807 | 0.012807 | 0.0 | 6.37 -Comm | 0.0040331 | 0.0040331 | 0.0040331 | 0.0 | 2.00 -Output | 0.00012271 | 0.00012271 | 0.00012271 | 0.0 | 0.06 -Modify | 0.062764 | 0.062764 | 0.062764 | 0.0 | 31.20 -Other | | 0.001361 | | | 0.68 - -Nlocal: 200 ave 200 max 200 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 1117 ave 1117 max 1117 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 7460 ave 7460 max 7460 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 7460 -Ave neighs/atom = 37.3 -Neighbor list builds = 57 -Dangerous builds = 0 -Total wall time: 0:00:00 From 07f8620f4f8057ef1896fd8fad3e3c0aea0d59ec Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 31 Oct 2025 20:59:23 -0600 Subject: [PATCH 132/604] Add templates to forward comm --- src/KOKKOS/atom_vec_kokkos.cpp | 497 ++++++++++++++++++++++----------- 1 file changed, 329 insertions(+), 168 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 9c223f288da..7e99bfb9dd0 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -62,7 +62,7 @@ void AtomVecKokkos::setup_fields() /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackComm { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -122,30 +122,32 @@ struct AtomVecKokkos_PackComm { } } - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - } + if constexpr 
(!DEFAULT) { + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); + if (_datamask & DPDTHETA_MASK) + _buf(i,m++) = _dpdTheta(j); - if (_datamask & UCOND_MASK) - _buf(i,m++) = _uCond(j); + if (_datamask & UCOND_MASK) + _buf(i,m++) = _uCond(j); - if (_datamask & UMECH_MASK) - _buf(i,m++) = _uMech(j); + if (_datamask & UMECH_MASK) + _buf(i,m++) = _uMech(j); - if (_datamask & UCHEM_MASK) - _buf(i,m++) = _uChem(j); + if (_datamask & UCHEM_MASK) + _buf(i,m++) = _uChem(j); + } } }; @@ -164,54 +166,110 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n, atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct 
AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } } else { atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + 
Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } } @@ -221,7 +279,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackCommSelf { 
typedef DeviceType device_type; typedef ArrayTypes AT; @@ -281,30 +339,32 @@ struct AtomVecKokkos_PackCommSelf { } } - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } + if constexpr (!DEFAULT) { + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } - if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); - } + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); + } - if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); - if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); + if (_datamask & UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); - if (_datamask & UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); - if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); + } } }; @@ -312,62 +372,119 @@ struct AtomVecKokkos_PackCommSelf { int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst, const int &pbc_flag, const int* const pbc) { + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + if (lmp->kokkos->forward_comm_on_host) { atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + 
domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf 
f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } - atomKK->modified(HostKK,datamask_comm); } else { atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + 
domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } - atomKK->modified(Device,datamask_comm); } return n*size_forward; @@ -375,7 +492,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackCommSelfFused { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -456,30 +573,32 @@ struct AtomVecKokkos_PackCommSelfFused { } } - if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); - } + if constexpr (!DEFAULT) { + if (_datamask & MU_MASK) { + _muw(i+_nfirst,0) = _mu(j,0); + _muw(i+_nfirst,1) = _mu(j,1); + _muw(i+_nfirst,2) = _mu(j,2); + } - if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); - } + if (_datamask & SP_MASK) { + _spw(i+_nfirst,0) = _sp(j,0); + _spw(i+_nfirst,1) = _sp(j,1); + _spw(i+_nfirst,2) = _sp(j,2); + _spw(i+_nfirst,3) = _sp(j,3); + } - if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); + if (_datamask & DPDTHETA_MASK) + _dpdThetaw(i+_nfirst) = _dpdTheta(j); - if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); + if (_datamask & 
UCOND_MASK) + _uCondw(i+_nfirst) = _uCond(j); - if (_datamask & UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); + if (_datamask & UMECH_MASK) + _uMechw(i+_nfirst) = _uMech(j); - if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); + if (_datamask & UCHEM_MASK) + _uChemw(i+_nfirst) = _uChem(j); + } } }; @@ -491,29 +610,57 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr if (lmp->kokkos->forward_comm_on_host) { atomKK->sync(HostKK,datamask_comm); if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } atomKK->modified(HostKK,datamask_comm); } else { atomKK->sync(Device,datamask_comm); if 
(domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } atomKK->modified(Device,datamask_comm); } @@ -523,7 +670,7 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackComm { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -561,30 +708,32 @@ struct AtomVecKokkos_UnpackComm { _x(i+_first,1) = _buf(i,m++); _x(i+_first,2) = _buf(i,m++); - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - } + if 
constexpr (!DEFAULT) { + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); + if (_datamask & DPDTHETA_MASK) + _dpdTheta(i+_first) = _buf(i,m++); - if (_datamask & UCOND_MASK) - _uCond(i+_first) = _buf(i,m++); + if (_datamask & UCOND_MASK) + _uCond(i+_first) = _buf(i,m++); - if (_datamask & UMECH_MASK) - _uMech(i+_first) = _buf(i,m++); + if (_datamask & UMECH_MASK) + _uMech(i+_first) = _buf(i,m++); - if (_datamask & UCHEM_MASK) - _uChem(i+_first) = _buf(i,m++); + if (_datamask & UCHEM_MASK) + _uChem(i+_first) = _buf(i,m++); + } } }; @@ -593,14 +742,26 @@ struct AtomVecKokkos_UnpackComm { void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,datamask_comm); - struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + atomKK->sync(HostKK,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + atomKK->sync(HostKK,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,datamask_comm); - struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + atomKK->sync(Device,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + 
Kokkos::parallel_for(n,f); + } else { + atomKK->sync(Device,datamask_comm); + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } atomKK->modified(Device,datamask_comm); } } From 13e4919189af09b8ffbd595c44773d39e68df899 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 11:56:36 -0400 Subject: [PATCH 133/604] re-check during init_style if computes and fixes still produce per-atom data --- src/dump_custom.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp index 0ead0c801af..2fc954ba655 100644 --- a/src/dump_custom.cpp +++ b/src/dump_custom.cpp @@ -443,11 +443,16 @@ void DumpCustom::init_style() for (i = 0; i < ncompute; i++) { compute[i] = modify->get_compute_by_id(id_compute[i]); if (!compute[i]) error->all(FLERR,"Could not find dump {} compute ID {}",style,id_compute[i]); + if (!compute[i]->peratom_flag) + error->all(FLERR,"Compute ID {} for dump {} does not compute per-atom data", + id_compute[i], style); } for (i = 0; i < nfix; i++) { fix[i] = modify->get_fix_by_id(id_fix[i]); if (!fix[i]) error->all(FLERR,"Could not find dump {} fix ID {}", style, id_fix[i]); + if (!fix[i]->peratom_flag) + error->all(FLERR,"Fix ID {} for dump {} does not compute per-atom data", id_fix[i],style); if (nevery % fix[i]->peratom_freq) error->all(FLERR,"Dump {} and fix not computed at compatible times{}", style, utils::errorurl(7)); From 7ac593c23677553efa6053abf00986c76b93640b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 12:15:05 -0400 Subject: [PATCH 134/604] add feature to optionally store constraint forces as per-atom data --- doc/src/fix_shake.rst | 22 ++++++- src/KOKKOS/fix_shake_kokkos.cpp | 3 + src/RIGID/fix_rattle.cpp | 22 ++++--- src/RIGID/fix_shake.cpp | 107 ++++++++++++++++++++++++-------- src/RIGID/fix_shake.h | 4 ++ 5 files changed, 122 insertions(+), 36 deletions(-) diff --git a/doc/src/fix_shake.rst b/doc/src/fix_shake.rst index 
6ad8c5851df..f0a2255d063 100644 --- a/doc/src/fix_shake.rst +++ b/doc/src/fix_shake.rst @@ -33,7 +33,7 @@ Syntax *m* value = one or more mass values * zero or more keyword/value pairs may be appended -* keyword = *mol* or *kbond* +* keyword = *mol* or *kbond* or *store* .. parsed-literal:: @@ -41,6 +41,7 @@ Syntax template-ID = ID of molecule template specified in a separate :doc:`molecule ` command *kbond* value = force constant force constant = force constant used to apply a restraint force when used during minimization + *store* value = *yes* or *no* Examples """""""" @@ -50,6 +51,7 @@ Examples fix 1 sub shake 0.0001 20 10 b 4 19 a 3 5 2 fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol + fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 store yes fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol @@ -198,6 +200,22 @@ will be fulfilled to the desired accuracy within a few MD steps following the minimization. The default value for *kbond* depends on the :doc:`units ` setting and is 1.0e9*k_B. +.. versionadded:: TBD + +The *store* keyword controls whether the fix stores the constraint +(or restraint) forces as a per-atom property. + +During an MD :doc:`run `, the constraint forces are the forces on +atoms due to the constraints after an constrained position update. +Applying the SHAKE constraint *minimizes* those forces. By using *store +yes* the original constraint forces on all atoms can be accessed as a +per-atom array of the fix. + +During a :doc:`minimization `, restraint forces are added to +the atoms to keep the constrained bonds and angles close to their +initial values. By using *store yes* those added forces can be accessed +as a per-atom array of the fix. + ---------- .. 
include:: accel_styles.rst @@ -292,7 +310,7 @@ Related commands Default """"""" -kbond = 1.0e9*k_B +kbond = 1.0e9*k_B, store = no ---------- diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index fff17c3e99f..42a42f182b3 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -44,6 +44,9 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; + if (store_flag) + error->all(FLERR, "Option 'store yes' is not (yet) supported by fix {}/kk", style); + datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; diff --git a/src/RIGID/fix_rattle.cpp b/src/RIGID/fix_rattle.cpp index f7673194a9e..7438debbd25 100644 --- a/src/RIGID/fix_rattle.cpp +++ b/src/RIGID/fix_rattle.cpp @@ -64,6 +64,8 @@ FixRattle::FixRattle(LAMMPS *lmp, int narg, char **arg) : vp = nullptr; FixRattle::grow_arrays(atom->nmax); + if (store_flag) error->all(FLERR, "Option 'store yes' is not yet compatible with fix rattle"); + // default communication mode // necessary for compatibility with SHAKE // see pack_forward and unpack_forward @@ -83,18 +85,20 @@ FixRattle::~FixRattle() #if RATTLE_DEBUG - // communicate maximum distance error + // communicate maximum distance error - double global_derr_max, global_verr_max; - int npid; + double global_derr_max, global_verr_max; + int npid; - MPI_Reduce(&derr_max, &global_derr_max, 1 , MPI_DOUBLE, MPI_MAX, 0, world); - MPI_Reduce(&verr_max, &global_verr_max, 1 , MPI_DOUBLE, MPI_MAX, 0, world); + MPI_Reduce(&derr_max, &global_derr_max, 1 , MPI_DOUBLE, MPI_MAX, 0, world); + MPI_Reduce(&verr_max, &global_verr_max, 1 , MPI_DOUBLE, MPI_MAX, 0, world); - if (comm->me == 0 && screen) { - fprintf(screen, "RATTLE: Maximum overall relative position error ( (r_ij-d_ij)/d_ij ): %.10g\n", global_derr_max); - fprintf(screen, "RATTLE: Maximum overall absolute velocity error (r_ij * v_ij): %.10g\n", global_verr_max); - } + if 
(comm->me == 0) { + utils::logmesg(lmp, "RATTLE: Maximum overall relative position error ( (r_ij-d_ij)/d_ij ): " + "{:.10}\n", global_derr_max); + utils::logmesg(lmp, "RATTLE: Maximum overall absolute velocity error (r_ij * v_ij): {:.10}\n", + global_verr_max); + } #endif } diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index dcbd20939f1..ed04c0c0a38 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -48,12 +48,12 @@ static constexpr double MASSDELTA = 0.1; FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), bond_flag(nullptr), angle_flag(nullptr), type_flag(nullptr), - mass_list(nullptr), bond_distance(nullptr), angle_distance(nullptr), loop_respa(nullptr), - step_respa(nullptr), x(nullptr), v(nullptr), f(nullptr), ftmp(nullptr), vtmp(nullptr), - mass(nullptr), rmass(nullptr), type(nullptr), shake_flag(nullptr), shake_atom(nullptr), - shake_type(nullptr), xshake(nullptr), nshake(nullptr), list(nullptr), closest_list(nullptr), - b_count(nullptr), b_count_all(nullptr), b_ave(nullptr), b_max(nullptr), b_min(nullptr), - b_ave_all(nullptr), b_max_all(nullptr), b_min_all(nullptr), a_count(nullptr), + mass_list(nullptr), bond_distance(nullptr), angle_distance(nullptr), fstore(nullptr), + loop_respa(nullptr), step_respa(nullptr), x(nullptr), v(nullptr), f(nullptr), ftmp(nullptr), + vtmp(nullptr), mass(nullptr), rmass(nullptr), type(nullptr), shake_flag(nullptr), + shake_atom(nullptr), shake_type(nullptr), xshake(nullptr), nshake(nullptr), list(nullptr), + closest_list(nullptr), b_count(nullptr), b_count_all(nullptr), b_ave(nullptr), b_max(nullptr), + b_min(nullptr), b_ave_all(nullptr), b_max_all(nullptr), b_min_all(nullptr), a_count(nullptr), a_count_all(nullptr), a_ave(nullptr), a_max(nullptr), a_min(nullptr), a_ave_all(nullptr), a_max_all(nullptr), a_min_all(nullptr), atommols(nullptr), onemols(nullptr) { @@ -79,16 +79,13 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : if (molecular == Atom::ATOMIC) 
error->all(FLERR, "Cannot use fix {} with non-molecular system", style); - // perform initial allocation of atom-based arrays - // register with Atom class + // do not store constraint forces by default - shake_flag = nullptr; - shake_atom = nullptr; - shake_type = nullptr; - xshake = nullptr; + store_flag = peratom_flag = 0; + maxstore = -1; - ftmp = nullptr; - vtmp = nullptr; + // perform initial allocation of atom-based arrays + // register with Atom class FixShake::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); @@ -150,7 +147,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : // break if known optional keyword - } else if ((strcmp(arg[next], "mol") == 0) || (strcmp(arg[next], "kbond") == 0)) { + } else if ((strcmp(arg[next], "mol") == 0) || (strcmp(arg[next], "kbond") == 0) || + (strcmp(arg[next], "store") == 0)) { break; // get numeric types for b, a, t, or m keywords. @@ -212,6 +210,19 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) : kbond = utils::numeric(FLERR, arg[iarg+1], false, lmp); if (kbond < 0) error->all(FLERR,"Illegal {} kbond value {}. 
Must be >= 0.0", mystyle, kbond); iarg += 2; + } else if (strcmp(arg[iarg],"store") == 0) { + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,mystyle+" store",error); + store_flag = utils::logical(FLERR, arg[iarg+1], false, lmp); + if (store_flag) { + peratom_flag = 1; + size_peratom_cols = 3; + peratom_freq = 1; + } else { + peratom_flag = 0; + size_peratom_cols = 0; + peratom_freq = 0; + } + iarg += 2; } else error->all(FLERR,"Unknown {} command option: {}", mystyle, arg[iarg]); } @@ -315,6 +326,7 @@ FixShake::~FixShake() memory->destroy(ftmp); memory->destroy(vtmp); + memory->destroy(fstore); delete[] bond_flag; delete[] angle_flag; @@ -777,9 +789,22 @@ void FixShake::min_post_force(int vflag) a_min[i] = BIG; } + // allocate storage for restraint forces if requested + + if (store_flag) { + if (maxstore < atom->nmax) { + maxstore = MAX(atom->nmax, 1); + memory->destroy(fstore); + memory->create(fstore, maxstore, 3, "shake/fstore"); + for (int i = 0; i < maxstore; ++i) fstore[i][0] = fstore[i][1] = fstore[i][2] = 0.0; + } + array_atom = fstore; + } + // loop over local shake clusters to add restraint forces for (int i = 0; i < nlocal; i++) { + if (store_flag) fstore[i][0] = fstore[i][1] = fstore[i][2] = 0.0; if (shake_flag[i]) { if (shake_flag[i] == 2) { atom1 = atom->map(shake_atom[i][0]); @@ -1455,8 +1480,9 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag, partner_massflag[i][j] = masscheck(massone); } n = bondtype_findset(i,tag[i],partner_tag[i][j],0); - if (n) partner_bondtype[i][j] = n; - else { + if (n) { + partner_bondtype[i][j] = n; + } else { n = bondtype_findset(m,tag[i],partner_tag[i][j],0); if (n) partner_bondtype[i][j] = n; } @@ -2809,6 +2835,11 @@ double FixShake::bond_force(int i1, int i2, double length) f[i1][0] += delx * fbond; f[i1][1] += dely * fbond; f[i1][2] += delz * fbond; + if (store_flag) { + fstore[i1][0] += delx * fbond; + fstore[i1][1] += dely * fbond; + fstore[i1][2] += delz * fbond; + } atomlist[count++] = i1; ebond 
+= 0.5*eb; } @@ -2816,6 +2847,11 @@ double FixShake::bond_force(int i1, int i2, double length) f[i2][0] -= delx * fbond; f[i2][1] -= dely * fbond; f[i2][2] -= delz * fbond; + if (store_flag) { + fstore[i2][0] -= delx * fbond; + fstore[i2][1] -= dely * fbond; + fstore[i2][2] -= delz * fbond; + } atomlist[count++] = i2; ebond += 0.5*eb; } @@ -3445,11 +3481,23 @@ void FixShake::correct_velocities() {} void FixShake::correct_coordinates(int vflag) { + // allocate storage for constraint forces if requested + + if (store_flag) { + if (maxstore < atom->nmax) { + maxstore = MAX(atom->nmax,1); + memory->destroy(fstore); + memory->create(fstore, maxstore, 3, "shake/fstore"); + for (int i = 0; i < maxstore; ++i) fstore[i][0] = fstore[i][1] = fstore[i][2] = 0.0; + } + array_atom = fstore; + } + // save current forces and velocities so that you // initialize them to zero such that FixShake::unconstrained_coordinate_update has no effect - for (int j=0; j Date: Sat, 1 Nov 2025 15:26:30 -0400 Subject: [PATCH 135/604] Fix minor grammar issue in fix shake docs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- doc/src/fix_shake.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_shake.rst b/doc/src/fix_shake.rst index f0a2255d063..b9d0a2a4ce4 100644 --- a/doc/src/fix_shake.rst +++ b/doc/src/fix_shake.rst @@ -206,7 +206,7 @@ The *store* keyword controls whether the fix stores the constraint (or restraint) forces as a per-atom property. During an MD :doc:`run `, the constraint forces are the forces on -atoms due to the constraints after an constrained position update. +atoms due to the constraints after a constrained position update. Applying the SHAKE constraint *minimizes* those forces. By using *store yes* the original constraint forces on all atoms can be accessed as a per-atom array of the fix. 
From baff68eb4c1b25727f78204b6228d095a2511fb1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 15:42:02 -0400 Subject: [PATCH 136/604] support outputting constraint/restraint forces also during fix rattle --- src/RIGID/fix_rattle.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/RIGID/fix_rattle.cpp b/src/RIGID/fix_rattle.cpp index 7438debbd25..fb57d49326e 100644 --- a/src/RIGID/fix_rattle.cpp +++ b/src/RIGID/fix_rattle.cpp @@ -64,8 +64,6 @@ FixRattle::FixRattle(LAMMPS *lmp, int narg, char **arg) : vp = nullptr; FixRattle::grow_arrays(atom->nmax); - if (store_flag) error->all(FLERR, "Option 'store yes' is not yet compatible with fix rattle"); - // default communication mode // necessary for compatibility with SHAKE // see pack_forward and unpack_forward @@ -139,8 +137,7 @@ void FixRattle::init() { } if (flag && comm->me == 0) - error->warning(FLERR, - "Fix rattle should come after all other integration fixes "); + error->warning(FLERR, "Fix rattle should come after all other integration fixes "); } /* ---------------------------------------------------------------------- From e9a20f87270b5f8ccf3974a9d752ab2063a1620a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 15:47:29 -0400 Subject: [PATCH 137/604] apply clang-format to some lines --- src/RIGID/fix_rattle.cpp | 50 ++++++++++++++++++++-------------------- src/RIGID/fix_shake.cpp | 50 ++++++++++++++++++++-------------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/RIGID/fix_rattle.cpp b/src/RIGID/fix_rattle.cpp index fb57d49326e..b9bab3853d2 100644 --- a/src/RIGID/fix_rattle.cpp +++ b/src/RIGID/fix_rattle.cpp @@ -262,13 +262,13 @@ void FixRattle::vrattle3angle(int m) // matrix coeffs and rhs for lamda equations if (rmass) { - imass[0] = 1.0/rmass[i0]; - imass[1] = 1.0/rmass[i1]; - imass[2] = 1.0/rmass[i2]; + imass[0] = 1.0 / rmass[i0]; + imass[1] = 1.0 / rmass[i1]; + imass[2] = 1.0 / rmass[i2]; } else { - imass[0] = 
1.0/mass[type[i0]]; - imass[1] = 1.0/mass[type[i1]]; - imass[2] = 1.0/mass[type[i2]]; + imass[0] = 1.0 / mass[type[i0]]; + imass[1] = 1.0 / mass[type[i1]]; + imass[2] = 1.0 / mass[type[i2]]; } // setup matrix @@ -333,11 +333,11 @@ void FixRattle::vrattle2(int m) // matrix coeffs and rhs for lamda equations if (rmass) { - imass[0] = 1.0/rmass[i0]; - imass[1] = 1.0/rmass[i1]; + imass[0] = 1.0 / rmass[i0]; + imass[1] = 1.0 / rmass[i1]; } else { - imass[0] = 1.0/mass[type[i0]]; - imass[1] = 1.0/mass[type[i1]]; + imass[0] = 1.0 / mass[type[i0]]; + imass[1] = 1.0 / mass[type[i1]]; } // Lagrange multiplier: exact solution @@ -385,13 +385,13 @@ void FixRattle::vrattle3(int m) MathExtra::sub3(vp[i2],vp[i0],vp02); if (rmass) { - imass[0] = 1.0/rmass[i0]; - imass[1] = 1.0/rmass[i1]; - imass[2] = 1.0/rmass[i2]; + imass[0] = 1.0 / rmass[i0]; + imass[1] = 1.0 / rmass[i1]; + imass[2] = 1.0 / rmass[i2]; } else { - imass[0] = 1.0/mass[type[i0]]; - imass[1] = 1.0/mass[type[i1]]; - imass[2] = 1.0/mass[type[i2]]; + imass[0] = 1.0 / mass[type[i0]]; + imass[1] = 1.0 / mass[type[i1]]; + imass[2] = 1.0 / mass[type[i2]]; } // setup matrix @@ -460,15 +460,15 @@ void FixRattle::vrattle4(int m) // matrix coeffs and rhs for lamda equations if (rmass) { - imass[0] = 1.0/rmass[i0]; - imass[1] = 1.0/rmass[i1]; - imass[2] = 1.0/rmass[i2]; - imass[3] = 1.0/rmass[i3]; + imass[0] = 1.0 / rmass[i0]; + imass[1] = 1.0 / rmass[i1]; + imass[2] = 1.0 / rmass[i2]; + imass[3] = 1.0 / rmass[i3]; } else { - imass[0] = 1.0/mass[type[i0]]; - imass[1] = 1.0/mass[type[i1]]; - imass[2] = 1.0/mass[type[i2]]; - imass[3] = 1.0/mass[type[i3]]; + imass[0] = 1.0 / mass[type[i0]]; + imass[1] = 1.0 / mass[type[i1]]; + imass[2] = 1.0 / mass[type[i2]]; + imass[3] = 1.0 / mass[type[i3]]; } // setup matrix @@ -604,7 +604,7 @@ void FixRattle::update_v_half_nocons() } else { for (int i = 0; i < nlocal; i++) { - dtfvinvm = dtfv/mass[type[i]]; + dtfvinvm = dtfv / mass[type[i]]; if (shake_flag[i]) { for (int k=0; k<3; k++) vp[i][k] 
= v[i][k] + dtfvinvm * f[i][k]; diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index ed04c0c0a38..b96e29319ba 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -2070,11 +2070,11 @@ void FixShake::shake(int ilist) // a,b,c = coeffs in quadratic equation for lamda if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; } double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; @@ -2187,13 +2187,13 @@ void FixShake::shake3(int ilist) // matrix coeffs and rhs for lamda equations if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * @@ -2375,15 +2375,15 @@ void FixShake::shake4(int ilist) // matrix coeffs and rhs for lamda equations if (rmass) { - invmass0 = 1.0/rmass[i0]; - invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; - invmass3 = 1.0/rmass[i3]; + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; + invmass3 = 1.0 / rmass[i3]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; - invmass3 = 1.0/mass[type[i3]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; + invmass3 = 1.0 / mass[type[i3]]; } double a11 = 2.0 * (invmass0+invmass1) * @@ -2626,13 +2626,13 @@ void FixShake::shake3angle(int ilist) // matrix coeffs and rhs for lamda equations if (rmass) { - invmass0 = 1.0/rmass[i0]; 
- invmass1 = 1.0/rmass[i1]; - invmass2 = 1.0/rmass[i2]; + invmass0 = 1.0 / rmass[i0]; + invmass1 = 1.0 / rmass[i1]; + invmass2 = 1.0 / rmass[i2]; } else { - invmass0 = 1.0/mass[type[i0]]; - invmass1 = 1.0/mass[type[i1]]; - invmass2 = 1.0/mass[type[i2]]; + invmass0 = 1.0 / mass[type[i0]]; + invmass1 = 1.0 / mass[type[i1]]; + invmass2 = 1.0 / mass[type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * @@ -3523,7 +3523,7 @@ void FixShake::correct_coordinates(int vflag) { double dtfmsq; if (rmass) { for (int i = 0; i < nlocal; i++) { - dtfmsq = dtfsq/ rmass[i]; + dtfmsq = dtfsq / rmass[i]; x[i][0] = x[i][0] + dtfmsq*f[i][0]; x[i][1] = x[i][1] + dtfmsq*f[i][1]; x[i][2] = x[i][2] + dtfmsq*f[i][2]; From 602418982691b5cfd720d21ca3c4bd4d05a598e9 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 15:56:07 -0400 Subject: [PATCH 138/604] general fix shake/rattle documentation improvements --- doc/src/fix_shake.rst | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/doc/src/fix_shake.rst b/doc/src/fix_shake.rst index b9d0a2a4ce4..a823c23c1a9 100644 --- a/doc/src/fix_shake.rst +++ b/doc/src/fix_shake.rst @@ -19,9 +19,9 @@ Syntax * ID, group-ID are documented in :doc:`fix ` command * style = shake or rattle = style name of this fix command -* tol = accuracy tolerance of SHAKE solution -* iter = max # of iterations in each SHAKE solution -* N = print SHAKE statistics every this many timesteps (0 = never) +* tol = accuracy tolerance of SHAKE or RATTLE solution +* iter = max # of iterations in each SHAKE or RATTLE solution +* N = print SHAKE or RATTLE statistics every this many timesteps (0 = never) * one or more constraint/value pairs are appended * constraint = *b* or *a* or *t* or *m* @@ -119,6 +119,8 @@ Setting the N argument will print statistics to the screen and log file about regarding the lengths of bonds and angles that are being constrained. Small delta values mean SHAKE is doing a good job. 
+----- + In LAMMPS, only small clusters of atoms can be constrained. This is so the constraint calculation for a cluster can be performed by a single processor, to enable good parallel performance. A cluster is @@ -150,25 +152,27 @@ Thus, LAMMPS will print a warning and type label handling is disabled and numeric types must be used. For all constraints, a particular bond is only constrained if *both* -atoms in the bond are in the group specified with the SHAKE fix. +atoms in the bond are in the group specified with the SHAKE or RATTLE +fix. The degrees-of-freedom removed by SHAKE bonds and angles are accounted -for in temperature and pressure computations. Similarly, the SHAKE -contribution to the pressure of the system (virial) is also accounted -for. +for in temperature and pressure computations. Similarly, the SHAKE or +RATTLE contribution to the pressure of the system (virial) is also +accounted for. .. note:: - This command works by using the current forces on atoms to calculate - an additional constraint force which when added will leave the atoms - in positions that satisfy the SHAKE constraints (e.g. bond length) - after the next time integration step. If you define fixes - (e.g. :doc:`fix efield `) that add additional force to - the atoms after *fix shake* operates, then this fix will not take them - into account and the time integration will typically not satisfy the - SHAKE constraints. The solution for this is to make sure that fix - shake is defined in your input script after any other fixes which add - or change forces (to atoms that *fix shake* operates on). + The *fix shake* command works by using the current forces on atoms to + calculate an additional constraint force which when added will leave + the atoms in positions that satisfy the SHAKE constraints (e.g. bond + length) after the next time integration step. If you define fixes + (e.g. 
:doc:`fix efield `) that add additional forces to + the atoms **after** *fix shake* operates, then those forces will not + be taken into account, and the time integration will typically not + fully satisfy the SHAKE constraints. The solution for this is to make + sure that *fix shake* is defined in your input script **after** any + other fixes which add or change forces (to atoms that *fix shake* + operates on). ---------- From 25afce486d795cf846d1177d24883a20d05c23f5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 16:16:37 -0400 Subject: [PATCH 139/604] add warning to fix smd and its docs that alternatives should be used --- doc/src/fix_smd.rst | 8 ++++++++ src/EXTRA-FIX/fix_smd.cpp | 3 +++ 2 files changed, 11 insertions(+) diff --git a/doc/src/fix_smd.rst b/doc/src/fix_smd.rst index 14892227fd1..6db0f0ab3a2 100644 --- a/doc/src/fix_smd.rst +++ b/doc/src/fix_smd.rst @@ -47,6 +47,14 @@ Examples Description """"""""""" +.. admonition:: Fix smd is unmaintained + :class: warning + + Please note that *fix smd* is unmaintained and has multiple known + issues. We recommend using the equivalent functionality in either + :doc:`fix colvars ` or :doc:`fix plumed ` + instead, which are both actively maintained. + This fix implements several options of steered MD (SMD) as reviewed in :ref:`(Izrailev) `, which allows to induce conformational changes in systems and to compute the potential of mean force (PMF) diff --git a/src/EXTRA-FIX/fix_smd.cpp b/src/EXTRA-FIX/fix_smd.cpp index 9c7fc53c4ef..48c4f248091 100644 --- a/src/EXTRA-FIX/fix_smd.cpp +++ b/src/EXTRA-FIX/fix_smd.cpp @@ -112,6 +112,9 @@ FixSMD::FixSMD(LAMMPS *lmp, int narg, char **arg) : argoffs +=6; } else error->all(FLERR,"Illegal fix smd command"); + if (comm->me == 0) + error->warning(FLERR, "Fix smd is unmaintained.
Consider using fix colvars or fix plumed."); + force_flag = 0; ftotal[0] = ftotal[1] = ftotal[2] = 0.0; } From 166aea9c54151b3f179dd6ff4742d5eb4db28701 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 17:05:05 -0400 Subject: [PATCH 140/604] address issues reported at: https://download.lammps.org/analysis/ --- src/REACTION/fix_bond_react.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 1b61b0dbe77..d03b5fda14a 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -592,7 +592,7 @@ FixBondReact::~FixBondReact() if (constraint.type == Reaction::Constraint::Type::ARRHENIUS) delete constraint.arrhenius.rrhandom; - for (int i = 0; i < rxns.size(); i++) delete random[i]; + for (std::size_t i = 0; i < rxns.size(); i++) delete random[i]; delete[] random; delete reset_mol_ids; @@ -2090,7 +2090,6 @@ compute local temperature: average over all atoms in reaction template double FixBondReact::get_temperature(std::vector &glove) { - int i,ilocal; double adof = domain->dimension; double **v = atom->v; @@ -2101,14 +2100,14 @@ double FixBondReact::get_temperature(std::vector &glove) double t = 0.0; if (rmass) { - for (i = 0; i < glove.size(); i++) { - ilocal = atom->map(glove[i]); + for (const auto &g : glove) { + auto ilocal = atom->map(g); t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + v[ilocal][2]*v[ilocal][2]) * rmass[ilocal]; } } else { - for (i = 0; i < glove.size(); i++) { - ilocal = atom->map(glove[i]); + for (const auto &g : glove) { + auto ilocal = atom->map(g); t += (v[ilocal][0]*v[ilocal][0] + v[ilocal][1]*v[ilocal][1] + v[ilocal][2]*v[ilocal][2]) * mass[type[ilocal]]; } @@ -2945,7 +2944,7 @@ void FixBondReact::update_everything() for (int pass = 0; pass < 2; pass++) { update_num_mega = 0; int *noccur = new int[rxns.size()]; - for (int i = 0; i < rxns.size(); i++) noccur[i] = 0; + for 
(std::size_t i = 0; i < rxns.size(); i++) noccur[i] = 0; if (pass == 0) { for (int i = 0; i < local_num_mega; i++) { auto &rxn = rxns[(int) local_mega_glove[0][i]]; @@ -4177,7 +4176,7 @@ void FixBondReact::ReadConstraints(char *line, Reaction &rxn) } else if ((ptr = strstr(lptr,"||"))) { rxn.constraintstr += "||"; *ptr = '\0'; - } else if (constraint.ID+1 < rxn.constraints.size()) { + } else if (constraint.ID+1 < (int)rxn.constraints.size()) { rxn.constraintstr += "&&"; } if ((ptr = strchr(lptr,')'))) @@ -4467,7 +4466,7 @@ void FixBondReact::write_restart(FILE *fp) set[0].nrxns = rxns.size(); set[0].nratelimits = rate_limits.size(); - for (int i = 0; i < rxns.size(); i++) { + for (std::size_t i = 0; i < rxns.size(); i++) { set[i].reaction_count_total = rxns[i].reaction_count_total; strncpy(set[i].rxn_name,rxns[i].name.c_str(),MAXNAME-1); @@ -4524,7 +4523,7 @@ void FixBondReact::restart(char *buf) iptr += sizeof(Set)*r_nrxns; for (int i = 0; i < r_nrxns; i++) - for (int j = 0; j < rxns.size(); j++) + for (std::size_t j = 0; j < rxns.size(); j++) if (strcmp(set_restart[i].rxn_name,rxns[j].name.c_str()) == 0) rxns[j].reaction_count_total = set_restart[i].reaction_count_total; From 23d5f1076e36c23daebd2a776e3788e77ebfc6da Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 17:20:42 -0400 Subject: [PATCH 141/604] use move semantics to avoid copy of a temporary --- src/REACTION/fix_bond_react.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index d03b5fda14a..5efff92fb3d 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -211,7 +211,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : if (iarg+rlm.Nrxns+4 > narg) utils::missing_cmd_args(FLERR,"fix bond/react rate_limit", error); for (int i = 0; i < rlm.Nrxns; i++) { std::string tmpstr = arg[iarg+1+i]; - rlm.rxn_names.push_back(tmpstr); + 
rlm.rxn_names.push_back(std::move(tmpstr)); } char *myarg = arg[iarg+rlm.Nrxns+1]; // Nlimit if (strncmp(myarg,"v_",2) == 0) { @@ -4543,11 +4543,11 @@ void FixBondReact::restart(char *buf) for (int i = 0; i < r_rlm.Nrxns; i++) { r_rlm.rxnIDs.push_back(ibuf[ii++]); std::string myrxn_name = set_restart[r_rlm.rxnIDs[i]].rxn_name; - r_rlm.rxn_names.push_back(myrxn_name); + r_rlm.rxn_names.push_back(std::move(myrxn_name)); } r_rlm.Nsteps = ibuf[ii++]; for (int i = 0; i < r_rlm.Nsteps; i++) r_rlm.store_rxn_counts.push_back(ibuf[ii++]); - restart_rate_limits.push_back(r_rlm); + restart_rate_limits.push_back(std::move(r_rlm)); } // restore rate_limits store_rxn_counts if all rxn_names match // assumes there are no repeats - perhaps should error-check this? From 82b06e30aa1e7be0ede7ab557f8c35ff11089bc1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 1 Nov 2025 17:21:10 -0400 Subject: [PATCH 142/604] use const reference to avoid temporary copy of object with read-only access --- src/REACTION/fix_bond_react.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 5efff92fb3d..b3c2d40f60d 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -4476,7 +4476,7 @@ void FixBondReact::write_restart(FILE *fp) // to store, for each RateLimit: Nrxns rxn_IDs[Nrxns] NSteps store_rxn_counts[Nsteps] // NOTE: rxn_IDs only valid in reference to this restart file's reaction list int rbufcount = rate_limits.size()*2; - for (auto rlm : rate_limits) + for (const auto &rlm : rate_limits) rbufcount += rlm.Nsteps + rlm.Nrxns; int ii = 0; From 32f50fa499170a762678089fa6423faa0d55c8dd Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 2 Nov 2025 12:40:39 -0500 Subject: [PATCH 143/604] used consistent formatting and re-wrap --- doc/src/fix_gcmc.rst | 303 +++++++++++++++++++++---------------------- 1 file changed, 150 insertions(+), 153 deletions(-) diff --git 
a/doc/src/fix_gcmc.rst b/doc/src/fix_gcmc.rst index beb36fbafdf..2b6a7786f8a 100644 --- a/doc/src/fix_gcmc.rst +++ b/doc/src/fix_gcmc.rst @@ -68,82 +68,82 @@ Examples Description """"""""""" -This fix performs grand canonical Monte Carlo (GCMC) exchanges of atoms or -molecules with an imaginary ideal gas reservoir at the specified T and -chemical potential (mu) as discussed in :ref:`(Frenkel) `. It -also attempts Monte Carlo (MC) moves (translations and molecule rotations) -within the simulation cell or region. If used with the :doc:`fix nvt ` -command, simulations in the grand canonical ensemble (muVT, constant -chemical potential, constant volume, and constant temperature) can be -performed. Specific uses include computing isotherms in microporous -materials, or computing vapor-liquid coexistence curves. - -Every N timesteps the fix attempts both GCMC exchanges (insertions or -deletions) and MC moves of gas atoms or molecules. On those timesteps, the -average number of attempted GCMC exchanges is X, while the average number -of attempted MC moves is M. For GCMC exchanges of either molecular or -atomic gasses, these exchanges can be either deletions or insertions, with -equal probability. - -The possible choices for MC moves are translation of an atom, translation -of a molecule, and rotation of a molecule. The relative amounts of each are -determined by the optional *mcmoves* keyword (see below). The default -behavior is as follows. If the *mol* keyword is used, only molecule -translations and molecule rotations are performed with equal probability. -Conversely, if the *mol* keyword is not used, only atom translations are -performed. M should typically be chosen to be approximately equal to the -expected number of gas atoms or molecules of the given type within the -simulation cell or region, which will result in roughly one MC move per -atom or molecule per MC cycle. 
+This fix performs grand canonical Monte Carlo (GCMC) exchanges of atoms +or molecules with an imaginary ideal gas reservoir at the specified T +and chemical potential (:math:`\mu`) as discussed in :ref:`(Frenkel) +`. It also attempts Monte Carlo (MC) moves (translations and +molecule rotations) within the simulation cell or region. If used with +the :doc:`fix nvt ` command, simulations in the grand canonical +ensemble (muVT, constant chemical potential, constant volume, and +constant temperature) can be performed. Specific uses include computing +isotherms in microporous materials, or computing vapor-liquid +coexistence curves. + +Every *N* timesteps the fix attempts both GCMC exchanges (insertions or +deletions) and MC moves of gas atoms or molecules. On those timesteps, +the average number of attempted GCMC exchanges is *X*, while the average +number of attempted MC moves is *M*. For GCMC exchanges of either +molecular or atomic gasses, these exchanges can be either deletions or +insertions, with equal probability. + +The possible choices for MC moves are translation of an atom, +translation of a molecule, and rotation of a molecule. The relative +amounts of each are determined by the optional *mcmoves* keyword (see +below). The default behavior is as follows. If the *mol* keyword is +used, only molecule translations and molecule rotations are performed +with equal probability. Conversely, if the *mol* keyword is not used, +only atom translations are performed. *M* should typically be chosen to +be approximately equal to the expected number of gas atoms or molecules +of the given type within the simulation cell or region, which will +result in roughly one MC move per atom or molecule per MC cycle. All inserted particles are always added to two groups: the default group "all" and the fix group specified in the fix command. In addition, particles are also added to any groups specified by the *group* and -*grouptype* keywords. 
If inserted particles are individual atoms, they are -assigned the atom type given by the type argument. If they are molecules, -the type argument has no effect and must be set to zero. Instead, the type -of each atom in the inserted molecule is specified in the file read by the -:doc:`molecule ` command. +*grouptype* keywords. If inserted particles are individual atoms, they +are assigned the atom type given by the type argument. If they are +molecules, the type argument has no effect and must be set to zero. +Instead, the type of each atom in the inserted molecule is specified in +the file read by the :doc:`molecule ` command. .. note:: - Care should be taken to apply fix gcmc only to - a group that contains only those atoms and molecules - that you wish to manipulate using Monte Carlo. - Hence it is generally not a good idea to specify - the default group "all" in the fix command, although it is allowed. + Care should be taken to apply *fix gcmc* to a group that contains + *only* those atoms and molecules that you wish to manipulate using + Monte Carlo. Hence it is generally not a good idea to specify the + default group "all" in the fix command, although it is allowed. This fix cannot be used to perform GCMC insertions of gas atoms or -molecules other than the exchanged type, but GCMC deletions, -and MC translations, and rotations can be performed on any atom/molecule in -the fix group. All atoms in the simulation cell can be moved using -regular time integration translations, e.g. via :doc:`fix nvt `, -resulting in a hybrid GCMC+MD simulation. A smaller-than-usual -timestep size may be needed when running such a hybrid simulation, -especially if the inserted molecules are not well equilibrated. +molecules other than the exchanged type, but GCMC deletions, and MC +translations, and rotations can be performed on any atom/molecule in the +fix group. All atoms in the simulation cell can be moved using regular +time integration translations, e.g. 
via :doc:`fix nvt `, +resulting in a hybrid GCMC+MD simulation. A smaller-than-usual timestep +size may be needed when running such a hybrid simulation, especially if +the inserted molecules are not well equilibrated. This command may optionally use the *region* keyword to define an exchange and move volume. The specified region must have been previously defined with a :doc:`region ` command. It must be defined with side = *in*\ . Insertion attempts occur only within the -specified region. For non-rectangular regions, random trial points are +specified region. For non-rectangular regions, random trial points are generated within the rectangular bounding box until a point is found -that lies inside the region. If no valid point is generated after 1000 +that lies inside the region. If no valid point is generated after 1000 trials, no insertion is performed, but it is counted as an attempted insertion. Move and deletion attempt candidates are selected from gas -atoms or molecules within the region. If there are no candidates, no +atoms or molecules within the region. If there are no candidates, no move or deletion is performed, but it is counted as an attempt move or -deletion. If an attempted move places the atom or molecule +deletion. If an attempted move places the atom or molecule center-of-mass outside the specified region, a new attempted move is -generated. This process is repeated until the atom or molecule +generated. This process is repeated until the atom or molecule center-of-mass is inside the specified region. If used with :doc:`fix nvt `, the temperature of the imaginary -reservoir, T, should be set to be equivalent to the target temperature -used in fix nvt. Otherwise, the imaginary reservoir will not be in -thermal equilibrium with the simulation cell. 
Also, it is important that -the temperature used by *fix nvt* is dynamically updated, which can be -achieved as follows: +reservoir, *T*, should be set to be equivalent to the target temperature +used in fix nvt. Otherwise, the imaginary reservoir will not be in +thermal equilibrium with the simulation cell. Also, it is important +that the temperature used by *fix nvt* is dynamically updated, which can +be achieved as follows: .. code-block:: LAMMPS @@ -153,37 +153,37 @@ achieved as follows: fix_modify mdnvt temp mdtemp Note that neighbor lists are re-built every timestep that this fix is -invoked, so you should not set N to be too small. However, periodic +invoked, so you should not set *N* to be too small. However, periodic rebuilds are necessary in order to avoid dangerous rebuilds and missed -interactions. Specifically, avoid performing so many MC translations -per timestep that atoms can move beyond the neighbor list skin -distance. See the :doc:`neighbor ` command for details. +interactions. Specifically, avoid performing so many MC translations +per timestep that atoms can move beyond the neighbor list skin distance. +See the :doc:`neighbor ` command for details. When an atom or molecule is to be inserted, its coordinates are chosen at a random position within the current simulation cell or region, and new atom velocities are randomly chosen from the specified temperature -distribution given by T. The effective temperature for new atom +distribution given by *T*. The effective temperature for new atom velocities can be increased or decreased using the optional keyword -*tfac_insert* (see below). Relative coordinates for atoms in a -molecule are taken from the template molecule provided by the -user. The center of mass of the molecule is placed at the insertion -point. The orientation of the molecule is chosen at random by rotating -about this point. +*tfac_insert* (see below). 
Relative coordinates for atoms in a molecule +are taken from the template molecule provided by the user. The center +of mass of the molecule is placed at the insertion point. The +orientation of the molecule is chosen at random by rotating about this +point. Individual atoms are inserted, unless the *mol* keyword is used. It -specifies a *template-ID* previously defined using the -:doc:`molecule ` command, which reads a file that defines the -molecule. The coordinates, atom types, charges, etc., as well as any -bonding and special neighbor information for the molecule can -be specified in the molecule file. See the :doc:`molecule ` -command for details. The only settings required to be in this file -are the coordinates and types of atoms in the molecule. +specifies a *template-ID* previously defined using the :doc:`molecule +` command, which reads a file that defines the molecule. The +coordinates, atom types, charges, etc., as well as any bonding and +special neighbor information for the molecule can be specified in the +molecule file. See the :doc:`molecule ` command for details. +The only settings required to be in this file are the coordinates and +types of atoms in the molecule. When not using the *mol* keyword, you should ensure you do not delete atoms that are bonded to other atoms, or LAMMPS will soon generate an error when it tries to find bonded neighbors. LAMMPS will warn you if -any of the atoms eligible for deletion have a non-zero molecule ID, -but does not check for this at the time of deletion. +any of the atoms eligible for deletion have a non-zero molecule ID, but +does not check for this at the time of deletion. If you wish to insert molecules using the *mol* keyword that will be treated as rigid bodies, use the *rigid* keyword, specifying as its @@ -204,26 +204,26 @@ their bonds or angles constrained via SHAKE, use the *shake* keyword, specifying as its value the ID of a separate :doc:`fix shake ` command which also appears in your input script. 
-Optionally, users may specify the relative amounts of different MC -moves using the *mcmoves* keyword. The values *Patomtrans*, -*Pmoltrans*, *Pmolrotate* specify the average proportion of -atom translations, molecule translations, and molecule rotations, -respectively. The values must be non-negative integers or real -numbers, with at least one non-zero value. For example, (10,30,0) -would result in 25% of the MC moves being atomic translations, 75% -molecular translations, and no molecular rotations. +Optionally, users may specify the relative amounts of different MC moves +using the *mcmoves* keyword. The values *Patomtrans*, *Pmoltrans*, +*Pmolrotate* specify the average proportion of atom translations, +molecule translations, and molecule rotations, respectively. The values +must be non-negative integers or real numbers, with at least one +non-zero value. For example, (10,30,0) would result in 25% of the MC +moves being atomic translations, 75% molecular translations, and no +molecular rotations. Optionally, users may specify the maximum rotation angle for molecular rotations using the *maxangle* keyword and specifying the angle in degrees. Rotations are performed by generating a random point on the -unit sphere and a random rotation angle on the range -[0,maxangle). The molecule is then rotated by that angle about an -axis passing through the molecule center of mass. The axis is parallel -to the unit vector defined by the point on the unit sphere. The same -procedure is used for randomly rotating molecules when they are -inserted, except that the maximum angle is 360 degrees. - -Note that fix gcmc does not use configurational bias MC or any other +unit sphere and a random rotation angle on the range [0,maxangle). The +molecule is then rotated by that angle about an axis passing through the +molecule center of mass. The axis is parallel to the unit vector defined +by the point on the unit sphere. 
The same procedure is used for +randomly rotating molecules when they are inserted, except that the +maximum angle is 360 degrees. + +Note that *fix gcmc* does not use configurational bias MC or any other kind of sampling of intramolecular degrees of freedom. Inserted molecules can have different orientations, but they will all have the same intramolecular configuration, which was specified in the molecule @@ -231,38 +231,37 @@ command input. For atomic gasses, inserted atoms have the specified atom type, but deleted atoms are any atoms that have been inserted or that already -belong to the fix group. For molecular gasses, exchanged -molecules use the same atom types as in the template molecule supplied -by the user. In both cases, exchanged atoms/molecules are assigned to -two groups: the default group "all" and the fix group -(which can also be "all"). +belong to the fix group. For molecular gasses, exchanged molecules use +the same atom types as in the template molecule supplied by the user. +In both cases, exchanged atoms/molecules are assigned to two groups: the +default group "all" and the fix group (which can also be "all"). -The chemical potential is a user-specified input parameter defined -as: +The chemical potential is a user-specified input parameter defined as: .. math:: \mu = \mu^{id} + \mu^{ex} -The second term mu_ex is the excess chemical potential due to +The second term :math:`\mu^{ex}` is the excess chemical potential due to energetic interactions and is formally zero for the fictitious gas -reservoir but is non-zero for interacting systems. So, while the +reservoir but is non-zero for interacting systems. So, while the chemical potential of the reservoir and the simulation cell are equal, -mu_ex is not, and as a result, the densities of the two are generally -quite different. The first term mu_id is the ideal gas contribution -to the chemical potential. 
mu_id can be related to the density or -pressure of the fictitious gas reservoir by: +:math:`\mu^{ex}` is not, and as a result, the densities of the two are +generally quite different. The first term :math:`\mu^{id}` is the ideal +gas contribution to the chemical potential. :math:`\mu^{id}` can be +related to the density or pressure of the fictitious gas reservoir by: .. math:: \mu^{id} = & k T \ln{\rho \Lambda^3} \\ = & k T \ln{\frac{\phi P \Lambda^3}{k_B T}} -where :math:`k_B` is the Boltzmann constant, :math:`T` is the user-specified -temperature, :math:`\rho` is the number density, *P* is the pressure, -and :math:`\phi` is the fugacity coefficient. The constant -:math:`\Lambda` is required for dimensional consistency. For all unit -styles except *lj* it is defined as the thermal de Broglie wavelength +where :math:`k_B` is the Boltzmann constant, *T* is the +user-specified temperature, :math:`\rho` is the number density, *P* is +the pressure, and :math:`\phi` is the fugacity coefficient. The +constant :math:`\Lambda` is required for dimensional consistency. For +all unit styles except *lj* it is defined as the thermal de Broglie +wavelength .. math:: @@ -276,22 +275,20 @@ value of 0.18292026. Chemical potential under the old definition can be converted to an equivalent value under the new definition by subtracting :math:`3 k T \ln(\Lambda_{old})`. -As an alternative to specifying mu directly, the ideal gas reservoir -can be defined by its pressure *P* using the *pressure* keyword, in -which case the user-specified chemical potential is ignored. The user -may also specify the fugacity coefficient :math:`\phi` using the +As an alternative to specifying :math:`\mu` directly, the ideal gas reservoir can +be defined by its pressure *P* using the *pressure* keyword, in which +case the user-specified chemical potential is ignored. The user may +also specify the fugacity coefficient :math:`\phi` using the *fugacity_coeff* keyword, which defaults to unity.
The *full_energy* option means that the fix calculates the total -potential energy of the entire simulated system, instead of just -the energy of the part that is changed. The total system -energy before and after the proposed GCMC exchange or MC move -is then used in the -Metropolis criterion to determine whether or not to accept the -proposed change. By default, this option is off, -in which case only -partial energies are computed to determine the energy difference -due to the proposed change. +potential energy of the entire simulated system, instead of just the +energy of the part that is changed. The total system energy before and +after the proposed GCMC exchange or MC move is then used in the +Metropolis criterion to determine whether or not to accept the proposed +change. By default, this option is off, in which case only partial +energies are computed to determine the energy difference due to the +proposed change. The *full_energy* option is needed for systems with complicated potential energy calculations, including the following: @@ -308,23 +305,23 @@ keyword and issue a warning message. When the *mol* keyword is used, the *full_energy* option also includes the intramolecular energy of inserted and deleted molecules, whereas -this energy is not included when *full_energy* is not used. If this -is not desired, the *intra_energy* keyword can be used to define an -amount of energy that is subtracted from the final energy when a -molecule is inserted, and subtracted from the initial energy when a molecule -is deleted. For molecules that have a non-zero intramolecular energy, -this will ensure roughly the same behavior whether or not the -*full_energy* option is used. - -Inserted atoms and molecules are assigned random velocities based on -the specified temperature :math:`T`. Because the relative velocity of all -atoms in the molecule is zero, this may result in inserted molecules -that are systematically too cold. 
In addition, the intramolecular -potential energy of the inserted molecule may cause the kinetic energy -of the molecule to quickly increase or decrease after insertion. The +this energy is not included when *full_energy* is not used. If this is +not desired, the *intra_energy* keyword can be used to define an amount +of energy that is subtracted from the final energy when a molecule is +inserted, and subtracted from the initial energy when a molecule is +deleted. For molecules that have a non-zero intramolecular energy, this +will ensure roughly the same behavior whether or not the *full_energy* +option is used. + +Inserted atoms and molecules are assigned random velocities based on the +specified temperature *T*. Because the relative velocity of all atoms +in the molecule is zero, this may result in inserted molecules that are +systematically too cold. In addition, the intramolecular potential +energy of the inserted molecule may cause the kinetic energy of the +molecule to quickly increase or decrease after insertion. The *tfac_insert* keyword allows the user to counteract these effects by -changing the temperature used to assign velocities to inserted atoms -and molecules by a constant factor. For a particular application, some +changing the temperature used to assign velocities to inserted atoms and +molecules by a constant factor. For a particular application, some experimentation may be required to find a value of *tfac_insert* that results in inserted molecules that equilibrate quickly to the correct temperature. @@ -442,7 +439,7 @@ type masses. Do not set "neigh_modify once yes" or else this fix will never be called. Reneighboring is **required**. -Only usable for 3D simulations. +This fix style is only usable for 3D simulations. This fix can be run in parallel, but aspects of the GCMC part will not scale well in parallel. 
Currently, molecule translations and rotations @@ -451,27 +448,27 @@ to do parallel molecule exchange without translation and rotation moves by setting MC moves to zero and/or by using the *mcmoves* keyword with *Pmoltrans* = *Pmolrotate* = 0 . +When using *fix gcmc* in combination with :doc:`fix shake ` +or :doc:`fix rigid `, only GCMC exchange moves are supported, +so the argument *M* must be zero. -When using fix gcmc in combination with fix shake or fix rigid, only -GCMC exchange moves are supported, so the argument *M* must be zero. - -When using fix gcmc in combination with fix rigid, deletion of the last -remaining molecule is not allowed for technical reasons, and so the -molecule count will never drop below 1, regardless of the specified -chemical potential. +When using *fix gcmc* in combination with :doc:`fix rigid `, +deletion of the last remaining molecule is not allowed for technical +reasons, and so the molecule count will never drop below 1, regardless +of the specified chemical potential. Note that very lengthy simulations involving insertions/deletions of billions of gas molecules may run out of atom or molecule IDs and trigger an error, so it is better to run multiple shorter-duration -simulations. Likewise, very large molecules have not been tested and +simulations. Likewise, very large molecules have not been tested and may turn out to be problematic. -Use of multiple fix gcmc commands in the same input script can be -problematic if using a template molecule. The issue is that the -user-referenced template molecule in the second fix gcmc command may -no longer exist since it might have been deleted by the first fix gcmc -command. An existing template molecule will need to be referenced by -the user for each subsequent fix gcmc command. +Use of multiple *fix gcmc* commands in the same input script can be +problematic if using a template molecule. 
The issue is that the +user-referenced template molecule in the second *fix gcmc* command may +no longer exist since it might have been deleted by the first *fix gcmc* +command. An existing template molecule will need to be referenced by +the user for each subsequent *fix gcmc* command. Related commands """""""""""""""" @@ -481,8 +478,8 @@ Related commands :doc:`fix deposit `, :doc:`fix evaporate `, :doc:`delete_atoms ` -Default -""""""" +Defaults +"""""""" The option defaults are mol = no, maxangle = 10, overlap_cutoff = 0.0, fugacity_coeff = 1.0, intra_energy = 0.0, tfac_insert = 1.0. From b28449dd79b7ef3c639e91aa75feb245af35f2eb Mon Sep 17 00:00:00 2001 From: megmcca Date: Sun, 2 Nov 2025 15:34:14 -0700 Subject: [PATCH 144/604] rename with date format --- .../{log => log.31Oct25.langevin.reduced.group.g++} | 0 .../{log.0 => log.31Oct25.langevin.reduced.group.g++.0} | 0 .../{log.1 => log.31Oct25.langevin.reduced.group.g++.1} | 0 .../{log.2 => log.31Oct25.langevin.reduced.group.g++.2} | 0 .../{log.3 => log.31Oct25.langevin.reduced.group.g++.3} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename examples/PACKAGES/pimd/langevin_reduced_units_group/{log => log.31Oct25.langevin.reduced.group.g++} (100%) rename examples/PACKAGES/pimd/langevin_reduced_units_group/{log.0 => log.31Oct25.langevin.reduced.group.g++.0} (100%) rename examples/PACKAGES/pimd/langevin_reduced_units_group/{log.1 => log.31Oct25.langevin.reduced.group.g++.1} (100%) rename examples/PACKAGES/pimd/langevin_reduced_units_group/{log.2 => log.31Oct25.langevin.reduced.group.g++.2} (100%) rename examples/PACKAGES/pimd/langevin_reduced_units_group/{log.3 => log.31Oct25.langevin.reduced.group.g++.3} (100%) diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ similarity index 100% rename from examples/PACKAGES/pimd/langevin_reduced_units_group/log rename to 
examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 similarity index 100% rename from examples/PACKAGES/pimd/langevin_reduced_units_group/log.0 rename to examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 similarity index 100% rename from examples/PACKAGES/pimd/langevin_reduced_units_group/log.1 rename to examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 similarity index 100% rename from examples/PACKAGES/pimd/langevin_reduced_units_group/log.2 rename to examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 similarity index 100% rename from examples/PACKAGES/pimd/langevin_reduced_units_group/log.3 rename to examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 From bd0a6bb792e78ab3bff2b64e00ba2a7b8379f42a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 3 Nov 2025 10:16:11 -0700 Subject: [PATCH 145/604] Fix some issues in Kokkos UF3 --- src/KOKKOS/kokkos_type.h | 3 +++ src/KOKKOS/pair_uf3_kokkos.cpp | 14 ++++++++++---- src/KOKKOS/pair_uf3_kokkos.h | 8 +++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 8696910669a..0719a94ba8d 100644 
--- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -1181,6 +1181,7 @@ KOKKOS_DEVICE_DUALVIEW(KK_ACC_FLOAT*[3], LMPDeviceLayout, kkacc_1d_3) KOKKOS_DEVICE_DUALVIEW(KK_FLOAT*[4], LMPDeviceLayout, kkfloat_1d_4) KOKKOS_DEVICE_DUALVIEW(KK_FLOAT*[6], LMPDeviceLayout, kkfloat_1d_6) KOKKOS_DEVICE_DUALVIEW(KK_ACC_FLOAT*[6], LMPDeviceLayout, kkacc_1d_6) +KOKKOS_DEVICE_DUALVIEW(KK_ACC_FLOAT*[9], LMPDeviceLayout, kkacc_1d_9) typedef TransformView ttransform_kkacc_1d; typedef TransformView ttransform_int_2d; @@ -1194,6 +1195,7 @@ typedef TransformView ttransform_ typedef TransformView ttransform_kkfloat_1d_4; typedef TransformView ttransform_kkfloat_1d_6; typedef TransformView ttransform_kkacc_1d_6; +typedef TransformView ttransform_kkacc_1d_9; // 3D view types @@ -1275,6 +1277,7 @@ KOKKOS_HOST_DUALVIEW(KK_ACC_FLOAT*[3], LMPDeviceLayout, kkacc_1d_3) KOKKOS_HOST_DUALVIEW(KK_FLOAT*[4], LMPDeviceLayout, kkfloat_1d_4) KOKKOS_HOST_DUALVIEW(KK_FLOAT*[6], LMPDeviceLayout, kkfloat_1d_6) KOKKOS_HOST_DUALVIEW(KK_ACC_FLOAT*[6], LMPDeviceLayout, kkacc_1d_6) +KOKKOS_HOST_DUALVIEW(KK_ACC_FLOAT*[9], LMPDeviceLayout, kkacc_1d_9) // 3D view types diff --git a/src/KOKKOS/pair_uf3_kokkos.cpp b/src/KOKKOS/pair_uf3_kokkos.cpp index 30f93c3ac0a..5a7a2c3a3f0 100644 --- a/src/KOKKOS/pair_uf3_kokkos.cpp +++ b/src/KOKKOS/pair_uf3_kokkos.cpp @@ -731,6 +731,12 @@ template void PairUF3Kokkos::compute(int eflag_in d_vatom = k_vatom.view(); } + if (cvflag_atom) { + memoryKK->destroy_kokkos(k_cvatom, cvatom); + memoryKK->create_kokkos(k_cvatom, cvatom, maxvatom, "pair:vatom"); + d_cvatom = k_cvatom.view(); + } + atomKK->sync(execution_space, datamask_read); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -759,7 +765,7 @@ template void PairUF3Kokkos::compute(int eflag_in escatter = ScatterEType(d_eatom); fscatter = ScatterFType(f); vscatter = ScatterVType(d_vatom); - //cvscatter = ScatterCVType(d_cvatom); + cvscatter = 
ScatterCVType(d_cvatom); EV_FLOAT ev; EV_FLOAT ev_all; @@ -791,7 +797,7 @@ template void PairUF3Kokkos::compute(int eflag_in Kokkos::Experimental::contribute(d_eatom, escatter); Kokkos::Experimental::contribute(d_vatom, vscatter); - //Kokkos::Experimental::contribute(d_cvatom, cvscatter); + Kokkos::Experimental::contribute(d_cvatom, cvscatter); Kokkos::Experimental::contribute(f, fscatter); if (eflag_global) eng_vdwl += ev_all.evdwl; @@ -815,8 +821,8 @@ template void PairUF3Kokkos::compute(int eflag_in } if (cvflag_atom) { - //k_cvatom.template modify(); - //k_cvatom.sync_host(); + k_cvatom.template modify(); + k_cvatom.sync_host(); } if (vflag_fdotr) pair_virial_fdotr_compute(this); diff --git a/src/KOKKOS/pair_uf3_kokkos.h b/src/KOKKOS/pair_uf3_kokkos.h index e7e1e7eb21c..841653be4fc 100644 --- a/src/KOKKOS/pair_uf3_kokkos.h +++ b/src/KOKKOS/pair_uf3_kokkos.h @@ -141,21 +141,23 @@ template class PairUF3Kokkos : public PairUF3 { DAT::ttransform_kkacc_1d k_eatom; DAT::ttransform_kkacc_1d_6 k_vatom; + DAT::ttransform_kkacc_1d_9 k_cvatom; typename AT::t_kkacc_1d d_eatom; typename AT::t_kkacc_1d_6 d_vatom; + typename AT::t_kkacc_1d_9 d_cvatom; using KKDeviceType = typename KKDevice::value; using ScatterFType = Kokkos::Experimental::ScatterView; ScatterFType fscatter; - using ScatterVType = Kokkos::Experimental::ScatterView; ScatterVType vscatter; - using ScatterCVType = Kokkos::Experimental::ScatterView; ScatterCVType cvscatter; - using ScatterEType = Kokkos::Experimental::ScatterView; ScatterEType escatter; From 4a13eff653ab7f157c952a434911661ac58dbd99 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 3 Nov 2025 13:16:59 -0700 Subject: [PATCH 146/604] Allowing finite size mass in ideal EoS --- src/RHEO/fix_rheo_pressure.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/RHEO/fix_rheo_pressure.cpp b/src/RHEO/fix_rheo_pressure.cpp index 2262a1e4546..6ea38b49d34 100644 --- a/src/RHEO/fix_rheo_pressure.cpp +++ 
b/src/RHEO/fix_rheo_pressure.cpp @@ -246,7 +246,12 @@ double FixRHEOPressure::calc_pressure(double rho, int i) rho_ratio = rho * rho0inv[type]; p = csq[type] * rho0[type] * (pow(rho_ratio, tpower[type]) - 1.0) / tpower[type]; } else if (pressure_style[type] == IDEAL) { - p = (gamma[type] - 1.0) * rho * atom->esph[i] / atom->mass[type]; + double imass; + if (atom->rmass) + imass = atom->rmass[i]; + else + imass = atom->mass[type]; + p = (gamma[type] - 1.0) * rho * atom->esph[i] / imass; } if (background_flag) @@ -281,7 +286,12 @@ double FixRHEOPressure::calc_rho(double p, int i) rho *= pow(rho0[type], 1.0 - 1.0 / tpower[type]); rho *= pow(csq[type], -1.0 / tpower[type]); } else if (pressure_style[type] == IDEAL) { - rho = p * atom->mass[type] / ((gamma[type] - 1.0) * atom->esph[i]); + double imass; + if (atom->rmass) + imass = atom->rmass[i]; + else + imass = atom->mass[type]; + rho = p * imass / ((gamma[type] - 1.0) * atom->esph[i]); } return rho; } From f624d68207df306e6f4b97bf683f21b78c2a74ef Mon Sep 17 00:00:00 2001 From: jtclemm Date: Mon, 3 Nov 2025 13:23:43 -0700 Subject: [PATCH 147/604] Adding missing factor of mass in energy/temperature conversion --- src/RHEO/fix_rheo_thermal.cpp | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/RHEO/fix_rheo_thermal.cpp b/src/RHEO/fix_rheo_thermal.cpp index a855c966165..8ca4321e91a 100644 --- a/src/RHEO/fix_rheo_thermal.cpp +++ b/src/RHEO/fix_rheo_thermal.cpp @@ -359,6 +359,10 @@ void FixRHEOThermal::post_integrate() double *heatflow = atom->heatflow; int *type = atom->type; + double imass; + double rmass = atom->rmass; + double mass = atom->mass; + int n_melt = 0; int n_freeze = 0; @@ -367,9 +371,12 @@ void FixRHEOThermal::post_integrate() if (status[i] & STATUS_NO_INTEGRATION) continue; itype = type[i]; + if (rmass) imass = rmass[i]; + else imass = mass[itype]; + cvi = calc_cv(itype); energy[i] += dth * heatflow[i]; - temperature[i] = energy[i] / cvi; + temperature[i] = 
energy[i] / (imass * cvi); if (Tc_style[itype] != NONE) { Ti = temperature[i]; @@ -377,7 +384,7 @@ void FixRHEOThermal::post_integrate() if (L_style[itype] != NONE) { Li = calc_L(itype); - if (Ti > Tci) Ti = MAX(Tci, (energy[i] - Li) / cvi); + if (Ti > Tci) Ti = MAX(Tci, (energy[i] / imass - Li) / cvi); temperature[i] = Ti; } @@ -461,18 +468,26 @@ void FixRHEOThermal::post_neighbor() void FixRHEOThermal::pre_force(int /*vflag*/) { + int i, itype; double cvi, Tci, Ti, Li; double *energy = atom->esph; double *temperature = atom->temperature; int *type = atom->type; + + double imass; + double rmass = atom->rmass; + double mass = atom->mass; + int nall = atom->nlocal + atom->nghost; // Calculate temperature - for (int i = 0; i < nall; i++) { - int itype = type[i]; + for (i = 0; i < nall; i++) { + itype = type[i]; + if (rmass) imass = rmass[i]; + else imass = mass[itype]; cvi = calc_cv(itype); - temperature[i] = energy[i] / cvi; + temperature[i] = energy[i] / (imass * cvi); if (Tc_style[itype] != NONE) { Ti = temperature[i]; @@ -480,7 +495,7 @@ void FixRHEOThermal::pre_force(int /*vflag*/) if (L_style[itype] != NONE) { Li = calc_L(itype); - if (Ti > Tci) Ti = MAX(Tci, (energy[i] - Li) / cvi); + if (Ti > Tci) Ti = MAX(Tci, (energy[i] / imass - Li) / cvi); temperature[i] = Ti; } } From 51c2d126a739001f68df5b182e2b0ebe66164c56 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 3 Nov 2025 15:51:07 -0500 Subject: [PATCH 148/604] disable bounding box info when region has variable shape or dynamic position or orientation the bounding box information is only valid for static and fixed size regions. variable shape and translate/rotate was added but bounding box is not updated. 
--- src/region_block.cpp | 2 +- src/region_cone.cpp | 2 +- src/region_cylinder.cpp | 2 +- src/region_ellipsoid.cpp | 2 +- src/region_prism.cpp | 2 +- src/region_sphere.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/region_block.cpp b/src/region_block.cpp index 36c38f517c9..1bd570b7d9d 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -156,7 +156,7 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : // extent of block - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; extent_xlo = xlo; extent_xhi = xhi; diff --git a/src/region_cone.cpp b/src/region_cone.cpp index 30f29e39345..26dda8ba2e0 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -266,7 +266,7 @@ RegCone::RegCone(LAMMPS *lmp, int narg, char **arg) : // extent of cone maxradius = ((radiuslo > radiushi) ? radiuslo : radiushi); - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; if (axis == 'x') { extent_xlo = lo; diff --git a/src/region_cylinder.cpp b/src/region_cylinder.cpp index 2ad0ba82f5b..6546366522a 100644 --- a/src/region_cylinder.cpp +++ b/src/region_cylinder.cpp @@ -195,7 +195,7 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : // extent of cylinder // for variable radius, uses initial radius - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; if (axis == 'x') { extent_xlo = lo; diff --git a/src/region_ellipsoid.cpp b/src/region_ellipsoid.cpp index 02925574120..cfed7968546 100644 --- a/src/region_ellipsoid.cpp +++ b/src/region_ellipsoid.cpp @@ -117,7 +117,7 @@ RegEllipsoid::RegEllipsoid(LAMMPS *lmp, int narg, char **arg) : // extent of ellipsoid // for variable axes, uses initial axes and origin for variable center - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; extent_xlo = xc - a; extent_xhi = xc + a; diff --git a/src/region_prism.cpp b/src/region_prism.cpp index 5c6ee58a904..91e1291abdc 100644 --- a/src/region_prism.cpp +++ 
b/src/region_prism.cpp @@ -193,7 +193,7 @@ RegPrism::RegPrism(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg), // extent of prism - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; extent_xlo = MIN(xlo, xlo + xy); extent_xlo = MIN(extent_xlo, extent_xlo + xz); diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index ec472c031c8..641956d7eff 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -81,7 +81,7 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : // extent of sphere // for variable radius, uses initial radius and origin for variable center - if (interior) { + if (interior && !dynamic && !varshape) { bboxflag = 1; extent_xlo = xc - radius; extent_xhi = xc + radius; From 58d9b8137225cf0db0de8ed6948ff104a21c4e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yifan=20Li=E6=9D=8E=E4=B8=80=E5=B8=86?= Date: Mon, 3 Nov 2025 16:04:16 -0500 Subject: [PATCH 149/604] Update doc/src/fix_pimd.rst Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- doc/src/fix_pimd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 9557022d7a2..64c5bb911f6 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -276,7 +276,7 @@ a positive floating-point number. for nve pimd, since the spring elastic frequency between the beads will be affected by the temperature. The keyword *thermostat* reads *style* and *seed* of thermostat for fix style *pimd/langevin*. -*style* can only be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti <_Ceriotti3>`), and *seed* should a positive integer number, which serves as the seed of the pseudo random number generator. +*style* can only be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti <Ceriotti3>`), and *seed* should a positive integer number, which serves as the seed of the pseudo random number generator. .. 
note:: From dc0b680979b67ec5f4fe28b9ee8ffe89fdfb0622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yifan=20Li=E6=9D=8E=E4=B8=80=E5=B8=86?= Date: Mon, 3 Nov 2025 16:08:59 -0500 Subject: [PATCH 150/604] Update doc/src/fix_pimd.rst Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- doc/src/fix_pimd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 64c5bb911f6..5c99d19f7bc 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -250,7 +250,7 @@ a value other than *physical*, due to the lack of support for bosonic normal mod The keyword *sp* is a scaling factor on Planck's constant. Scaling the Planck's constant means modifying the "quantumness" of the PIMD simulation. Using the physical value of Planck's constant corresponds to a fully quantum simulation, and 0 corresponds to the classical limit. For unit styles other than *lj*, the default value of 1.0 is appropriate for most situations. -For *lj* units, a fully quantum simulation translates into setting *sp* to the de Boer quantumness parameter :math:`\Lambda^{\ast}`(see :ref:`de Boer `): +For *lj* units, a fully quantum simulation translates into setting *sp* to the de Boer quantumness parameter :math:`\Lambda^{\ast}` (see :ref:`de Boer `): .. 
math:: \Lambda^{\ast}=h/\sigma\sqrt{m\varepsilon} From 02f781895b3ee46ed7be94d465a88de1ac4a7471 Mon Sep 17 00:00:00 2001 From: Chuck Witt Date: Mon, 3 Nov 2025 13:36:37 -0800 Subject: [PATCH 151/604] Apply @bathmatt patch to pack/unpack in mliap/kk --- src/KOKKOS/pair_mliap_kokkos.cpp | 60 +++++++++----------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/src/KOKKOS/pair_mliap_kokkos.cpp b/src/KOKKOS/pair_mliap_kokkos.cpp index f25434dab3c..38075f6a21e 100644 --- a/src/KOKKOS/pair_mliap_kokkos.cpp +++ b/src/KOKKOS/pair_mliap_kokkos.cpp @@ -425,18 +425,11 @@ int PairMLIAPKokkos::pack_forward_comm_kokkos( auto val=fill.view(); int nf=vec_len; auto to=copy_to; - Kokkos::parallel_for( - Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), - KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { - const int i = team_member.league_rank(); + Kokkos::parallel_for(nv*nf, KOKKOS_LAMBDA (int start) { + const int i = start/nf; const int gstart=idx(i)*nf; - const int start=i*nf; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(team_member, nf), - [=] (const int j) { - val(start+j) = static_cast(to[gstart+j]); - } - ); + const int j = start%nf; + val(start+j) = static_cast(to[gstart+j]); } ); return nv*nf; @@ -503,18 +496,11 @@ void PairMLIAPKokkos::unpack_forward_comm_kokkos( auto val=fill.view(); int nf=vec_len; auto to=copy_to; - Kokkos::parallel_for( - Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), - KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { - const int i = team_member.league_rank(); + Kokkos::parallel_for(nv*nf, KOKKOS_LAMBDA (int start) { + const int i=start/nf; const int gstart=(first_up+i)*nf; - const int start=i*nf; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(team_member, nf), - [=] (const int j) { - to[gstart+j] = static_cast(val(start+j)); - } - ); + const int j=start%nf; + to[gstart+j] = static_cast(val(start+j)); } ); } @@ -573,18 +559,11 @@ int PairMLIAPKokkos::pack_reverse_comm_kokkos(int 
nv, int first_up, int nf=vec_len; auto val=fill.view(); auto to=copy_to; - Kokkos::parallel_for( - Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), - KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { - const int i = team_member.league_rank(); + Kokkos::parallel_for(nv*nf, KOKKOS_LAMBDA (int start) { + const int i = start/nf; const int gstart=(first_up+i)*nf; - const int start=i*nf; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(team_member, nf), - [=] (const int j) { - val(start+j) = static_cast(to[gstart+j]); - } - ); + const int j = start%nf; + val(start+j) = static_cast(to[gstart+j]); } ); return nv*nf; @@ -648,18 +627,11 @@ void PairMLIAPKokkos::unpack_reverse_comm_kokkos(int nv, DAT::tdual_ auto val=fill.view(); auto idx=idx_v.view(); auto to=copy_to; - Kokkos::parallel_for( - Kokkos::TeamPolicy<>(nv, Kokkos::AUTO, Kokkos::AUTO), - KOKKOS_LAMBDA (Kokkos::TeamPolicy<>::member_type team_member) { - const int i = team_member.league_rank(); + Kokkos::parallel_for(nv*nf, KOKKOS_LAMBDA (int start) { + const int i = start/nf; const int gstart=idx(i)*nf; - const int start=i*nf; - Kokkos::parallel_for( - Kokkos::TeamVectorRange(team_member, nf), - [=] (const int j) { - to[gstart+j] += static_cast(val(start+j)); - } - ); + const int j=i%nf; + to[gstart+j] += static_cast(val(start+j)); } ); } From e3e0e0baf07ec0f093837333afdecb1d8ae3b18a Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Mon, 3 Nov 2025 17:32:07 -0500 Subject: [PATCH 152/604] improve math in doc --- doc/src/fix_pimd.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 5c99d19f7bc..1b7f63ed7e2 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -251,14 +251,18 @@ a value other than *physical*, due to the lack of support for bosonic normal mod The keyword *sp* is a scaling factor on Planck's constant. Scaling the Planck's constant means modifying the "quantumness" of the PIMD simulation. 
Using the physical value of Planck's constant corresponds to a fully quantum simulation, and 0 corresponds to the classical limit. For unit styles other than *lj*, the default value of 1.0 is appropriate for most situations. For *lj* units, a fully quantum simulation translates into setting *sp* to the de Boer quantumness parameter :math:`\Lambda^{\ast}` (see :ref:`de Boer `): + .. math:: \Lambda^{\ast}=h/\sigma\sqrt{m\varepsilon} where :math:`h` is Planck's constant, :math:`\sigma` is the length scale, :math:`\epsilon` is the energy scale, and :math:`m` is the mass of the particles. For example, for Neon, :math:`m = 20.1797` Dalton, :math:`\varepsilon = 3.0747 \times 10^{-3}` eV and :math:`\sigma = 2.7616` Å. Then we have + .. math:: - \Lambda^{\ast} = 4.135667403e-3 eV * ps / (2.7616 Å * sqrt(20.1797 Dalton * 3.0747e-3 eV * 1.0364269e-4 eV / Dalton / Å^2 * ps^2)) = 0.600. + + \Lambda^{\ast} = \frac{4.135667403\times 10^{-3}\ \mathrm{eV} \cdot\ \mathrm{ps}}{2.7616\ \mathrm{Å}\times \sqrt{20.1797\ \mathrm{Dalton}\times\ 3.0747\times 10^{-3}\ \mathrm{eV}\times 1.0364269\times 10^{-4}\ \mathrm{eV}\cdot\mathrm{Dalton}^{-1}\cdot\mathrm{Å}^{-2}\cdot\mathrm{ps}^{2}}} = 0.600. + Thus for a fully quantum simulation of Neon using *lj* units, *sp* should be set to 0.600. The modification of the quantumness should be done by scaling :math:`\Lambda^{\ast}`. From 27a8b9699a1ed279ac4c6524cde1ab5a2cb41a83 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 4 Nov 2025 08:27:05 -0500 Subject: [PATCH 153/604] add missing newline --- src/dump_image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dump_image.cpp b/src/dump_image.cpp index cd37d84d3b8..1bddd0c16c3 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -1705,7 +1705,7 @@ void DumpImage::create_image() reg.ptr = ptr; if (reg.ptr->rotateflag) { - utils::logmesg(lmp, "Cannot (yet) handle rotating region {}. Skipping... 
", reg.ptr->id); + utils::logmesg(lmp, "Cannot (yet) handle rotating region {}. Skipping...\n", reg.ptr->id); continue; } From 24b36647989ca5950a1be5d685744320ee80a433 Mon Sep 17 00:00:00 2001 From: Jeremy FERSULA Date: Tue, 4 Nov 2025 11:15:13 +0100 Subject: [PATCH 154/604] Restore fix_langevin omega keyword Signed-off-by: Jeremy Fersula --- src/fix_langevin.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp index 1a13c3c77ac..9b637853a53 100644 --- a/src/fix_langevin.cpp +++ b/src/fix_langevin.cpp @@ -106,8 +106,7 @@ FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) : ascale = utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; } else if (strcmp(arg[iarg], "omega") == 0) { - if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix langevin angmom", error); - error->all(FLERR, "Illegal fix langevin command"); + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix langevin omega", error); oflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; } else if (strcmp(arg[iarg], "scale") == 0) { From 2250f6aa4daeaefa8107fc2f566d89bc29fb874c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 4 Nov 2025 09:52:54 -0500 Subject: [PATCH 155/604] Use some sphinx/rst magic to provide dummy html pages for removed ones --- doc/src/Commands_removed.rst | 7 +++++++ doc/src/atc_add_molecule.rst | 1 + doc/src/atc_add_species.rst | 1 + doc/src/atc_atom_element_map.rst | 1 + doc/src/atc_atom_weight.rst | 1 + doc/src/atc_atomic_charge.rst | 1 + doc/src/atc_boundary_dynamics.rst | 1 + doc/src/atc_boundary_faceset.rst | 1 + doc/src/atc_boundary_type.rst | 1 + doc/src/atc_consistent_fe_initialization.rst | 1 + doc/src/atc_control_localized_lambda.rst | 1 + doc/src/atc_control_momentum.rst | 1 + doc/src/atc_control_thermal.rst | 1 + doc/src/atc_decomposition.rst | 1 + doc/src/atc_electron_integration.rst | 1 + doc/src/atc_equilibrium_start.rst | 1 + doc/src/atc_extrinsic_exchange.rst | 
1 + doc/src/atc_fe_md_boundary.rst | 1 + doc/src/atc_filter_scale.rst | 1 + doc/src/atc_filter_type.rst | 1 + doc/src/atc_fix.rst | 1 + doc/src/atc_fix_flux.rst | 1 + doc/src/atc_hardy_computes.rst | 1 + doc/src/atc_hardy_fields.rst | 1 + doc/src/atc_hardy_gradients.rst | 1 + doc/src/atc_hardy_kernel.rst | 1 + doc/src/atc_hardy_on_the_fly.rst | 1 + doc/src/atc_hardy_rates.rst | 1 + doc/src/atc_initial.rst | 1 + doc/src/atc_internal_element_set.rst | 1 + doc/src/atc_internal_quadrature.rst | 1 + doc/src/atc_kernel_bandwidth.rst | 1 + doc/src/atc_lumped_lambda_solve.rst | 1 + doc/src/atc_mask_direction.rst | 1 + doc/src/atc_mass_matrix.rst | 1 + doc/src/atc_material.rst | 1 + doc/src/atc_mesh_add_to_nodeset.rst | 1 + doc/src/atc_mesh_create.rst | 1 + doc/src/atc_mesh_create_elementset.rst | 1 + doc/src/atc_mesh_create_faceset_box.rst | 1 + doc/src/atc_mesh_create_faceset_plane.rst | 1 + doc/src/atc_mesh_create_nodeset.rst | 1 + doc/src/atc_mesh_delete_elements.rst | 1 + doc/src/atc_mesh_nodeset_to_elementset.rst | 1 + doc/src/atc_mesh_output.rst | 1 + doc/src/atc_mesh_quadrature.rst | 1 + doc/src/atc_mesh_read.rst | 1 + doc/src/atc_mesh_write.rst | 1 + doc/src/atc_output.rst | 1 + doc/src/atc_output_boundary_integral.rst | 1 + doc/src/atc_output_contour_integral.rst | 1 + doc/src/atc_output_nodeset.rst | 1 + doc/src/atc_output_volume_integral.rst | 1 + doc/src/atc_pair_interactions.rst | 1 + doc/src/atc_poisson_solver.rst | 1 + doc/src/atc_read_restart.rst | 1 + doc/src/atc_remove_molecule.rst | 1 + doc/src/atc_remove_source.rst | 1 + doc/src/atc_remove_species.rst | 1 + doc/src/atc_reset_atomic_reference.rst | 1 + doc/src/atc_reset_time.rst | 1 + doc/src/atc_sample_frequency.rst | 1 + doc/src/atc_set_reference_pe.rst | 1 + doc/src/atc_source.rst | 1 + doc/src/atc_source_integration.rst | 1 + doc/src/atc_temperature_definition.rst | 1 + doc/src/atc_time_filter.rst | 1 + doc/src/atc_time_integration.rst | 1 + doc/src/atc_track_displacement.rst | 1 + 
doc/src/atc_unfix.rst | 1 + doc/src/atc_unfix_flux.rst | 1 + doc/src/atc_write_atom_weights.rst | 1 + doc/src/atc_write_restart.rst | 1 + doc/src/fix_atc.rst | 9 +++++++++ doc/src/fix_nve_awpmd.rst | 9 +++++++++ doc/src/fix_poems.rst | 9 +++++++++ doc/src/pair_awpmd.rst | 9 +++++++++ 77 files changed, 115 insertions(+) create mode 120000 doc/src/atc_add_molecule.rst create mode 120000 doc/src/atc_add_species.rst create mode 120000 doc/src/atc_atom_element_map.rst create mode 120000 doc/src/atc_atom_weight.rst create mode 120000 doc/src/atc_atomic_charge.rst create mode 120000 doc/src/atc_boundary_dynamics.rst create mode 120000 doc/src/atc_boundary_faceset.rst create mode 120000 doc/src/atc_boundary_type.rst create mode 120000 doc/src/atc_consistent_fe_initialization.rst create mode 120000 doc/src/atc_control_localized_lambda.rst create mode 120000 doc/src/atc_control_momentum.rst create mode 120000 doc/src/atc_control_thermal.rst create mode 120000 doc/src/atc_decomposition.rst create mode 120000 doc/src/atc_electron_integration.rst create mode 120000 doc/src/atc_equilibrium_start.rst create mode 120000 doc/src/atc_extrinsic_exchange.rst create mode 120000 doc/src/atc_fe_md_boundary.rst create mode 120000 doc/src/atc_filter_scale.rst create mode 120000 doc/src/atc_filter_type.rst create mode 120000 doc/src/atc_fix.rst create mode 120000 doc/src/atc_fix_flux.rst create mode 120000 doc/src/atc_hardy_computes.rst create mode 120000 doc/src/atc_hardy_fields.rst create mode 120000 doc/src/atc_hardy_gradients.rst create mode 120000 doc/src/atc_hardy_kernel.rst create mode 120000 doc/src/atc_hardy_on_the_fly.rst create mode 120000 doc/src/atc_hardy_rates.rst create mode 120000 doc/src/atc_initial.rst create mode 120000 doc/src/atc_internal_element_set.rst create mode 120000 doc/src/atc_internal_quadrature.rst create mode 120000 doc/src/atc_kernel_bandwidth.rst create mode 120000 doc/src/atc_lumped_lambda_solve.rst create mode 120000 doc/src/atc_mask_direction.rst create 
mode 120000 doc/src/atc_mass_matrix.rst create mode 120000 doc/src/atc_material.rst create mode 120000 doc/src/atc_mesh_add_to_nodeset.rst create mode 120000 doc/src/atc_mesh_create.rst create mode 120000 doc/src/atc_mesh_create_elementset.rst create mode 120000 doc/src/atc_mesh_create_faceset_box.rst create mode 120000 doc/src/atc_mesh_create_faceset_plane.rst create mode 120000 doc/src/atc_mesh_create_nodeset.rst create mode 120000 doc/src/atc_mesh_delete_elements.rst create mode 120000 doc/src/atc_mesh_nodeset_to_elementset.rst create mode 120000 doc/src/atc_mesh_output.rst create mode 120000 doc/src/atc_mesh_quadrature.rst create mode 120000 doc/src/atc_mesh_read.rst create mode 120000 doc/src/atc_mesh_write.rst create mode 120000 doc/src/atc_output.rst create mode 120000 doc/src/atc_output_boundary_integral.rst create mode 120000 doc/src/atc_output_contour_integral.rst create mode 120000 doc/src/atc_output_nodeset.rst create mode 120000 doc/src/atc_output_volume_integral.rst create mode 120000 doc/src/atc_pair_interactions.rst create mode 120000 doc/src/atc_poisson_solver.rst create mode 120000 doc/src/atc_read_restart.rst create mode 120000 doc/src/atc_remove_molecule.rst create mode 120000 doc/src/atc_remove_source.rst create mode 120000 doc/src/atc_remove_species.rst create mode 120000 doc/src/atc_reset_atomic_reference.rst create mode 120000 doc/src/atc_reset_time.rst create mode 120000 doc/src/atc_sample_frequency.rst create mode 120000 doc/src/atc_set_reference_pe.rst create mode 120000 doc/src/atc_source.rst create mode 120000 doc/src/atc_source_integration.rst create mode 120000 doc/src/atc_temperature_definition.rst create mode 120000 doc/src/atc_time_filter.rst create mode 120000 doc/src/atc_time_integration.rst create mode 120000 doc/src/atc_track_displacement.rst create mode 120000 doc/src/atc_unfix.rst create mode 120000 doc/src/atc_unfix_flux.rst create mode 120000 doc/src/atc_write_atom_weights.rst create mode 120000 
doc/src/atc_write_restart.rst create mode 100644 doc/src/fix_atc.rst create mode 100644 doc/src/fix_nve_awpmd.rst create mode 100644 doc/src/fix_poems.rst create mode 100644 doc/src/pair_awpmd.rst diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index e48078ce9b0..ad5e1596045 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -24,6 +24,13 @@ started to create problems with modern C++ compilers. LAMMPS version download and compile this version, if you want to use any of these packages. +.. toctree:: + :maxdepth: 0 + :hidden: + :glob: + + atc_* + Neighbor style and comm mode multi/old -------------------------------------- diff --git a/doc/src/atc_add_molecule.rst b/doc/src/atc_add_molecule.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_add_molecule.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_add_species.rst b/doc/src/atc_add_species.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_add_species.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_atom_element_map.rst b/doc/src/atc_atom_element_map.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_atom_element_map.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_atom_weight.rst b/doc/src/atc_atom_weight.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_atom_weight.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_atomic_charge.rst b/doc/src/atc_atomic_charge.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_atomic_charge.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_boundary_dynamics.rst b/doc/src/atc_boundary_dynamics.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ 
b/doc/src/atc_boundary_dynamics.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_boundary_faceset.rst b/doc/src/atc_boundary_faceset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_boundary_faceset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_boundary_type.rst b/doc/src/atc_boundary_type.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_boundary_type.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_consistent_fe_initialization.rst b/doc/src/atc_consistent_fe_initialization.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_consistent_fe_initialization.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_control_localized_lambda.rst b/doc/src/atc_control_localized_lambda.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_control_localized_lambda.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_control_momentum.rst b/doc/src/atc_control_momentum.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_control_momentum.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_control_thermal.rst b/doc/src/atc_control_thermal.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_control_thermal.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_decomposition.rst b/doc/src/atc_decomposition.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_decomposition.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_electron_integration.rst b/doc/src/atc_electron_integration.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ 
b/doc/src/atc_electron_integration.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_equilibrium_start.rst b/doc/src/atc_equilibrium_start.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_equilibrium_start.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_extrinsic_exchange.rst b/doc/src/atc_extrinsic_exchange.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_extrinsic_exchange.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_fe_md_boundary.rst b/doc/src/atc_fe_md_boundary.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_fe_md_boundary.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_filter_scale.rst b/doc/src/atc_filter_scale.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_filter_scale.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_filter_type.rst b/doc/src/atc_filter_type.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_filter_type.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_fix.rst b/doc/src/atc_fix.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_fix.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_fix_flux.rst b/doc/src/atc_fix_flux.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_fix_flux.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_computes.rst b/doc/src/atc_hardy_computes.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_computes.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_fields.rst 
b/doc/src/atc_hardy_fields.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_fields.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_gradients.rst b/doc/src/atc_hardy_gradients.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_gradients.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_kernel.rst b/doc/src/atc_hardy_kernel.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_kernel.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_on_the_fly.rst b/doc/src/atc_hardy_on_the_fly.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_on_the_fly.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_hardy_rates.rst b/doc/src/atc_hardy_rates.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_hardy_rates.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_initial.rst b/doc/src/atc_initial.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_initial.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_internal_element_set.rst b/doc/src/atc_internal_element_set.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_internal_element_set.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_internal_quadrature.rst b/doc/src/atc_internal_quadrature.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_internal_quadrature.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_kernel_bandwidth.rst b/doc/src/atc_kernel_bandwidth.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ 
b/doc/src/atc_kernel_bandwidth.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_lumped_lambda_solve.rst b/doc/src/atc_lumped_lambda_solve.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_lumped_lambda_solve.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mask_direction.rst b/doc/src/atc_mask_direction.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mask_direction.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mass_matrix.rst b/doc/src/atc_mass_matrix.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mass_matrix.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_material.rst b/doc/src/atc_material.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_material.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_add_to_nodeset.rst b/doc/src/atc_mesh_add_to_nodeset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_add_to_nodeset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_create.rst b/doc/src/atc_mesh_create.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_create.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_create_elementset.rst b/doc/src/atc_mesh_create_elementset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_create_elementset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_create_faceset_box.rst b/doc/src/atc_mesh_create_faceset_box.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_create_faceset_box.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline 
at end of file diff --git a/doc/src/atc_mesh_create_faceset_plane.rst b/doc/src/atc_mesh_create_faceset_plane.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_create_faceset_plane.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_create_nodeset.rst b/doc/src/atc_mesh_create_nodeset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_create_nodeset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_delete_elements.rst b/doc/src/atc_mesh_delete_elements.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_delete_elements.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_nodeset_to_elementset.rst b/doc/src/atc_mesh_nodeset_to_elementset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_nodeset_to_elementset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_output.rst b/doc/src/atc_mesh_output.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_output.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_quadrature.rst b/doc/src/atc_mesh_quadrature.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_quadrature.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_read.rst b/doc/src/atc_mesh_read.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_read.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_mesh_write.rst b/doc/src/atc_mesh_write.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_mesh_write.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_output.rst 
b/doc/src/atc_output.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_output.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_output_boundary_integral.rst b/doc/src/atc_output_boundary_integral.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_output_boundary_integral.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_output_contour_integral.rst b/doc/src/atc_output_contour_integral.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_output_contour_integral.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_output_nodeset.rst b/doc/src/atc_output_nodeset.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_output_nodeset.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_output_volume_integral.rst b/doc/src/atc_output_volume_integral.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_output_volume_integral.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_pair_interactions.rst b/doc/src/atc_pair_interactions.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_pair_interactions.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_poisson_solver.rst b/doc/src/atc_poisson_solver.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_poisson_solver.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_read_restart.rst b/doc/src/atc_read_restart.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_read_restart.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_remove_molecule.rst b/doc/src/atc_remove_molecule.rst new file mode 
120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_remove_molecule.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_remove_source.rst b/doc/src/atc_remove_source.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_remove_source.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_remove_species.rst b/doc/src/atc_remove_species.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_remove_species.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_reset_atomic_reference.rst b/doc/src/atc_reset_atomic_reference.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_reset_atomic_reference.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_reset_time.rst b/doc/src/atc_reset_time.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_reset_time.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_sample_frequency.rst b/doc/src/atc_sample_frequency.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_sample_frequency.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_set_reference_pe.rst b/doc/src/atc_set_reference_pe.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_set_reference_pe.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_source.rst b/doc/src/atc_source.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_source.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_source_integration.rst b/doc/src/atc_source_integration.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_source_integration.rst @@ -0,0 +1 @@ 
+fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_temperature_definition.rst b/doc/src/atc_temperature_definition.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_temperature_definition.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_time_filter.rst b/doc/src/atc_time_filter.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_time_filter.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_time_integration.rst b/doc/src/atc_time_integration.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_time_integration.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_track_displacement.rst b/doc/src/atc_track_displacement.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_track_displacement.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_unfix.rst b/doc/src/atc_unfix.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_unfix.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_unfix_flux.rst b/doc/src/atc_unfix_flux.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_unfix_flux.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_write_atom_weights.rst b/doc/src/atc_write_atom_weights.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_write_atom_weights.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/atc_write_restart.rst b/doc/src/atc_write_restart.rst new file mode 120000 index 00000000000..4c4215ea4e4 --- /dev/null +++ b/doc/src/atc_write_restart.rst @@ -0,0 +1 @@ +fix_atc.rst \ No newline at end of file diff --git a/doc/src/fix_atc.rst b/doc/src/fix_atc.rst new file mode 100644 
index 00000000000..c4c5d5684e6 --- /dev/null +++ b/doc/src/fix_atc.rst @@ -0,0 +1,9 @@ +fix atc command +=============== + +.. meta:: + :http-equiv=Refresh: 5; url=Commands_removed.html#atc-awpmd-and-poems-packages + +.. deprecated:: 10Sep2025 + +The ATC package and the `fix atc` command have been removed from LAMMPS. diff --git a/doc/src/fix_nve_awpmd.rst b/doc/src/fix_nve_awpmd.rst new file mode 100644 index 00000000000..c1b6dd4e973 --- /dev/null +++ b/doc/src/fix_nve_awpmd.rst @@ -0,0 +1,9 @@ +fix nve/awpmd command +===================== + +.. meta:: + :http-equiv=Refresh: 5; url=Commands_removed.html#atc-awpmd-and-poems-packages + +.. deprecated:: 10Sep2025 + +The AWPMD package and the `fix nve/awpmd` command have been removed from LAMMPS. diff --git a/doc/src/fix_poems.rst b/doc/src/fix_poems.rst new file mode 100644 index 00000000000..39f8f33acf5 --- /dev/null +++ b/doc/src/fix_poems.rst @@ -0,0 +1,9 @@ +fix poems command +================= + +.. meta:: + :http-equiv=Refresh: 5; url=Commands_removed.html#atc-awpmd-and-poems-packages + +.. deprecated:: 10Sep2025 + +The POEMS package and the `fix poems` command have been removed from LAMMPS. diff --git a/doc/src/pair_awpmd.rst b/doc/src/pair_awpmd.rst new file mode 100644 index 00000000000..e69c161e772 --- /dev/null +++ b/doc/src/pair_awpmd.rst @@ -0,0 +1,9 @@ +pair_style awpmd command +======================== + +.. meta:: + :http-equiv=Refresh: 5; url=Commands_removed.html#atc-awpmd-and-poems-packages + +.. deprecated:: 10Sep2025 + +The AWPMD package and the `pair_style awpmd` command have been removed from LAMMPS. 
From 9cb92505ce9244eeb63d7719ccb593ff152093ab Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 10:06:35 -0600 Subject: [PATCH 156/604] added another special kernel to handle minimum image checks --- lib/gpu/lal_base_charge.cpp | 39 ++- lib/gpu/lal_base_charge.h | 9 +- lib/gpu/lal_charmm_long_ext.cpp | 8 +- lib/gpu/lal_neighbor.cpp | 381 +++++++++++++++++++++++ lib/gpu/lal_neighbor.h | 9 + lib/gpu/lal_neighbor_gpu.cu | 146 ++++++++- lib/gpu/lal_neighbor_shared.cpp | 3 +- src/GPU/pair_lj_charmm_coul_long_gpu.cpp | 4 +- 8 files changed, 581 insertions(+), 18 deletions(-) diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp index 3cd6c6030a2..eea7cf95ad0 100644 --- a/lib/gpu/lal_base_charge.cpp +++ b/lib/gpu/lal_base_charge.cpp @@ -184,6 +184,33 @@ inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, _max_an_bytes=bytes; } +template +inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, + const int nall, double **host_x, + int *host_type, double *sublo, + double *subhi, tagint *tag, + int **nspecial, tagint **special, + double xprd_half, double yprd_half, + double zprd_half, int xperiodic, int yperiodic, + int zperiodic, bool &success) { + success=true; + resize_atom(inum,nall,success); + resize_local(inum,host_inum,nbor->max_nbors(),success); + if (!success) + return; + atom->cast_copy_x(host_x,host_type); + + int mn; + nbor->build_nbor_list2(host_x, inum, host_inum, nall, *atom, sublo, subhi, + tag, nspecial, special, success, mn, xprd_half, + yprd_half, zprd_half, xperiodic, yperiodic, + zperiodic, ans->error_flag); + + double bytes=ans->gpu_bytes()+nbor->gpu_bytes(); + if (bytes>_max_an_bytes) + _max_an_bytes=bytes; +} + // --------------------------------------------------------------------------- // Copy nbor list from host if necessary and then calculate forces, virials,.. 
// --------------------------------------------------------------------------- @@ -257,7 +284,8 @@ int** BaseChargeT::compute(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, - double *host_q, double *boxlo, double *prd) { + double *host_q, double *boxlo, double *prd, + int* periodicity) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -288,8 +316,15 @@ int** BaseChargeT::compute(const int ago, const int inum_full, // Build neighbor list on GPU if necessary if (ago==0) { + double xprd_half = prd[0] * 0.5; + double yprd_half = prd[1] * 0.5; + double zprd_half = prd[2] * 0.5; build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, - sublo, subhi, tag, nspecial, special, success); + sublo, subhi, tag, nspecial, special, + xprd_half, yprd_half, zprd_half, + periodicity[0], periodicity[1], periodicity[2], + success); + if (!success) return nullptr; atom->cast_q_data(host_q); diff --git a/lib/gpu/lal_base_charge.h b/lib/gpu/lal_base_charge.h index 307c5c079f6..ef26d68fbda 100644 --- a/lib/gpu/lal_base_charge.h +++ b/lib/gpu/lal_base_charge.h @@ -129,6 +129,13 @@ class BaseCharge { double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, bool &success); + void build_nbor_list(const int inum, const int host_inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, double xprd_half, double yprd_half, + double zprd_half, int xperiodic, int yperiodic, + int zperiodic, bool &success); + /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, @@ -144,7 +151,7 @@ class BaseCharge { tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, - double 
*charge, double *boxlo, double *prd); + double *charge, double *boxlo, double *prd, int* periodicity=nullptr); // -------------------------- DEVICE DATA ------------------------- diff --git a/lib/gpu/lal_charmm_long_ext.cpp b/lib/gpu/lal_charmm_long_ext.cpp index dfe3d993831..fd8849e430f 100644 --- a/lib/gpu/lal_charmm_long_ext.cpp +++ b/lib/gpu/lal_charmm_long_ext.cpp @@ -109,17 +109,17 @@ int** crml_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + double *prd, int* periodicity) { return CRMLMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void crml_gpu_compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, + const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, + const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success, double *host_q, const int nlocal, double *boxlo, double *prd) { diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index aca9b1d1417..698629ea887 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -856,6 +856,380 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, time_nbor.stop(); } +template +void Neighbor::build_nbor_list2(double **x, const int inum, const int host_inum, + const int nall, Atom &atom, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success, + int &mn, double xprd_half, double yprd_half, + double zprd_half, int xperiodic, int yperiodic, + int zperiodic, UCL_Vector &error_flag) { + 
_nbor_time_avail=true; + const int nt=inum+host_inum; + + const double subx = subhi[0]-sublo[0]; + const double suby = subhi[1]-sublo[1]; + const double subz = subhi[2]-sublo[2]; + + // Calculate number of cells and allocate storage for binning as necessary + int ncellx, ncelly, ncellz; + int cells_in_cutoff=static_cast(ceil(_cutoff/_cell_size)); + int ghost_cells=2*cells_in_cutoff; + ncellx = static_cast(ceil(subx/_cell_size))+ghost_cells; + ncelly = static_cast(ceil(suby/_cell_size))+ghost_cells; + ncellz = static_cast(ceil(subz/_cell_size))+ghost_cells; + + #ifndef LAL_USE_OLD_NEIGHBOR + if (_auto_cell_size && subz>0.0) { + if (_old_ncellx!=ncellx || _old_ncelly!=ncelly || _old_ncellz!=ncellz) { + _cell_size = _shared->best_cell_size(subx, suby, subz, nt, _cutoff); + cells_in_cutoff=static_cast(ceil(_cutoff/_cell_size)); + ghost_cells=2*cells_in_cutoff; + ncellx = static_cast(ceil(subx/_cell_size))+ghost_cells; + ncelly = static_cast(ceil(suby/_cell_size))+ghost_cells; + ncellz = static_cast(ceil(subz/_cell_size))+ghost_cells; + } + } + #endif + + int ncell_3d = ncellx * ncelly * ncellz; + if (ncell_3d+1>_ncells) { + cell_counts.clear(); +#ifndef LAL_USE_OLD_NEIGHBOR + cell_subgroup_counts.clear(); +#endif + + if (_gpu_nbor==2) { + if (_ncells>0) + delete [] cell_iter; + cell_iter = new int[ncell_3d+1]; + success = success && (cell_counts.alloc(ncell_3d+1,*dev, + UCL_READ_WRITE,UCL_READ_ONLY) == UCL_SUCCESS); +#ifndef LAL_USE_OLD_NEIGHBOR + success = success && (cell_subgroup_counts.alloc(ncell_3d+1,*dev, + UCL_READ_WRITE,UCL_READ_ONLY) == UCL_SUCCESS); + if (!success) return; + cell_subgroup_counts.host[0]=0; +#endif + } else { + cell_counts.device.clear(); + success = success && (cell_counts.device.alloc(ncell_3d+1, + *dev) == UCL_SUCCESS); + } + if (!success) return; + + _ncells=ncell_3d+1; + _cell_bytes=cell_counts.device.row_bytes(); +#ifndef LAL_USE_OLD_NEIGHBOR + _cell_bytes+=cell_subgroup_counts.row_bytes()+subgroup2cell.row_bytes(); +#endif + } + + 
const auto cutoff_cast=static_cast(_cutoff); + + if (_maxspecial>0) { + time_nbor.start(); + UCL_H_Vec view_nspecial; + UCL_H_Vec view_special, view_tag; + view_nspecial.view(nspecial[0],nt*3,*dev); + view_special.view(special[0],nt*_maxspecial,*dev); + view_tag.view(tag,nall,*dev); + ucl_copy(dev_nspecial,view_nspecial,nt*3,false); + ucl_copy(dev_special_t,view_special,nt*_maxspecial,false); + ucl_copy(atom.dev_tag,view_tag,nall,false); + time_nbor.stop(); + if (_time_device) + time_nbor.add_to_total(); + + // on the host, special[i][j] = the special j neighbor of atom i (nall by maxspecial) + // on the device, transpose the matrix (1-d array) for coalesced reads + // dev_special[i][j] = the special i neighbor of atom j + + time_transpose.start(); + const int b2x=_block_cell_2d; + const int b2y=_block_cell_2d; + const int g2x=static_cast(ceil(static_cast(_maxspecial)/b2x)); + const int g2y=static_cast(ceil(static_cast(nt)/b2y)); + // the maximum number of blocks on the device is typically 65535 + // in principle we can use a lower number to have more resource per block 32768 + const int max_num_blocks = 65535; + int shift = 0; + if (g2y < max_num_blocks) { + _shared->k_transpose.set_size(g2x,g2y,b2x,b2y); + _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift); + } else { + // using a fixed number of blocks + int g2y_m = max_num_blocks; + _shared->k_transpose.set_size(g2x,g2y_m,b2x,b2y); + // number of chunks needed for the whole transpose + const int num_chunks = ceil(static_cast(g2y) / g2y_m); + for (int i = 0; i < num_chunks; i++) { + _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift); + shift += g2y_m*b2y; + } + } + + time_transpose.stop(); + } + + // If binning on CPU, do this now +#ifndef LAL_USE_OLD_NEIGHBOR + int subgroup_count = 0; +#endif + if (_gpu_nbor==2) { + #ifndef GERYON_OCL_FLUSH + dev_nbor.flush(); + #endif + double stime = MPI_Wtime(); + int *cell_id=atom.host_cell_id.begin(); + int 
*particle_id=atom.host_particle_id.begin(); + + // Build cell list on CPU + cell_counts.host.zero(); + double i_cell_size=1.0/_cell_size; + + int offset_hi=cells_in_cutoff+1; + for (int i=0; i(px*i_cell_size+cells_in_cutoff); + ix = std::max(ix,cells_in_cutoff); + ix = std::min(ix,ncellx-offset_hi); + int iy = static_cast(py*i_cell_size+cells_in_cutoff); + iy = std::max(iy,cells_in_cutoff); + iy = std::min(iy,ncelly-offset_hi); + int iz = static_cast(pz*i_cell_size+cells_in_cutoff); + iz = std::max(iz,cells_in_cutoff); + iz = std::min(iz,ncellz-offset_hi); + + int id = ix+iy*ncellx+iz*ncellx*ncelly; + cell_id[i] = id; + cell_counts[id+1]++; + } + +#ifndef LAL_USE_OLD_NEIGHBOR + // populate subgroup counts only for the local atoms + for (int i=1; i<_ncells; i++) { + cell_subgroup_counts[i] = ceil(static_cast(cell_counts[i]) / + _simd_size); + subgroup_count += cell_subgroup_counts[i]; + cell_subgroup_counts[i] += cell_subgroup_counts[i-1]; + } + if (subgroup_count > (int)subgroup2cell.numel()) { + subgroup2cell.clear(); + success = success && (subgroup2cell.alloc(1.1*subgroup_count,*dev, + UCL_READ_WRITE,UCL_READ_ONLY) == UCL_SUCCESS); + if (!success) return; + _cell_bytes=cell_counts.device.row_bytes() + + cell_subgroup_counts.row_bytes()+subgroup2cell.row_bytes(); + } + for (int i=1; i<_ncells; i++) + for (int j=cell_subgroup_counts[i-1]; j(px*i_cell_size); + ix = std::max(ix,0); + ix = std::min(ix,ncellx-1); + int iy = static_cast(py*i_cell_size); + iy = std::max(iy,0); + iy = std::min(iy,ncelly-1); + int iz = static_cast(pz*i_cell_size); + iz = std::max(iz,0); + iz = std::min(iz,ncellz-1); + + int id = ix+iy*ncellx+iz*ncellx*ncelly; + cell_id[i] = id; + cell_counts[id+1]++; + } + + mn=0; + for (int i=0; i<_ncells; i++) + mn=std::max(mn,cell_counts[i]); + double mind=std::min(subx,suby); + mind=std::min(mind,subz) + _cutoff; + double ics; + if (mind >= _cell_size) ics = i_cell_size; + else ics = 1.0 / mind; + double vadjust=_cutoff*ics; + 
vadjust*=vadjust*vadjust*4.1888; + if (_cutoff < _cell_size) vadjust*=1.46; + mn=std::max(mn,static_cast(ceil(_max_neighbor_factor*vadjust*mn))); + if (mn<33) mn+=3; + + resize_max_neighbors(mn,success); + set_nbor_block_size(mn/2); + if (!success) + return; + _total_atoms=nt; + + // For neighbor builds for host atoms, _max_nbors is used for neighbor + // allocation offsets. + if (_max_host > 0) mn=_max_nbors; + + cell_iter[0]=0; + for (int i=1; i<_ncells; i++) { + cell_counts[i]+=cell_counts[i-1]; + cell_iter[i]=cell_counts[i]; + } + time_hybrid1.start(); + #ifndef LAL_USE_OLD_NEIGHBOR + if (_old_ncellx!=ncellx || _old_ncelly!=ncelly || _old_ncellz!=ncellz) { + _old_ncellx = ncellx; + _old_ncelly = ncelly; + _old_ncellz = ncellz; + const int bin_stencil_stride = cells_in_cutoff * 2 + 1; + const int bin_stencil_size = bin_stencil_stride * bin_stencil_stride; + if (bin_stencil_size > (int)_host_bin_stencil.numel()) + _host_bin_stencil.alloc(bin_stencil_size,*dev); + for (int s = 0; sneigh_tex.bind_float(atom.x,4); + + // If binning on GPU, do this now + if (_gpu_nbor==1) { + mn = _max_nbors; + const auto i_cell_size=static_cast(1.0/_cell_size); + const int neigh_block=_block_cell_id; + const int GX=(int)ceil((double)nall/neigh_block); + const auto sublo0=static_cast(sublo[0]); + const auto sublo1=static_cast(sublo[1]); + const auto sublo2=static_cast(sublo[2]); + _shared->k_cell_id.set_size(GX,neigh_block); + _shared->k_cell_id.run(&atom.x, &atom.dev_cell_id, + &atom.dev_particle_id, &sublo0, &sublo1, + &sublo2, &i_cell_size, &ncellx, &ncelly, &ncellz, + &nt, &nall, &cells_in_cutoff); + + atom.sort_neighbor(nall); + + /* calculate cell count */ + _shared->k_cell_counts.set_size(GX,neigh_block); + _shared->k_cell_counts.run(&atom.dev_cell_id, &cell_counts, &nall, + &ncell_3d); + } + + /* build the neighbor list */ + const int cell_block=_block_nbor_build; +#ifndef LAL_USE_OLD_NEIGHBOR + int nblocks = (subgroup_count-1)/(cell_block/_simd_size)+1; + 
_shared->k_build_nbor.set_size(nblocks, cell_block); + _shared->k_build_nbor.run(&atom.x, &atom.dev_particle_id, + &cell_counts, &dev_nbor, &nbor_host, + &dev_numj_host, &mn, &cutoff_cast, &ncellx, + &ncelly, &ncellz, &inum, &nt, &nall, + &_threads_per_atom, &cells_in_cutoff, + &cell_subgroup_counts, &subgroup2cell, + &subgroup_count, _bin_stencil.begin(), + &error_flag); + error_flag.update_host(); +#else + _shared->k_build_nbor.set_size(ncellx-ghost_cells,(ncelly-ghost_cells)* + (ncellz-ghost_cells),cell_block,1); + _shared->k_build_nbor.run(&atom.x, &atom.dev_particle_id, + &cell_counts, &dev_nbor, &nbor_host, + &dev_numj_host, &mn, &cutoff_cast, &ncellx, + &ncelly, &ncellz, &inum, &nt, &nall, + &_threads_per_atom, &cells_in_cutoff); +#endif + + /* Get the maximum number of nbors and realloc if necessary */ + UCL_D_Vec _numj_view; + if (_gpu_nbor!=2 || inuminum) { + _host_offset.view_offset(inum,host_acc,nt-inum); + ucl_copy(_host_offset,dev_numj_host,nt-inum,true); + } + } + + if (_gpu_nbor!=2) { + host_acc.sync(); + mn=host_acc[0]; + for (int i=1; i_max_nbors) { + resize_max_neighbors(mn,success); + if (!success) + return; + time_kernel.stop(); + if (_time_device) + time_kernel.add_to_total(); + build_nbor_list2(x, inum, host_inum, nall, atom, sublo, subhi, tag, + nspecial, special, success, mn, xprd_half, yprd_half, + zprd_half, xperiodic, yperiodic, zperiodic, + error_flag); + return; + } + } + + if (_maxspecial>0) { + const int GX2=static_cast(ceil(static_cast + (nt*_threads_per_atom)/cell_block)); + const auto _xprd_half=static_cast(xprd_half); + const auto _yprd_half=static_cast(yprd_half); + const auto _zprd_half=static_cast(zprd_half); + if (_cutoff > _xprd_half || _cutoff > _yprd_half || + _cutoff > _zprd_half) { + + } + _shared->k_special.set_size(GX2,cell_block); + _shared->k_special.run(&atom.x, &dev_nbor, &nbor_host, &dev_numj_host, + &atom.dev_tag, &dev_nspecial, &dev_special, + &inum, &nt, &_max_nbors, &_threads_per_atom, + &_xprd_half, 
&_yprd_half, &_zprd_half, + &xperiodic, &yperiodic, &zperiodic); + + } + time_kernel.stop(); + + time_nbor.start(); + if (inum &out, const UCL_D_Vec &in, const int columns_in, const int rows_in) { @@ -872,3 +1246,10 @@ template void Neighbor::build_nbor_list Atom &atom, double *sublo, double *subhi, tagint *, int **, tagint **, bool &success, int &mn, UCL_Vector &error_flag); + +template void Neighbor::build_nbor_list2 + (double **x, const int inum, const int host_inum, const int nall, + Atom &atom, double *sublo, double *subhi, + tagint *, int **, tagint **, bool &success, int &mn, double xprd_half, + double yprd_half, double zprd_half, int xperiodic, int yperiodic, + int zperiodic, UCL_Vector &error_flag); diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 37fbb3bcb53..c0d5e82f912 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -194,6 +194,15 @@ class Neighbor { int **nspecial, tagint **special, bool &success, int &max_nbors, UCL_Vector &error_flag); + template + void build_nbor_list2(double **x, const int inum, const int host_inum, + const int nall, Atom &atom, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success, + int &max_nbors, double xprd_half, double yprd_half, + double zprd_half, int xperiodic, int yperiodic, + int zperiodic, UCL_Vector &error_flag); + /// Return the number of bytes used on device inline double gpu_bytes() { double res = _gpu_bytes + _c_bytes + _cell_bytes; diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index a0fa26b7e4c..4223bec6ce2 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -180,7 +180,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, __global int *nbor_list, __global int *host_nbor_list, __global int *host_numj, - int neigh_bin_size, numtyp cutoff_neigh, + int _max_nbors, numtyp cutoff_neigh, int ncellx, int ncelly, int ncellz, int inum, int nt, int nall, int t_per_atom, int 
cells_in_cutoff, @@ -284,7 +284,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, } else { stride=0; neigh_counts=host_numj+pid_i-inum; - neigh_list=host_nbor_list+(pid_i-inum)*neigh_bin_size; + neigh_list=host_nbor_list+(pid_i-inum)*_max_nbors; } // loop through neighbors @@ -343,9 +343,8 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, #endif r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z; -//USE CUTOFFSQ? if (r2 < cutoff_neigh*cutoff_neigh && pid_j != pid_i && pid_i < nt) { - if (cnt < neigh_bin_size) { + if (cnt < _max_nbors) { cnt++; *neigh_list = pid_j; neigh_list++; @@ -372,7 +371,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, __global int *nbor_list, __global int *host_nbor_list, __global int *host_numj, - int neigh_bin_size, numtyp cell_size, + int _max_nbors, numtyp cutoff_neigh, int ncellx, int ncelly, int ncellz, int inum, int nt, int nall, int t_per_atom, int cells_in_cutoff) @@ -420,7 +419,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, } else { stride=0; neigh_counts=host_numj+pid_i-inum; - neigh_list=host_nbor_list+(pid_i-inum)*neigh_bin_size; + neigh_list=host_nbor_list+(pid_i-inum)*_max_nbors; } // loop through neighbors @@ -460,9 +459,9 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, diff.z = atom_i.z - pos_sh[j].z; r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z; - if (r2 < cell_size*cell_size && pid_j != pid_i) { + if (r2 < cutoff_neigh*cutoff_neigh && pid_j != pid_i) { cnt++; - if (cnt <= neigh_bin_size) { + if (cnt <= _max_nbors) { *neigh_list = pid_j; neigh_list++; if ((cnt & (t_per_atom-1))==0) @@ -593,3 +592,134 @@ __kernel void kernel_special(__global int *dev_nbor, } } // if ii } + +ucl_inline int minimum_image_check(numtyp dx, numtyp dy, numtyp dz, + numtyp xprd_half, numtyp yprd_half, numtyp zprd_half, + int xperiodic, int yperiodic, int zperiodic) +{ + if (xperiodic && ucl_abs(dx) > 
xprd_half) return 1; + if (yperiodic && ucl_abs(dy) > yprd_half) return 1; + if (zperiodic && ucl_abs(dz) > zprd_half) return 1; + return 0; +} + +__kernel void kernel_special2(const __global numtyp4 *restrict x_, + __global int *dev_nbor, + __global int *host_nbor_list, + const __global int *host_numj, + const __global tagint *restrict tag, + const __global int *restrict nspecial, + const __global tagint *restrict special, + int inum, int nt, int max_nbors, int t_per_atom, + numtyp xprd_half, numtyp yprd_half, numtyp zprd_half, + int xperiodic, int yperiodic, int zperiodic) { + int tid=THREAD_ID_X; + int ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); + ii+=tid/t_per_atom; + int offset=tid & (t_per_atom-1); + + if (ii 0 + tagint special_preload[SPECIAL_DATA_PRELOAD_SIZE]; + for (int i = 0, j = 0; (i < n3) && (j < SPECIAL_DATA_PRELOAD_SIZE); i+=UNROLL_FACTOR_SPECIAL, j++) { + special_preload[j] = special[ii + i*nt]; + } +#endif + + for (int m=0; m 0 + if ((c == 0) && (j < SPECIAL_DATA_PRELOAD_SIZE)) { + special_data[c] = special_preload[j]; + } else + #endif + special_data[c] = special[ii + (i+c)*nt]; + } + } + + for (int k=0; k= n1) { + which[k]++; + } + } + for (int k=0; k= n2) { + which[k]++; + } + which[k] <<= SBBITS; + } + for (int c = 0; c < UNROLL_FACTOR_SPECIAL; c++) { + if (i + c < n3) { + for (int l=0; lago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, domain->prd); + success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; From bfffa9c703a733971fba81b26b5fef3fec7c3730 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 10:15:19 -0600 Subject: [PATCH 157/604] need to update the API of the compute functions with device neigh builds in the lib/gpu and src/GPU classes --- lib/gpu/lal_born_coul_long_cs_ext.cpp | 4 ++-- 
lib/gpu/lal_born_coul_long_ext.cpp | 4 ++-- src/GPU/pair_born_coul_long_cs_gpu.cpp | 4 ++-- src/GPU/pair_born_coul_long_gpu.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs_ext.cpp b/lib/gpu/lal_born_coul_long_cs_ext.cpp index 3d6383acca9..0119ee4f719 100644 --- a/lib/gpu/lal_born_coul_long_cs_ext.cpp +++ b/lib/gpu/lal_born_coul_long_cs_ext.cpp @@ -107,11 +107,11 @@ int** bornclcs_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + double *prd, int* periodicity) { return BCLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/lal_born_coul_long_ext.cpp b/lib/gpu/lal_born_coul_long_ext.cpp index 6975f5237e3..c89bca194ea 100644 --- a/lib/gpu/lal_born_coul_long_ext.cpp +++ b/lib/gpu/lal_born_coul_long_ext.cpp @@ -107,11 +107,11 @@ int** borncl_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + double *prd, int* periodicity) { return BORNCLMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void borncl_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/src/GPU/pair_born_coul_long_cs_gpu.cpp b/src/GPU/pair_born_coul_long_cs_gpu.cpp index 4f25f08a4c4..8d6c0a8f2b0 100644 --- a/src/GPU/pair_born_coul_long_cs_gpu.cpp +++ 
b/src/GPU/pair_born_coul_long_cs_gpu.cpp @@ -61,7 +61,7 @@ int **bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, - double *boxlo, double *prd); + double *boxlo, double *prd, int* periodicity); void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -117,7 +117,7 @@ void PairBornCoulLongCSGPU::compute(int eflag, int vflag) firstneigh = bornclcs_gpu_compute_n( neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->q, domain->boxlo, domain->prd); + cpu_time, success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_born_coul_long_gpu.cpp b/src/GPU/pair_born_coul_long_gpu.cpp index 19b545003e7..f5e470693bb 100644 --- a/src/GPU/pair_born_coul_long_gpu.cpp +++ b/src/GPU/pair_born_coul_long_gpu.cpp @@ -50,7 +50,7 @@ int **borncl_gpu_compute_n(const int ago, const int inum_full, const int nall, d int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, - double *boxlo, double *prd); + double *boxlo, double *prd, int* periodicity); void borncl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -105,7 +105,7 @@ void 
PairBornCoulLongGPU::compute(int eflag, int vflag) firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->q, domain->boxlo, domain->prd); + cpu_time, success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; From 641407fea064cfec5bca56dae0bd09e4855b6bb9 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 4 Nov 2025 11:55:19 -0500 Subject: [PATCH 158/604] small formatting update --- doc/src/fix_gcmc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/fix_gcmc.rst b/doc/src/fix_gcmc.rst index 2b6a7786f8a..792f9a1ba94 100644 --- a/doc/src/fix_gcmc.rst +++ b/doc/src/fix_gcmc.rst @@ -69,7 +69,7 @@ Description """"""""""" This fix performs grand canonical Monte Carlo (GCMC) exchanges of atoms -or molecules with an imaginary ideal gas reservoir at the specified T +or molecules with an imaginary ideal gas reservoir at the specified *T* and chemical potential (:math:`\mu`) as discussed in :ref:`(Frenkel) `. It also attempts Monte Carlo (MC) moves (translations and molecule rotations) within the simulation cell or region. 
If used with From e387ef9a6368c130a6df5bab72b4c04d5a382450 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 11:40:52 -0600 Subject: [PATCH 159/604] removed older version of the special kernel (without minimum image checks); may revert this change if something is wrong --- lib/gpu/lal_neighbor_gpu.cu | 109 +---------------------- lib/gpu/lal_neighbor_shared.cpp | 3 +- src/GPU/pair_born_coul_long_gpu.cpp | 3 +- src/GPU/pair_lj_charmm_coul_long_gpu.cpp | 16 ++-- 4 files changed, 13 insertions(+), 118 deletions(-) diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index 4223bec6ce2..e3393824c0c 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -486,113 +486,6 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, #define UNROLL_FACTOR_LIST 4 #define UNROLL_FACTOR_SPECIAL 2 -__kernel void kernel_special(__global int *dev_nbor, - __global int *host_nbor_list, - const __global int *host_numj, - const __global tagint *restrict tag, - const __global int *restrict nspecial, - const __global tagint *restrict special, - int inum, int nt, int max_nbors, int t_per_atom) { - int tid=THREAD_ID_X; - int ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); - ii+=tid/t_per_atom; - int offset=tid & (t_per_atom-1); - - if (ii 0 - tagint special_preload[SPECIAL_DATA_PRELOAD_SIZE]; - for (int i = 0, j = 0; (i < n3) && (j < SPECIAL_DATA_PRELOAD_SIZE); i+=UNROLL_FACTOR_SPECIAL, j++) { - special_preload[j] = special[ii + i*nt]; - } -#endif - - for (int m=0; m 0 - if ((c == 0) && (j < SPECIAL_DATA_PRELOAD_SIZE)) { - special_data[c] = special_preload[j]; - } - else -#endif - special_data[c] = special[ii + (i+c)*nt]; - } - } - - for (int k=0; k= n1) { - which[k]++; - } - } - for (int k=0; k= n2) { - which[k]++; - } - which[k] <<= SBBITS; - } - for (int c = 0; c < UNROLL_FACTOR_SPECIAL; c++) { - if (i + c < n3) { - for (int l=0; lago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, 
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->q, domain->boxlo, domain->prd, domain->periodicity); + cpu_time, success, atom->q, domain->boxlo, domain->prd, + domain->periodicity); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp index 18abe059862..05470193a83 100644 --- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp @@ -101,18 +101,20 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag) domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi); } inum = atom->nlocal; - firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, - atom->tag, atom->nspecial, atom->special, eflag, vflag, - eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, domain->prd, domain->periodicity); + firstneigh = crml_gpu_compute_n( + neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, + atom->tag, atom->nspecial, atom->special, eflag, vflag, + eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, + success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, - eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q, - atom->nlocal, domain->boxlo, domain->prd); + crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, + eflag_atom, vflag_atom, host_start, cpu_time, + success, atom->q, atom->nlocal, domain->boxlo, domain->prd); } if (!success) error->one(FLERR, "Insufficient memory on accelerator"); From 141242771b6fb101dcd65cbf9955d51429c8d4e8 Mon Sep 17 00:00:00 2001 
From: Trung Nguyen Date: Tue, 4 Nov 2025 13:10:21 -0600 Subject: [PATCH 160/604] modified the argument list of build_nbor_list() to prepare to consolidate the 2 versions --- lib/gpu/lal_base_charge.cpp | 18 +++++------------- lib/gpu/lal_base_charge.h | 5 ++--- lib/gpu/lal_neighbor.cpp | 32 ++++++++++++++------------------ lib/gpu/lal_neighbor.h | 7 +++---- 4 files changed, 24 insertions(+), 38 deletions(-) diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp index eea7cf95ad0..49f0fd53776 100644 --- a/lib/gpu/lal_base_charge.cpp +++ b/lib/gpu/lal_base_charge.cpp @@ -190,9 +190,7 @@ inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, - double xprd_half, double yprd_half, - double zprd_half, int xperiodic, int yperiodic, - int zperiodic, bool &success) { + double* prd, int* periodicity, bool &success) { success=true; resize_atom(inum,nall,success); resize_local(inum,host_inum,nbor->max_nbors(),success); @@ -201,10 +199,9 @@ inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, atom->cast_copy_x(host_x,host_type); int mn; - nbor->build_nbor_list2(host_x, inum, host_inum, nall, *atom, sublo, subhi, - tag, nspecial, special, success, mn, xprd_half, - yprd_half, zprd_half, xperiodic, yperiodic, - zperiodic, ans->error_flag); + nbor->build_nbor_list(host_x, inum, host_inum, nall, *atom, sublo, subhi, + tag, nspecial, special, success, mn, prd, periodicity, + ans->error_flag); double bytes=ans->gpu_bytes()+nbor->gpu_bytes(); if (bytes>_max_an_bytes) @@ -316,14 +313,9 @@ int** BaseChargeT::compute(const int ago, const int inum_full, // Build neighbor list on GPU if necessary if (ago==0) { - double xprd_half = prd[0] * 0.5; - double yprd_half = prd[1] * 0.5; - double zprd_half = prd[2] * 0.5; build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, - xprd_half, yprd_half, 
zprd_half, - periodicity[0], periodicity[1], periodicity[2], - success); + prd, periodicity, success); if (!success) return nullptr; diff --git a/lib/gpu/lal_base_charge.h b/lib/gpu/lal_base_charge.h index ef26d68fbda..243febc3e82 100644 --- a/lib/gpu/lal_base_charge.h +++ b/lib/gpu/lal_base_charge.h @@ -132,9 +132,8 @@ class BaseCharge { void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, double xprd_half, double yprd_half, - double zprd_half, int xperiodic, int yperiodic, - int zperiodic, bool &success); + tagint **special, double* prd, int* periodicity, + bool &success); /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, const int nall, diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 698629ea887..ae81c7696d2 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -857,13 +857,12 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, } template -void Neighbor::build_nbor_list2(double **x, const int inum, const int host_inum, +void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, const int nall, Atom &atom, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, bool &success, - int &mn, double xprd_half, double yprd_half, - double zprd_half, int xperiodic, int yperiodic, - int zperiodic, UCL_Vector &error_flag) { + int &mn, double* prd, int* periodicity, + UCL_Vector &error_flag) { _nbor_time_avail=true; const int nt=inum+host_inum; @@ -1194,9 +1193,8 @@ void Neighbor::build_nbor_list2(double **x, const int inum, const int host_inum, time_kernel.stop(); if (_time_device) time_kernel.add_to_total(); - build_nbor_list2(x, inum, host_inum, nall, atom, sublo, subhi, tag, - nspecial, special, success, mn, xprd_half, yprd_half, - zprd_half, xperiodic, yperiodic, zperiodic, + 
build_nbor_list(x, inum, host_inum, nall, atom, sublo, subhi, tag, + nspecial, special, success, mn, prd, periodicity, error_flag); return; } @@ -1205,13 +1203,12 @@ void Neighbor::build_nbor_list2(double **x, const int inum, const int host_inum, if (_maxspecial>0) { const int GX2=static_cast(ceil(static_cast (nt*_threads_per_atom)/cell_block)); - const auto _xprd_half=static_cast(xprd_half); - const auto _yprd_half=static_cast(yprd_half); - const auto _zprd_half=static_cast(zprd_half); - if (_cutoff > _xprd_half || _cutoff > _yprd_half || - _cutoff > _zprd_half) { - - } + const auto _xprd_half=static_cast(0.5*prd[0]); + const auto _yprd_half=static_cast(0.5*prd[1]); + const auto _zprd_half=static_cast(0.5*prd[2]); + const int xperiodic=periodicity[0]; + const int yperiodic=periodicity[1]; + const int zperiodic=periodicity[2]; _shared->k_special.set_size(GX2,cell_block); _shared->k_special.run(&atom.x, &dev_nbor, &nbor_host, &dev_numj_host, &atom.dev_tag, &dev_nspecial, &dev_special, @@ -1247,9 +1244,8 @@ template void Neighbor::build_nbor_list tagint *, int **, tagint **, bool &success, int &mn, UCL_Vector &error_flag); -template void Neighbor::build_nbor_list2 +template void Neighbor::build_nbor_list (double **x, const int inum, const int host_inum, const int nall, Atom &atom, double *sublo, double *subhi, - tagint *, int **, tagint **, bool &success, int &mn, double xprd_half, - double yprd_half, double zprd_half, int xperiodic, int yperiodic, - int zperiodic, UCL_Vector &error_flag); + tagint *, int **, tagint **, bool &success, int &mn, double* prd, + int* periodicity, UCL_Vector &error_flag); diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index c0d5e82f912..313a1354765 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -195,13 +195,12 @@ class Neighbor { int &max_nbors, UCL_Vector &error_flag); template - void build_nbor_list2(double **x, const int inum, const int host_inum, + void build_nbor_list(double **x, const int inum, 
const int host_inum, const int nall, Atom &atom, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, bool &success, - int &max_nbors, double xprd_half, double yprd_half, - double zprd_half, int xperiodic, int yperiodic, - int zperiodic, UCL_Vector &error_flag); + int &max_nbors, double* prd, int* periodicity, + UCL_Vector &error_flag); /// Return the number of bytes used on device inline double gpu_bytes() { From cf3099d8aeb4b8568dacca3819c8f9ac30ed4187 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 13:22:45 -0600 Subject: [PATCH 161/604] removed whitespace --- lib/gpu/lal_neighbor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index ae81c7696d2..9df84150053 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -1215,7 +1215,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, &inum, &nt, &_max_nbors, &_threads_per_atom, &_xprd_half, &_yprd_half, &_zprd_half, &xperiodic, &yperiodic, &zperiodic); - + } time_kernel.stop(); From 7bc513be88d5666de11307de7be706f0ced02d61 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 13:37:00 -0600 Subject: [PATCH 162/604] addressed the suggestions by Copilot --- lib/gpu/lal_born_coul_long_cs.cu | 4 ++-- lib/gpu/lal_neighbor_gpu.cu | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index 6e70268878d..3d44cb759fb 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -131,7 +131,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, numtyp r = ucl_sqrt(rsq); fetch(prefactor,j,q_tex); prefactor *= qqrd2e * qtmp; - if (factor_coul>(acctyp)0) { + if (factor_coul > (acctyp)0) { // When bonded parts are being calculated, a minimal distance (EPS_EWALD) // has to be added to the prefactor and erfc in order to make the 
// used approximation functions valid @@ -277,7 +277,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, fetch(prefactor,j,q_tex); prefactor *= qqrd2e * qtmp; - if (factor_coul>(acctyp)0) { + if (factor_coul > (acctyp)0) { // When bonded parts are being calculated, a minimal distance (EPS_EWALD) // has to be added to the prefactor and erfc in order to make the // used approximation functions valid diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index e3393824c0c..ec22ebf5af0 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -488,8 +488,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, ucl_inline int minimum_image_check(numtyp dx, numtyp dy, numtyp dz, numtyp xprd_half, numtyp yprd_half, numtyp zprd_half, - int xperiodic, int yperiodic, int zperiodic) -{ + int xperiodic, int yperiodic, int zperiodic) { if (xperiodic && ucl_abs(dx) > xprd_half) return 1; if (yperiodic && ucl_abs(dy) > yprd_half) return 1; if (zperiodic && ucl_abs(dz) > zprd_half) return 1; @@ -519,9 +518,9 @@ __kernel void kernel_special(const __global numtyp4 *restrict x_, int n2=nspecial[ii*3+1]; int n3=nspecial[ii*3+2]; - numtyp4 atom_i; + numtyp4 atom_i; fetch4(atom_i,ii,pos_tex); //pos[i]; - + int myj; if (ii < inum) { stride=inum; From fd8e54b9197ef70a8361656f338c67770d891e31 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 4 Nov 2025 15:57:14 -0600 Subject: [PATCH 163/604] removed testing version of ucl_recip in the born/coul/long/cs kernel --- lib/gpu/lal_born_coul_long_cs.cu | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index 3d44cb759fb..f8ab43cae32 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -40,7 +40,7 @@ _texture( q_tex,int2); #define B4 (acctyp)-5.80844129e-3 #define B5 (acctyp)1.14652755e-1 -#if defined 
_DOUBLE_DOUBLE +#if defined _DOUBLE_DOUBLE #define EPSILON (acctyp)(1.0e-20) #define EPS_EWALD (acctyp)(1.0e-6) #define EPS_EWALD_SQR (acctyp)(1.0e-12) @@ -50,8 +50,6 @@ _texture( q_tex,int2); #define EPS_EWALD_SQR (numtyp)(1.0e-7) #endif -#define ucl_recip2(x) ((numtyp)1.0/(x)) - __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict coeff1, const __global numtyp4 *restrict coeff2, @@ -125,7 +123,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp; rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond; - numtyp r2inv = ucl_recip2(rsq); + numtyp r2inv = ucl_recip(rsq); if (rsq < cut_coulsq) { numtyp r = ucl_sqrt(rsq); @@ -137,21 +135,21 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip2(r+EPS_EWALD); + prefactor *= ucl_recip(r+EPS_EWALD); forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent - r2inv = ucl_recip2(rsq + EPS_EWALD_SQR); + r2inv = ucl_recip(rsq + EPS_EWALD_SQR); } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - acctyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); + acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip2(r); + prefactor *= ucl_recip(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } } else forcecoul = 
(numtyp)0.0; @@ -270,7 +268,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp; rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond; - numtyp r2inv = ucl_recip2(rsq); + numtyp r2inv = ucl_recip(rsq); if (rsq < cut_coulsq) { numtyp r = ucl_sqrt(rsq); @@ -283,23 +281,23 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_, // used approximation functions valid numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip2(r+EPS_EWALD); + prefactor *= ucl_recip(r+EPS_EWALD); forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly modified since the later // scaling of the overall force shall be consistent - r2inv = ucl_recip2(rsq + EPS_EWALD_SQR); + r2inv = ucl_recip(rsq + EPS_EWALD_SQR); } else { numtyp grij = g_ewald * r; numtyp expm2 = ucl_exp(-grij*grij); - numtyp t = ucl_recip2((numtyp)1.0 + CS_EWALD_P*grij); + numtyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; - prefactor *= ucl_recip2(r); + prefactor *= ucl_recip(r); forcecoul = prefactor*(_erfc + EWALD_F*grij*expm2); } From 2c97c66df3c2b949145719bf2a948e176a434703 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 4 Nov 2025 18:29:58 -0500 Subject: [PATCH 164/604] update macOS GitHub runner to use macOS 15 instead of 13 --- .github/workflows/unittest-macos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml index e11c4e6ba4f..bb3695fd67b 100644 --- 
a/.github/workflows/unittest-macos.yml +++ b/.github/workflows/unittest-macos.yml @@ -19,7 +19,7 @@ jobs: build: name: MacOS Unit Test if: ${{ github.repository == 'lammps/lammps' }} - runs-on: macos-13 + runs-on: macos-15-intel env: CCACHE_DIR: ${{ github.workspace }}/.ccache From 8810b00afb29b1e9c97a3f8a0bb94e45cbb9de61 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 4 Nov 2025 17:27:35 -0700 Subject: [PATCH 165/604] Simplify --- src/KOKKOS/atom_vec_kokkos.cpp | 598 ++++++++++++++------------------- src/KOKKOS/atom_vec_kokkos.h | 16 +- src/KOKKOS/comm_kokkos.cpp | 42 +-- 3 files changed, 279 insertions(+), 377 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 7e99bfb9dd0..c6ab0ed8e38 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -35,6 +35,9 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) buffer_size = 0; size_exchange = 0; + datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = + datamask_border = datamask_border_vel = datamask_exchange = EMPTY_MASK; + k_count = DAT::tdual_int_1d("atom:k_count",1); atomKK = (AtomKokkos *) atom; } @@ -60,17 +63,15 @@ void AtomVecKokkos::setup_fields() /* ---------------------------------------------------------------------- */ -/* ---------------------------------------------------------------------- */ - template struct AtomVecKokkos_PackComm { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -78,29 +79,29 @@ struct 
AtomVecKokkos_PackComm { uint64_t _datamask; AtomVecKokkos_PackComm( - const AtomKokkos* atomKK, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const uint64_t &datamask): - _x(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { - const int size_forward = atomKK->avecKK->size_forward; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; - const size_t elements = size_forward; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; 
KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { @@ -284,15 +285,10 @@ struct AtomVecKokkos_PackCommSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -300,70 +296,70 @@ struct AtomVecKokkos_PackCommSelf { uint64_t _datamask; AtomVecKokkos_PackCommSelf( - const AtomKokkos* atomKK, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const uint64_t datamask): - _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + const AtomKokkos* atomKK, + const int &nfirst, + const typename DAT::tdual_int_1d &list, + const 
double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _nfirst(nfirst),_list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + _x(i+_nfirst,0) = _x(j,0); + _x(i+_nfirst,1) = _x(j,1); + _x(i+_nfirst,2) = _x(j,2); } else { if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _x(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _x(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _x(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _x(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; } } if constexpr (!DEFAULT) { if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); + _mu(i+_nfirst,0) = _mu(j,0); + _mu(i+_nfirst,1) = _mu(j,1); + _mu(i+_nfirst,2) = _mu(j,2); } if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); + _sp(i+_nfirst,0) 
= _sp(j,0); + _sp(i+_nfirst,1) = _sp(j,1); + _sp(i+_nfirst,2) = _sp(j,2); + _sp(i+_nfirst,3) = _sp(j,3); } if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); + _dpdTheta(i+_nfirst) = _dpdTheta(j); if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); + _uCond(i+_nfirst) = _uCond(j); if (_datamask & UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); + _uMech(i+_nfirst) = _uMech(j); if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); + _uChem(i+_nfirst) = _uChem(j); } } }; @@ -497,14 +493,10 @@ struct AtomVecKokkos_PackCommSelfFused { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -525,13 +517,13 @@ struct AtomVecKokkos_PackCommSelfFused { const double &xprd, const double &yprd, const double &zprd, const double &xy, const double &xz, const double &yz, const uint64_t datamask): - _x(atomKK->k_x.view()),_xw(atomKK->k_x.view()), - _mu(atomKK->k_mu.view()),_muw(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()),_spw(atomKK->k_sp.view()), - _dpdTheta(atomKK->k_dpdTheta.view()),_dpdThetaw(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()),_uCondw(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()),_uMechw(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()),_uChemw(atomKK->k_uChem.view()), + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + 
_sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -558,46 +550,46 @@ struct AtomVecKokkos_PackCommSelfFused { j = _g2l(j-nlocal); if (_pbc_flag(ii) == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + _x(i+_nfirst,0) = _x(j,0); + _x(i+_nfirst,1) = _x(j,1); + _x(i+_nfirst,2) = _x(j,2); } else { if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; + _x(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _x(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _x(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _x(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } } if constexpr (!DEFAULT) { if (_datamask & MU_MASK) { - _muw(i+_nfirst,0) = _mu(j,0); - _muw(i+_nfirst,1) = _mu(j,1); - _muw(i+_nfirst,2) = _mu(j,2); + _mu(i+_nfirst,0) = _mu(j,0); + _mu(i+_nfirst,1) = _mu(j,1); + _mu(i+_nfirst,2) = _mu(j,2); } if (_datamask & SP_MASK) { - _spw(i+_nfirst,0) = _sp(j,0); - _spw(i+_nfirst,1) = _sp(j,1); - _spw(i+_nfirst,2) = _sp(j,2); - _spw(i+_nfirst,3) = _sp(j,3); + _sp(i+_nfirst,0) = _sp(j,0); + _sp(i+_nfirst,1) = _sp(j,1); + _sp(i+_nfirst,2) = _sp(j,2); + _sp(i+_nfirst,3) = _sp(j,3); } if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i+_nfirst) = _dpdTheta(j); + _dpdTheta(i+_nfirst) = _dpdTheta(j); if (_datamask & UCOND_MASK) - _uCondw(i+_nfirst) = _uCond(j); + _uCond(i+_nfirst) = _uCond(j); if (_datamask & 
UMECH_MASK) - _uMechw(i+_nfirst) = _uMech(j); + _uMech(i+_nfirst) = _uMech(j); if (_datamask & UCHEM_MASK) - _uChemw(i+_nfirst) = _uChem(j); + _uChem(i+_nfirst) = _uChem(j); } } }; @@ -773,7 +765,7 @@ struct AtomVecKokkos_PackCommVel { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_int_1d _mask; typename AT::t_kkfloat_1d_3 _v; typename AT::t_kkfloat_1d_4 _mu; @@ -1128,8 +1120,8 @@ struct AtomVecKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3_randomread _torque; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3 _torque; typename AT::t_double_2d_lr _buf; int _first; uint64_t _datamask; @@ -1199,10 +1191,8 @@ struct AtomVecKokkos_UnPackReverseSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3_randomread _torque; - typename AT::t_kkacc_1d_3 _fw,_fmw,_fm_longw; - typename AT::t_kkfloat_1d_3 _torquew; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3 _torque; typename AT::t_int_1d_const _list; int _nfirst; uint64_t _datamask; @@ -1212,36 +1202,36 @@ struct AtomVecKokkos_UnPackReverseSelf { const int &nfirst, const typename DAT::tdual_int_1d &list, const uint64_t &datamask): - _f(atomKK->k_f.view()),_fw(atomKK->k_f.view()), - _fm(atomKK->k_fm.view()),_fmw(atomKK->k_fm.view()), - _fm_long(atomKK->k_fm_long.view()),_fm_longw(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()),_torquew(atomKK->k_torque.view()), + _f(atomKK->k_f.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _torque(atomKK->k_torque.view()), _nfirst(nfirst),_list(list.view()), _datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); - _fw(j,0) += 
_f(i+_nfirst,0); - _fw(j,1) += _f(i+_nfirst,1); - _fw(j,2) += _f(i+_nfirst,2); + _f(j,0) += _f(i+_nfirst,0); + _f(j,1) += _f(i+_nfirst,1); + _f(j,2) += _f(i+_nfirst,2); if (_datamask & FM_MASK) { - _fmw(j,0) += _fm(i+_nfirst,0); - _fmw(j,1) += _fm(i+_nfirst,1); - _fmw(j,2) += _fm(i+_nfirst,2); + _fm(j,0) += _fm(i+_nfirst,0); + _fm(j,1) += _fm(i+_nfirst,1); + _fm(j,2) += _fm(i+_nfirst,2); } if (_datamask & FML_MASK) { - _fm_longw(j,0) += _fm_long(i+_nfirst,0); - _fm_longw(j,1) += _fm_long(i+_nfirst,1); - _fm_longw(j,2) += _fm_long(i+_nfirst,2); + _fm_long(j,0) += _fm_long(i+_nfirst,0); + _fm_long(j,1) += _fm_long(i+_nfirst,1); + _fm_long(j,2) += _fm_long(i+_nfirst,2); } if (_datamask & TORQUE_MASK) { - _torquew(j,0) += _torque(i+_nfirst,0); - _torquew(j,1) += _torque(i+_nfirst,1); - _torquew(j,2) += _torque(i+_nfirst,2); + _torque(j,0) += _torque(i+_nfirst,0); + _torque(j,1) += _torque(i+_nfirst,1); + _torque(j,2) += _torque(i+_nfirst,2); } } }; @@ -1273,7 +1263,7 @@ struct AtomVecKokkos_UnPackReverse { typedef ArrayTypes AT; typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; - typename AT::t_kkfloat_1d_3 _torque; + typename AT::t_kkacc_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; uint64_t _datamask; @@ -1353,7 +1343,7 @@ struct AtomVecKokkos_PackBorder { typename AT::t_double_2d_lr _buf; const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_kkfloat_1d_3_lr _x; const typename AT::t_tagint_1d _tag; const typename AT::t_int_1d _type; const typename AT::t_int_1d _mask; @@ -1636,7 +1626,7 @@ struct AtomVecKokkos_PackBorderVel { typename AT::t_double_2d_lr_um _buf; const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; const typename AT::t_tagint_1d _tag; const typename AT::t_int_1d _type; @@ -1980,29 +1970,29 @@ struct AtomVecKokkos_PackExchangeFunctor { 
typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d 
_improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, _improper_atom3,_improper_atom4; typename AT::t_kkfloat_1d_4 _mu; typename AT::t_kkfloat_1d_4 _sp; @@ -2010,36 +2000,6 @@ struct AtomVecKokkos_PackExchangeFunctor { typename AT::t_kkfloat_1d_3 _omega; typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - _dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; - typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_kkfloat_1d_4 _muw; - typename AT::t_kkfloat_1d_4 _spw; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d_3 _omegaw; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; - typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _sendlist; typename AT::t_int_1d_const _copylist; @@ -2047,103 +2007,61 @@ struct AtomVecKokkos_PackExchangeFunctor { uint64_t _datamask; AtomVecKokkos_PackExchangeFunctor( - const AtomKokkos* atomKK, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist, - const uint64_t datamask): - _x(atomKK->k_x.view()), - 
_v(atomKK->k_v.view()), - _tag(atomKK->k_tag.view()), - _type(atomKK->k_type.view()), - _mask(atomKK->k_mask.view()), - _image(atomKK->k_image.view()), - _q(atomKK->k_q.view()), - _molecule(atomKK->k_molecule.view()), - _nspecial(atomKK->k_nspecial.view()), - _special(atomKK->k_special.view()), - _num_bond(atomKK->k_num_bond.view()), - _bond_type(atomKK->k_bond_type.view()), - _bond_atom(atomKK->k_bond_atom.view()), - _num_angle(atomKK->k_num_angle.view()), - _angle_type(atomKK->k_angle_type.view()), - _angle_atom1(atomKK->k_angle_atom1.view()), - _angle_atom2(atomKK->k_angle_atom2.view()), - _angle_atom3(atomKK->k_angle_atom3.view()), - _num_dihedral(atomKK->k_num_dihedral.view()), - _dihedral_type(atomKK->k_dihedral_type.view()), - _dihedral_atom1(atomKK->k_dihedral_atom1.view()), - _dihedral_atom2(atomKK->k_dihedral_atom2.view()), - _dihedral_atom3(atomKK->k_dihedral_atom3.view()), - _dihedral_atom4(atomKK->k_dihedral_atom4.view()), - _num_improper(atomKK->k_num_improper.view()), - _improper_type(atomKK->k_improper_type.view()), - _improper_atom1(atomKK->k_improper_atom1.view()), - _improper_atom2(atomKK->k_improper_atom2.view()), - _improper_atom3(atomKK->k_improper_atom3.view()), - _improper_atom4(atomKK->k_improper_atom4.view()), - _mu(atomKK->k_mu.view()), - _sp(atomKK->k_sp.view()), - _radius(atomKK->k_radius.view()), - _rmass(atomKK->k_rmass.view()), - _omega(atomKK->k_omega.view()), - _dpdTheta(atomKK->k_dpdTheta.view()), - _uCond(atomKK->k_uCond.view()), - _uMech(atomKK->k_uMech.view()), - _uChem(atomKK->k_uChem.view()), - _uCG(atomKK->k_uCG.view()), - _uCGnew(atomKK->k_uCGnew.view()), + const AtomKokkos* atomKK, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d sendlist, + DAT::tdual_int_1d copylist, + const uint64_t datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + 
_molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), - _xw(atomKK->k_x.view()), - _vw(atomKK->k_v.view()), - _tagw(atomKK->k_tag.view()), - _typew(atomKK->k_type.view()), - _maskw(atomKK->k_mask.view()), - _imagew(atomKK->k_image.view()), - _qw(atomKK->k_q.view()), - _moleculew(atomKK->k_molecule.view()), - _nspecialw(atomKK->k_nspecial.view()), - _specialw(atomKK->k_special.view()), - _num_bondw(atomKK->k_num_bond.view()), - _bond_typew(atomKK->k_bond_type.view()), - _bond_atomw(atomKK->k_bond_atom.view()), - _num_anglew(atomKK->k_num_angle.view()), - _angle_typew(atomKK->k_angle_type.view()), - 
_angle_atom1w(atomKK->k_angle_atom1.view()), - _angle_atom2w(atomKK->k_angle_atom2.view()), - _angle_atom3w(atomKK->k_angle_atom3.view()), - _num_dihedralw(atomKK->k_num_dihedral.view()), - _dihedral_typew(atomKK->k_dihedral_type.view()), - _dihedral_atom1w(atomKK->k_dihedral_atom1.view()), - _dihedral_atom2w(atomKK->k_dihedral_atom2.view()), - _dihedral_atom3w(atomKK->k_dihedral_atom3.view()), - _dihedral_atom4w(atomKK->k_dihedral_atom4.view()), - _num_improperw(atomKK->k_num_improper.view()), - _improper_typew(atomKK->k_improper_type.view()), - _improper_atom1w(atomKK->k_improper_atom1.view()), - _improper_atom2w(atomKK->k_improper_atom2.view()), - _improper_atom3w(atomKK->k_improper_atom3.view()), - _improper_atom4w(atomKK->k_improper_atom4.view()), - _muw(atomKK->k_mu.view()), - _spw(atomKK->k_sp.view()), - _radiusw(atomKK->k_radius.view()), - _rmassw(atomKK->k_rmass.view()), - _omegaw(atomKK->k_omega.view()), - _dpdThetaw(atomKK->k_dpdTheta.view()), - _uCondw(atomKK->k_uCond.view()), - _uMechw(atomKK->k_uMech.view()), - _uChemw(atomKK->k_uChem.view()), - _uCGw(atomKK->k_uCG.view()), - _uCGneww(atomKK->k_uCGnew.view()), - - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atomKK->avecKK->size_exchange), - _datamask(datamask) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _size_exchange(atomKK->avecKK->size_exchange), + _datamask(datamask) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } KOKKOS_INLINE_FUNCTION void operator() (const int &mysend) const { @@ -2263,114 +2181,114 @@ struct AtomVecKokkos_PackExchangeFunctor { const int j = _copylist(mysend); if (j > -1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); 
- _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); + _x(i,0) = _x(j,0); + _x(i,1) = _x(j,1); + _x(i,2) = _x(j,2); + _v(i,0) = _v(j,0); + _v(i,1) = _v(j,1); + _v(i,2) = _v(j,2); + _tag(i) = _tag(j); + _type(i) = _type(j); + _mask(i) = _mask(j); + _image(i) = _image(j); if (_datamask & Q_MASK) - _qw(i) = _q(j); + _q(i) = _q(j); if (_datamask & MOLECULE_MASK) - _moleculew(i) = _molecule(j); + _molecule(i) = _molecule(j); if (_datamask & BOND_MASK) { - _num_bondw(i) = _num_bond(j); + _num_bond(i) = _num_bond(j); for (int k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); + _bond_type(i,k) = _bond_type(j,k); + _bond_atom(i,k) = _bond_atom(j,k); } } if (_datamask & ANGLE_MASK) { - _num_anglew(i) = _num_angle(j); + _num_angle(i) = _num_angle(j); for (int k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); + _angle_type(i,k) = _angle_type(j,k); + _angle_atom1(i,k) = _angle_atom1(j,k); + _angle_atom2(i,k) = _angle_atom2(j,k); + _angle_atom3(i,k) = _angle_atom3(j,k); } } if (_datamask & DIHEDRAL_MASK) { - _num_dihedralw(i) = _num_dihedral(j); + _num_dihedral(i) = _num_dihedral(j); for (int k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); + _dihedral_type(i,k) = _dihedral_type(j,k); + _dihedral_atom1(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4(i,k) = _dihedral_atom4(j,k); } } if (_datamask & IMPROPER_MASK) { - _num_improperw(i) = 
_num_improper(j); + _num_improper(i) = _num_improper(j); for (int k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); + _improper_type(i,k) = _improper_type(j,k); + _improper_atom1(i,k) = _improper_atom1(j,k); + _improper_atom2(i,k) = _improper_atom2(j,k); + _improper_atom3(i,k) = _improper_atom3(j,k); + _improper_atom4(i,k) = _improper_atom4(j,k); } } if (_datamask & SPECIAL_MASK) { - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); + _nspecial(i,0) = _nspecial(j,0); + _nspecial(i,1) = _nspecial(j,1); + _nspecial(i,2) = _nspecial(j,2); for (int k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); + _special(i,k) = _special(j,k); } if (_datamask & MU_MASK) { - _muw(i,0) = _mu(j,0); - _muw(i,1) = _mu(j,1); - _muw(i,2) = _mu(j,2); - _muw(i,3) = _mu(j,3); + _mu(i,0) = _mu(j,0); + _mu(i,1) = _mu(j,1); + _mu(i,2) = _mu(j,2); + _mu(i,3) = _mu(j,3); } if (_datamask & SP_MASK) { - _spw(i,0) = _sp(j,0); - _spw(i,1) = _sp(j,1); - _spw(i,2) = _sp(j,2); - _spw(i,3) = _sp(j,3); + _sp(i,0) = _sp(j,0); + _sp(i,1) = _sp(j,1); + _sp(i,2) = _sp(j,2); + _sp(i,3) = _sp(j,3); } if (_datamask & RADIUS_MASK) - _radiusw(i) = _radius(j); + _radius(i) = _radius(j); if (_datamask & RMASS_MASK) - _rmassw(i) = _rmass(j); + _rmass(i) = _rmass(j); if (_datamask & OMEGA_MASK) { - _omegaw(i,0) = _omega(j,0); - _omegaw(i,1) = _omega(j,1); - _omegaw(i,2) = _omega(j,2); + _omega(i,0) = _omega(j,0); + _omega(i,1) = _omega(j,1); + _omega(i,2) = _omega(j,2); } if (_datamask & DPDTHETA_MASK) - _dpdThetaw(i) = _dpdTheta(j); + _dpdTheta(i) = _dpdTheta(j); if (_datamask & UCOND_MASK) - _uCondw(i) = _uCond(j); + _uCond(i) = _uCond(j); if (_datamask & UMECH_MASK) - _uMechw(i) = _uMech(j); + _uMech(i) = _uMech(j); if 
(_datamask & UCHEM_MASK) - _uChemw(i) = _uChem(j); + _uChem(i) = _uChem(j); if (_datamask & UCG_MASK) - _uCGw(i) = _uCG(j); + _uCG(i) = _uCG(j); if (_datamask & UCGNEW_MASK) - _uCGneww(i) = _uCGnew(j); + _uCGnew(i) = _uCGnew(j); } } }; diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index c3814c18942..cf90736aadc 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -100,6 +100,14 @@ class AtomVecKokkos : virtual public AtomVec { int size_exchange; + uint64_t datamask_grow; + uint64_t datamask_comm; + uint64_t datamask_comm_vel; + uint64_t datamask_reverse; + uint64_t datamask_border; + uint64_t datamask_border_vel; + uint64_t datamask_exchange; + protected: DAT::t_tagint_1d d_tag; DAT::t_int_1d d_type, d_mask; @@ -189,14 +197,6 @@ class AtomVecKokkos : virtual public AtomVec { DAT::tdual_int_1d k_count; - uint64_t datamask_grow; - uint64_t datamask_comm; - uint64_t datamask_comm_vel; - uint64_t datamask_reverse; - uint64_t datamask_border; - uint64_t datamask_border_vel; - uint64_t datamask_exchange; - void setup_fields() override; uint64_t field2mask(std::string); int field2size(std::string); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 80a16bc5232..93a5d1fdef9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -132,18 +132,9 @@ void CommKokkos::forward_comm(int dummy) k_sendlist.sync_host(); - if (comm_x_only) { - atomKK->sync(Host,X_MASK); - atomKK->modified(Host,X_MASK); - } else if (ghost_velocity) { - atomKK->sync(Host,X_MASK | V_MASK); - atomKK->modified(Host,X_MASK | V_MASK); - } else { - atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); - } - + atomKK->sync(Host,atomKK->avecKK->datamask_comm_vel); CommBrick::forward_comm(dummy); + atomKK->modified(Host,atomKK->avecKK->datamask_comm_vel); } /* ---------------------------------------------------------------------- */ @@ -259,17 +250,9 @@ void CommKokkos::reverse_comm() 
k_sendlist.sync_host(); - if (comm_f_only) - atomKK->sync(Host,F_MASK); - else - atomKK->sync(Host,ALL_MASK); - + atomKK->sync(Host,atomKK->avecKK->datamask_reverse); CommBrick::reverse_comm(); - - if (comm_f_only) - atomKK->modified(Host,F_MASK); - else - atomKK->modified(Host,ALL_MASK); + atomKK->modified(Host,atomKK->avecKK->datamask_reverse); } /* ---------------------------------------------------------------------- */ @@ -748,12 +731,12 @@ void CommKokkos::exchange() return; } - atomKK->sync(Host,ALL_MASK); + atomKK->sync(Host,atomKK->avecKK->datamask_exchange); int prev_auto_sync = lmp->kokkos->auto_sync; lmp->kokkos->auto_sync = 1; CommBrick::exchange(); lmp->kokkos->auto_sync = prev_auto_sync; - atomKK->modified(Host,ALL_MASK); + atomKK->modified(Host,atomKK->avecKK->datamask_exchange); } /* ---------------------------------------------------------------------- */ @@ -827,7 +810,7 @@ void CommKokkos::exchange_device() subhi = domain->subhi_lamda; } - atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,atomKK->avecKK->datamask_border_vel); // loop over dimensions for (int dim = 0; dim < 3; dim++) { @@ -1013,7 +996,7 @@ void CommKokkos::exchange_device() } } } - atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); + atomKK->modified(ExecutionSpaceFromDevice::space,atomKK->avecKK->datamask_border_vel); } if (atom->firstgroupname) { @@ -1053,14 +1036,14 @@ void CommKokkos::borders() if (exchange_comm_on_host) borders_device(); else borders_device(); } else { - atomKK->sync(Host,ALL_MASK); + atomKK->sync(Host,atomKK->avecKK->datamask_border_vel); k_sendlist.sync_host(); int prev_auto_sync = lmp->kokkos->auto_sync; lmp->kokkos->auto_sync = 1; CommBrick::borders(); lmp->kokkos->auto_sync = prev_auto_sync; k_sendlist.modify_host(); - atomKK->modified(Host,ALL_MASK); + atomKK->modified(Host,atomKK->avecKK->datamask_border_vel); } if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_legacy) @@ -1130,7 
+1113,8 @@ void CommKokkos::borders_device() { MPI_Request request; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - atomKK->sync(exec_space,ALL_MASK); + atomKK->sync(exec_space,atomKK->avecKK->datamask_border_vel); + k_sendlist.sync(); int team_size = 1; @@ -1345,7 +1329,7 @@ void CommKokkos::borders_device() { max = MAX(maxforward*rmax,maxreverse*smax); if (max > maxrecv) grow_recv_kokkos(max); - atomKK->modified(exec_space,ALL_MASK); + atomKK->modified(exec_space,atomKK->avecKK->datamask_border_vel); // reset global->local map From 37ff829a12dbb7ea3e48a67d9876d472629f41f9 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 4 Nov 2025 17:46:03 -0700 Subject: [PATCH 166/604] add MC progress sentence to doc page Note --- doc/src/fix_atom_swap.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/src/fix_atom_swap.rst b/doc/src/fix_atom_swap.rst index 7a591955b2e..64cec9b3b85 100644 --- a/doc/src/fix_atom_swap.rst +++ b/doc/src/fix_atom_swap.rst @@ -75,7 +75,11 @@ swapped atoms are not well equilibrated. run for *M* steps (to attempt *M* total swaps), you will get the same result, but there will be 3x more energy evaluations due to the requirements of (what LAMMPS presumes is a) hybrid MC+MD - simulation. + simulation. If you wish to monitor the progress of an MC-only + simulation (e.g. attempted/accepted swaps, potential energy), you + can run for a small number of *M* steps and divide *X* by *M*, with + little loss in efficiency. If thermo output is performed every + timestep, then *M* steps of MC progress will be output. The *types* keyword is required. At least two atom types must be specified. 
If not using *semi-grand*, exactly two atom types are From 299de9c0d930dd2d392df6eefe6f48c1c603039d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 4 Nov 2025 23:20:31 -0500 Subject: [PATCH 167/604] avoid redundant integer divisions --- src/image.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/image.cpp b/src/image.cpp index e66d51348bc..c806268617f 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -1074,7 +1074,7 @@ void Image::compute_SSAO() void Image::write_JPG(FILE *fp) { #ifdef LAMMPS_JPEG - int aafactor = fsaa ? 2 : 1; + const int aafactor = fsaa ? 2 : 1; struct jpeg_compress_struct cinfo; struct jpeg_error_mgr jerr; JSAMPROW row_pointer; @@ -1093,7 +1093,7 @@ void Image::write_JPG(FILE *fp) while (cinfo.next_scanline < cinfo.image_height) { row_pointer = (JSAMPROW) - &writeBuffer[(cinfo.image_height - 1 - cinfo.next_scanline) * 3 * (width/aafactor)]; + &writeBuffer[(cinfo.image_height - 1 - cinfo.next_scanline) * 3 * cinfo.image_width]; jpeg_write_scanlines(&cinfo,&row_pointer,1); } @@ -1109,7 +1109,9 @@ void Image::write_JPG(FILE *fp) void Image::write_PNG(FILE *fp) { #ifdef LAMMPS_PNG - int aafactor = fsaa ? 2 : 1; + const int aafactor = fsaa ? 
2 : 1; + const int pngwidth = width/aafactor; + const int pngheight = height/aafactor; png_structp png_ptr; png_infop info_ptr; @@ -1130,7 +1132,7 @@ void Image::write_PNG(FILE *fp) png_init_io(png_ptr, fp); png_set_compression_level(png_ptr,Z_BEST_SPEED); - png_set_IHDR(png_ptr,info_ptr,width/aafactor,height/aafactor,8,PNG_COLOR_TYPE_RGB, + png_set_IHDR(png_ptr,info_ptr,pngwidth,pngheight,8,PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); png_text text_ptr[2]; @@ -1150,9 +1152,9 @@ void Image::write_PNG(FILE *fp) png_set_text(png_ptr,info_ptr,text_ptr,1); png_write_info(png_ptr,info_ptr); - auto *row_pointers = new png_bytep[height/aafactor]; - for (int i=0; i < height/aafactor; ++i) - row_pointers[i] = (png_bytep) &writeBuffer[((height/aafactor)-i-1)*3*(width/aafactor)]; + auto *row_pointers = new png_bytep[pngheight]; + for (int i=0; i < pngheight; ++i) + row_pointers[i] = (png_bytep) &writeBuffer[(pngheight-i-1)*3*pngwidth]; png_write_image(png_ptr, row_pointers); png_write_end(png_ptr, info_ptr); @@ -1168,12 +1170,15 @@ void Image::write_PNG(FILE *fp) void Image::write_PPM(FILE *fp) { - int aafactor = fsaa ? 2 : 1; - fprintf(fp,"P6\n%d %d\n255\n",width/aafactor,height/aafactor); + const int aafactor = fsaa ? 
2 : 1; + const int ppmheight = height/aafactor; + const int ppmwidth = width/aafactor; + + fprintf(fp,"P6\n%d %d\n255\n",ppmwidth,ppmheight); int y; - for (y = (height/aafactor)-1; y >= 0; y--) - fwrite(&writeBuffer[y*(width/aafactor)*3],3,width/aafactor,fp); + for (y = ppmheight-1; y >= 0; y--) + fwrite(&writeBuffer[y*ppmwidth*3],3,ppmwidth,fp); } /* ---------------------------------------------------------------------- From 83aaea215543303ce657990f549a4ffec38a1799 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 11:37:36 -0500 Subject: [PATCH 168/604] add log files for fix ttm/thermal example --- examples/ttm/in.ttm.thermal | 2 +- examples/ttm/log.5Nov25.ttm.thermal.g++.1 | 115 ++++++++++++++++++++++ examples/ttm/log.5Nov25.ttm.thermal.g++.4 | 115 ++++++++++++++++++++++ 3 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 examples/ttm/log.5Nov25.ttm.thermal.g++.1 create mode 100644 examples/ttm/log.5Nov25.ttm.thermal.g++.4 diff --git a/examples/ttm/in.ttm.thermal b/examples/ttm/in.ttm.thermal index 3a32e6bb5ec..c8e436fefd0 100644 --- a/examples/ttm/in.ttm.thermal +++ b/examples/ttm/in.ttm.thermal @@ -21,7 +21,7 @@ create_atoms 1 region atom_box mass 1 55.845 pair_style eam/fs -pair_coeff * * Fe_mm_eam.fs Fe +pair_coeff * * Fe_mm.eam.fs Fe neighbor 2.0 bin neigh_modify every 5 delay 0 check yes diff --git a/examples/ttm/log.5Nov25.ttm.thermal.g++.1 b/examples/ttm/log.5Nov25.ttm.thermal.g++.1 new file mode 100644 index 00000000000..a2ae6784efd --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.thermal.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
+ using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 1 by 1 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.001 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm/thermal 11111 props.in 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 
+thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +run 1000 +Neighbor list info ... + update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 14.93 | 14.93 | 14.93 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.45357526305 371.0519174879938 0 + 100 16.75543241116823 -65897.66447227357 334.0178724061985 0.3118430884427655 + 200 26.71531497468794 -65864.69793080767 301.0700387830087 0.2916979745378292 + 300 29.9246700636085 -65834.82701417404 271.2399934256661 0.2269785573249345 + 400 30.24853526226442 -65808.33631392792 244.8009980489116 0.1375160094653321 + 500 32.04897564305613 -65784.39685387127 220.8416770241869 0.1890223832141024 + 600 36.76512871797021 -65762.42723588689 198.8409767023253 0.264042959144865 + 700 42.4845470939158 -65743.06051141671 179.5093370717682 0.2040874264875116 + 800 47.3104129138786 -65726.03209449482 162.5004970639171 0.1742328606277808 + 900 51.04189399106194 -65710.62736794836 147.1173211852656 0.1396207395553715 + 1000 54.83071009787725 -65696.19287727909 132.6786891153929 0.1553862432617664 +Loop time of 11.0144 on 1 procs for 1000 steps with 16000 atoms + +Performance: 0.784 ns/day, 30.596 hours/ns, 90.790 timesteps/s, 1.453 Matom-step/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 10.149 | 10.149 | 10.149 | 0.0 | 92.15 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.039558 | 0.039558 | 0.039558 | 0.0 | 0.36 
+Output | 0.00062446 | 0.00062446 | 0.00062446 | 0.0 | 0.01 +Modify | 0.81417 | 0.81417 | 0.81417 | 0.0 | 7.39 +Other | | 0.01075 | | | 0.10 + +Nlocal: 16000 ave 16000 max 16000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 17201 ave 17201 max 17201 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 1.088e+06 ave 1.088e+06 max 1.088e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:11 diff --git a/examples/ttm/log.5Nov25.ttm.thermal.g++.4 b/examples/ttm/log.5Nov25.ttm.thermal.g++.4 new file mode 100644 index 00000000000..aa40cadffa0 --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.thermal.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 2 by 2 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} 
units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.001 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm/thermal 11111 props.in 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +run 1000 +Neighbor list info ... 
+ update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.068 | 5.068 | 5.068 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.4535754628 371.0519174879938 0 + 100 16.69988776493119 -65897.76069610236 334.0869986550932 0.3675916974391917 + 200 26.85362948438215 -65864.35206351527 300.6926506722126 0.3563587352869947 + 300 29.9764357830403 -65834.63300597869 271.028757908424 0.2615120829825031 + 400 30.42884814894755 -65807.80920512842 244.2135314085464 0.2581863201831175 + 500 32.39566589202872 -65783.83731384152 220.2589210566825 0.2371608111923655 + 600 37.06152818575518 -65762.39901369739 198.8400130941485 0.2099819201480675 + 700 43.12798157706226 -65742.27271813873 178.7709299757066 0.1060126739445624 + 800 47.84050809488914 -65724.51172721993 161.0044050963325 0.1268324602668368 + 900 50.82329771080609 -65709.56656390043 146.0670064968364 0.1188053557221307 + 1000 54.003200218348 -65696.46459015549 132.9649113873885 0.1269844742221938 +Loop time of 3.42825 on 4 procs for 1000 steps with 16000 atoms + +Performance: 2.520 ns/day, 9.523 hours/ns, 291.694 timesteps/s, 4.667 Matom-step/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.0319 | 3.0899 | 3.1393 | 2.2 | 90.13 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.054471 | 0.10401 | 0.16205 | 12.0 | 3.03 +Output | 0.00022706 | 0.00025664 | 0.00033877 | 0.0 | 0.01 +Modify | 0.22804 | 0.2284 | 0.2287 | 0.1 | 6.66 +Other | | 0.005645 | | | 0.16 + +Nlocal: 4000 ave 
4000 max 4000 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 8281 ave 8281 max 8281 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 272000 ave 276365 max 267580 min +Histogram: 1 0 1 0 0 0 0 1 0 1 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:03 From 8648c2f9e298e27cc71e1949072d77d4c33ab6fd Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 11:39:01 -0500 Subject: [PATCH 169/604] refactor fix ttm, ttm/grid, and ttm/thermal so ttm/thermal is derived from ttm --- src/EXTRA-FIX/fix_ttm.cpp | 223 +++++++------- src/EXTRA-FIX/fix_ttm.h | 2 +- src/EXTRA-FIX/fix_ttm_grid.cpp | 13 +- src/EXTRA-FIX/fix_ttm_thermal.cpp | 493 +++++------------------------- src/EXTRA-FIX/fix_ttm_thermal.h | 46 +-- 5 files changed, 202 insertions(+), 575 deletions(-) diff --git a/src/EXTRA-FIX/fix_ttm.cpp b/src/EXTRA-FIX/fix_ttm.cpp index c0fa8172a8a..d79dc1d9b99 100644 --- a/src/EXTRA-FIX/fix_ttm.cpp +++ b/src/EXTRA-FIX/fix_ttm.cpp @@ -16,6 +16,7 @@ Contributing authors: Paul Crozier (SNL) Carolyn Phillips (University of Michigan) ------------------------------------------------------------------------- */ +// clang-format on #include "fix_ttm.h" @@ -25,9 +26,9 @@ #include "error.h" #include "force.h" #include "memory.h" +#include "potential_file_reader.h" #include "random_mars.h" #include "respa.h" -#include "potential_file_reader.h" #include "update.h" #include @@ -49,14 +50,10 @@ static constexpr double SHIFT = 0.0; /* ---------------------------------------------------------------------- */ FixTTM::FixTTM(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - random(nullptr), - gfactor1(nullptr), gfactor2(nullptr), ratio(nullptr), flangevin(nullptr), - T_electron(nullptr), T_electron_old(nullptr), - net_energy_transfer(nullptr), net_energy_transfer_all(nullptr) + Fix(lmp, narg, arg), random(nullptr), gfactor1(nullptr), gfactor2(nullptr), ratio(nullptr), + flangevin(nullptr), 
T_electron(nullptr), T_electron_old(nullptr), net_energy_transfer(nullptr), + net_energy_transfer_all(nullptr) { - if (narg < 13) error->all(FLERR,"Illegal fix ttm command"); - vector_flag = 1; size_vector = 2; global_freq = 1; @@ -64,95 +61,99 @@ FixTTM::FixTTM(LAMMPS *lmp, int narg, char **arg) : nevery = 1; restart_peratom = 1; restart_global = 1; - - seed = utils::inumeric(FLERR,arg[3],false,lmp); - electronic_specific_heat = utils::numeric(FLERR,arg[4],false,lmp); - electronic_density = utils::numeric(FLERR,arg[5],false,lmp); - electronic_thermal_conductivity = utils::numeric(FLERR,arg[6],false,lmp); - gamma_p = utils::numeric(FLERR,arg[7],false,lmp); - gamma_s = utils::numeric(FLERR,arg[8],false,lmp); - v_0 = utils::numeric(FLERR,arg[9],false,lmp); - nxgrid = utils::inumeric(FLERR,arg[10],false,lmp); - nygrid = utils::inumeric(FLERR,arg[11],false,lmp); - nzgrid = utils::inumeric(FLERR,arg[12],false,lmp); - tinit = 0.0; - infile = outfile = nullptr; - - int iarg = 13; - while (iarg < narg) { - if (strcmp(arg[iarg],"set") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); - tinit = utils::numeric(FLERR,arg[iarg+1],false,lmp); - if (tinit <= 0.0) - error->all(FLERR,"Fix ttm initial temperature must be > 0.0"); - iarg += 2; - } else if (strcmp(arg[iarg],"infile") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); - infile = utils::strdup(arg[iarg+1]); - iarg += 2; - } else if (strcmp(arg[iarg],"outfile") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix ttm command"); - outevery = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - outfile = utils::strdup(arg[iarg+2]); - iarg += 3; - } else error->all(FLERR,"Illegal fix ttm command"); - } + gamma_p = 0.0; + + // don't parse arguments for FixTTMThermal + if (!utils::strmatch(style, "^ttm/thermal")) { + std::string mystyle = fmt::format("fix {}", style); + if (narg < 13) utils::missing_cmd_args(FLERR, mystyle, error); + + seed = utils::inumeric(FLERR, arg[3], 
false, lmp); + electronic_specific_heat = utils::numeric(FLERR, arg[4], false, lmp); + electronic_density = utils::numeric(FLERR, arg[5], false, lmp); + electronic_thermal_conductivity = utils::numeric(FLERR, arg[6], false, lmp); + gamma_p = utils::numeric(FLERR, arg[7], false, lmp); + gamma_s = utils::numeric(FLERR, arg[8], false, lmp); + v_0 = utils::numeric(FLERR, arg[9], false, lmp); + nxgrid = utils::inumeric(FLERR, arg[10], false, lmp); + nygrid = utils::inumeric(FLERR, arg[11], false, lmp); + nzgrid = utils::inumeric(FLERR, arg[12], false, lmp); + + int iarg = 13; + while (iarg < narg) { + if (strcmp(arg[iarg], "set") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mystyle + " set", error); + tinit = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + if (tinit <= 0.0) + error->all(FLERR, iarg + 1, "Fix {} initial temperature must be > 0.0", style); + iarg += 2; + } else if (strcmp(arg[iarg], "infile") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mystyle + " infile", error); + infile = arg[iarg + 1]; + iarg += 2; + } else if (strcmp(arg[iarg], "outfile") == 0) { + if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, mystyle + " outfile", error); + outevery = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + outfile = arg[iarg + 2]; + iarg += 3; + } else + error->all(FLERR, iarg, "Unknown fix {} keyword {}", style, arg[iarg]); + } - // error check + // error check - if (seed <= 0) - error->all(FLERR,"Invalid random number seed in fix ttm command"); - if (electronic_specific_heat <= 0.0) - error->all(FLERR,"Fix ttm electronic_specific_heat must be > 0.0"); - if (electronic_density <= 0.0) - error->all(FLERR,"Fix ttm electronic_density must be > 0.0"); - if (electronic_thermal_conductivity < 0.0) - error->all(FLERR,"Fix ttm electronic_thermal_conductivity must be >= 0.0"); - if (gamma_p <= 0.0) error->all(FLERR,"Fix ttm gamma_p must be > 0.0"); - if (gamma_s < 0.0) error->all(FLERR,"Fix ttm gamma_s must be >= 0.0"); - if (v_0 < 
0.0) error->all(FLERR,"Fix ttm v_0 must be >= 0.0"); - if (nxgrid <= 0 || nygrid <= 0 || nzgrid <= 0) - error->all(FLERR,"Fix ttm grid sizes must be > 0"); + if (seed <= 0) error->all(FLERR, 3, "Invalid random number seed in fix ttm command"); + if (electronic_specific_heat <= 0.0) + error->all(FLERR, 4, "Fix {} electronic_specific_heat must be > 0.0", style); + if (electronic_density <= 0.0) + error->all(FLERR, 5, "Fix {} electronic_density must be > 0.0", style); + if (electronic_thermal_conductivity < 0.0) + error->all(FLERR, 6, "Fix {} electronic_thermal_conductivity must be >= 0.0", style); + if (gamma_p <= 0.0) error->all(FLERR, 7, "Fix {} gamma_p must be > 0.0", style); + if (gamma_s < 0.0) error->all(FLERR, 8, "Fix {} gamma_s must be >= 0.0", style); + if (v_0 < 0.0) error->all(FLERR, 9, "Fix {} v_0 must be >= 0.0", style); + if (nxgrid <= 0 || nygrid <= 0 || nzgrid <= 0) + error->all(FLERR, Error::NOPOINTER, "Fix {} grid sizes must be all > 0", style); - v_0_sq = v_0*v_0; + v_0_sq = v_0 * v_0; - // grid OFFSET to perform - // SHIFT to map atom to nearest or lower-left grid point + // grid OFFSET to perform + // SHIFT to map atom to nearest or lower-left grid point - shift = OFFSET + SHIFT; + shift = OFFSET + SHIFT; - // initialize Marsaglia RNG with processor-unique seed + // initialize Marsaglia RNG with processor-unique seed - random = new RanMars(lmp,seed + comm->me); + random = new RanMars(lmp, seed + comm->me); - // allocate per-type arrays for force prefactors + // allocate per-type arrays for force prefactors - gfactor1 = new double[atom->ntypes+1]; - gfactor2 = new double[atom->ntypes+1]; + gfactor1 = new double[atom->ntypes + 1]; + gfactor2 = new double[atom->ntypes + 1]; - // check for allowed maximum number of total grid points + // check for allowed maximum number of total grid points - bigint totalgrid = (bigint) nxgrid * nygrid * nzgrid; - if (totalgrid > MAXSMALLINT) - error->all(FLERR,"Too many grid points in fix ttm"); - ngridtotal = 
totalgrid; + bigint totalgrid = (bigint) nxgrid * nygrid * nzgrid; + if (totalgrid > MAXSMALLINT) error->all(FLERR, "Too many grid points in fix ttm"); + ngridtotal = totalgrid; - // allocate per-atom flangevin and zero it + // allocate per-atom flangevin and zero it - flangevin = nullptr; - FixTTM::grow_arrays(atom->nmax); + flangevin = nullptr; + FixTTM::grow_arrays(atom->nmax); - for (int i = 0; i < atom->nmax; i++) { - flangevin[i][0] = 0.0; - flangevin[i][1] = 0.0; - flangevin[i][2] = 0.0; - } + for (int i = 0; i < atom->nmax; i++) { + flangevin[i][0] = 0.0; + flangevin[i][1] = 0.0; + flangevin[i][2] = 0.0; + } - // set 2 callbacks + // set 2 callbacks - atom->add_callback(Atom::GROW); - atom->add_callback(Atom::RESTART); + atom->add_callback(Atom::GROW); + atom->add_callback(Atom::RESTART); + } // determines which class deallocate_grid() is called from @@ -163,9 +164,6 @@ FixTTM::FixTTM(LAMMPS *lmp, int narg, char **arg) : FixTTM::~FixTTM() { - delete[] infile; - delete[] outfile; - delete random; delete[] gfactor1; @@ -187,21 +185,20 @@ void FixTTM::post_constructor() // initialize electron temperatures on grid - int ix,iy,iz; + int ix, iy, iz; for (iz = 0; iz < nzgrid; iz++) for (iy = 0; iy < nygrid; iy++) - for (ix = 0; ix < nxgrid; ix++) - T_electron[iz][iy][ix] = tinit; + for (ix = 0; ix < nxgrid; ix++) T_electron[iz][iy][ix] = tinit; // zero net_energy_transfer_all // in case compute_vector accesses it on timestep 0 outflag = 0; - memset(&net_energy_transfer_all[0][0][0],0,ngridtotal*sizeof(double)); + memset(&net_energy_transfer_all[0][0][0], 0, ngridtotal * sizeof(double)); // set initial electron temperatures from user input file - if (infile) read_electron_temperatures(infile); + if (!infile.empty()) read_electron_temperatures(infile); } /* ---------------------------------------------------------------------- */ @@ -220,21 +217,20 @@ int FixTTM::setmask() void FixTTM::init() { if (domain->dimension == 2) - error->all(FLERR,"Cannot use fix ttm with 
2d simulation"); + error->all(FLERR, Error::NOLASTLINE, "Cannot use fix {} with 2d simulation", style); if (domain->nonperiodic != 0) - error->all(FLERR,"Cannot use non-periodic boundares with fix ttm"); + error->all(FLERR, Error::NOLASTLINE, "Cannot use non-periodic boundares with fix {}", style); if (domain->triclinic) - error->all(FLERR,"Cannot use fix ttm with triclinic box"); + error->all(FLERR, Error::NOLASTLINE, "Cannot use fix {} with triclinic box", style); // set force prefactors for (int i = 1; i <= atom->ntypes; i++) { - gfactor1[i] = - gamma_p / force->ftm2v; - gfactor2[i] = - sqrt(24.0*force->boltz*gamma_p/update->dt/force->mvv2e) / force->ftm2v; + gfactor1[i] = -gamma_p / force->ftm2v; + gfactor2[i] = sqrt(24.0 * force->boltz * gamma_p / update->dt / force->mvv2e) / force->ftm2v; } - if (utils::strmatch(update->integrate_style,"^respa")) + if (utils::strmatch(update->integrate_style, "^respa")) nlevels_respa = (dynamic_cast(update->integrate))->nlevels; } @@ -242,12 +238,12 @@ void FixTTM::init() void FixTTM::setup(int vflag) { - if (utils::strmatch(update->integrate_style,"^verlet")) { + if (utils::strmatch(update->integrate_style, "^verlet")) { post_force_setup(vflag); } else { - (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa-1); - post_force_respa_setup(vflag,nlevels_respa-1,0); - (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa-1); + (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa - 1); + post_force_respa_setup(vflag, nlevels_respa - 1, 0); + (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa - 1); } } @@ -270,6 +266,8 @@ void FixTTM::post_force_setup(int /*vflag*/) } } +// clang-format off + /* ---------------------------------------------------------------------- */ void FixTTM::post_force(int /*vflag*/) @@ -449,7 +447,7 @@ void FixTTM::end_of_step() // output of grid electron temperatures to file - if (outfile && (update->ntimestep % outevery == 0)) + if (!outfile.empty() && 
(update->ntimestep % outevery == 0)) write_electron_temperatures(fmt::format("{}.{}",outfile,update->ntimestep)); } @@ -600,7 +598,7 @@ void FixTTM::restart(char *buf) int nzgrid_old = static_cast (rlist[n++]); if (nxgrid_old != nxgrid || nygrid_old != nygrid || nzgrid_old != nzgrid) - error->all(FLERR,"Must restart fix ttm with same grid size"); + error->all(FLERR, Error::NOLASTLINE, "Must restart fix ttm with same grid size"); // change RN seed from initial seed, to avoid same Langevin factors // just increment by 1, since for RanMars that is a new RN stream @@ -681,26 +679,19 @@ double FixTTM::compute_vector(int n) e_energy = 0.0; transfer_energy = 0.0; - int ix,iy,iz; - double dx = domain->xprd/nxgrid; double dy = domain->yprd/nygrid; double dz = domain->zprd/nzgrid; double del_vol = dx*dy*dz; - for (iz = 0; iz < nzgrid; iz++) - for (iy = 0; iy < nygrid; iy++) - for (ix = 0; ix < nxgrid; ix++) { - e_energy += - T_electron[iz][iy][ix]*electronic_specific_heat* - electronic_density*del_vol; - transfer_energy += - net_energy_transfer_all[iz][iy][ix]*update->dt; - //printf("TRANSFER %d %d %d %g\n",ix,iy,iz,transfer_energy); + for (int iz = 0; iz < nzgrid; iz++) { + for (int iy = 0; iy < nygrid; iy++) { + for (int ix = 0; ix < nxgrid; ix++) { + e_energy += T_electron[iz][iy][ix]*electronic_specific_heat*electronic_density*del_vol; + transfer_energy += net_energy_transfer_all[iz][iy][ix] * update->dt; } - - //printf("TRANSFER %g\n",transfer_energy); - + } + } outflag = 1; } @@ -717,7 +708,7 @@ double FixTTM::memory_usage() { double bytes = 0.0; bytes += (double) atom->nmax * 3 * sizeof(double); - bytes += (double) 4*ngridtotal * sizeof(int); + bytes += (double) 4*ngridtotal * sizeof(double); return bytes; } diff --git a/src/EXTRA-FIX/fix_ttm.h b/src/EXTRA-FIX/fix_ttm.h index f05c7a5e6a4..9571e6341b1 100644 --- a/src/EXTRA-FIX/fix_ttm.h +++ b/src/EXTRA-FIX/fix_ttm.h @@ -57,7 +57,7 @@ class FixTTM : public Fix { int outflag, outevery; double shift, tinit; double 
e_energy, transfer_energy; - char *infile, *outfile; + std::string infile, outfile; class RanMars *random; double electronic_specific_heat, electronic_density; diff --git a/src/EXTRA-FIX/fix_ttm_grid.cpp b/src/EXTRA-FIX/fix_ttm_grid.cpp index ba2fe4dc1ed..bd4333b7c50 100644 --- a/src/EXTRA-FIX/fix_ttm_grid.cpp +++ b/src/EXTRA-FIX/fix_ttm_grid.cpp @@ -53,8 +53,9 @@ FixTTMGrid::FixTTMGrid(LAMMPS *lmp, int narg, char **arg) : pergrid_freq = 1; restart_file = 1; - if (outfile) error->all(FLERR,"Fix ttm/grid does not support outfile option - " - "use dump grid command or restart files instead"); + if (outfile.size() > 0) + error->all(FLERR, Error::NOPOINTER, "Fix ttm/grid does not support outfile option - " + "use dump grid command or restart files instead"); skin_original = neighbor->skin; } @@ -94,10 +95,9 @@ void FixTTMGrid::post_constructor() // set initial electron temperatures from user input file // communicate new T_electron values to ghost grid points - if (infile) { + if (!infile.empty()) { read_electron_temperatures(infile); - grid->forward_comm(Grid3d::FIX,this,0,1,sizeof(double), - grid_buf1,grid_buf2,MPI_DOUBLE); + grid->forward_comm(Grid3d::FIX,this,0,1,sizeof(double), grid_buf1,grid_buf2,MPI_DOUBLE); } } @@ -108,7 +108,8 @@ void FixTTMGrid::init() FixTTM::init(); if (neighbor->skin > skin_original) - error->all(FLERR,"Cannot extend neighbor skin after fix ttm/grid defined"); + error->all(FLERR, Error::NOLASTLINE, + "Cannot extend neighbor skin after fix ttm/grid defined"); } /* ---------------------------------------------------------------------- */ diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp index 4abe3247439..034e538eaab 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -1,4 +1,3 @@ -// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia 
National Laboratories @@ -23,6 +22,7 @@ ------------------------------------------------------------------------- */ +// clang-format on #include "fix_ttm_thermal.h" @@ -32,15 +32,15 @@ #include "error.h" #include "force.h" #include "memory.h" +#include "potential_file_reader.h" #include "random_mars.h" #include "respa.h" -#include "potential_file_reader.h" #include "update.h" +#include #include #include #include -#include using namespace LAMMPS_NS; using namespace FixConst; @@ -57,68 +57,52 @@ static constexpr double SHIFT = 0.0; /* ---------------------------------------------------------------------- */ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), - random(nullptr), - gfactor1(nullptr), gfactor2(nullptr), ratio(nullptr), flangevin(nullptr), - T_electron(nullptr), T_electron_old(nullptr), - net_energy_transfer(nullptr), net_energy_transfer_all(nullptr) , - gamma_p_grid(nullptr), inductive_response_grid(nullptr), - c_e_grid(nullptr), k_e_grid(nullptr) + FixTTM(lmp, narg, arg), gamma_p_grid(nullptr), inductive_response_grid(nullptr), + c_e_grid(nullptr), k_e_grid(nullptr) { - if (narg < 8) error->all(FLERR,"Illegal fix ttm command"); - vector_flag = 1; - size_vector = 2; - global_freq = 1; - extvector = 1; - nevery = 1; - restart_peratom = 1; - restart_global = 1; - - e_property_file = nullptr; - - seed = utils::inumeric(FLERR,arg[3],false,lmp); - e_property_file = utils::strdup(arg[4]); - nxgrid = utils::inumeric(FLERR,arg[5],false,lmp); - nygrid = utils::inumeric(FLERR,arg[6],false,lmp); - nzgrid = utils::inumeric(FLERR,arg[7],false,lmp); + if (narg < 8) utils::missing_cmd_args(FLERR, "fix ttm/thermal", error); + seed = utils::inumeric(FLERR, arg[3], false, lmp); + e_property_file = arg[4]; + nxgrid = utils::inumeric(FLERR, arg[5], false, lmp); + nygrid = utils::inumeric(FLERR, arg[6], false, lmp); + nzgrid = utils::inumeric(FLERR, arg[7], false, lmp); inductive_power = 0.0; tinit = 0.0; - infile = outfile = nullptr; + 
gamma_p = 0.0; // to avoid uninitialzed data access when calling FixTTM::init() int iarg = 8; while (iarg < narg) { - if (strcmp(arg[iarg],"set") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); - tinit = (utils::numeric(FLERR,arg[iarg+1],false,lmp)); + if (strcmp(arg[iarg], "set") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix ttm/thermal set", error); + tinit = utils::numeric(FLERR, arg[iarg + 1], false, lmp); if (tinit <= 0.0) - error->all(FLERR,"Fix ttm initial temperature must be > 0.0"); + error->all(FLERR, iarg + 1, "Fix ttm/thermal initial temperature must be > 0.0"); iarg += 2; - } else if (strcmp(arg[iarg],"source") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); - inductive_power = (utils::numeric(FLERR,arg[iarg+1],false,lmp)); + } else if (strcmp(arg[iarg], "source") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix ttm/thermal source", error); + inductive_power = utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"infile") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ttm command"); - infile = utils::strdup(arg[iarg+1]); + } else if (strcmp(arg[iarg], "infile") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix ttm/thermal infile", error); + infile = arg[iarg + 1]; iarg += 2; - } else if (strcmp(arg[iarg],"outfile") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix ttm command"); - outevery = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - outfile = utils::strdup(arg[iarg+2]); + } else if (strcmp(arg[iarg], "outfile") == 0) { + if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "fix ttm/thermal outfile", error); + outevery = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + outfile = arg[iarg + 2]; iarg += 3; - } else error->all(FLERR,"Illegal fix ttm command"); + } else + error->all(FLERR, iarg, "Unknown fix ttm/thermal keyword {}", arg[iarg]); } // error check - if (seed <= 0) - 
error->all(FLERR,"Invalid random number seed in fix ttm command"); + if (seed <= 0) error->all(FLERR, 3, "Invalid random number seed in fix ttm/thermal command"); if (nxgrid <= 0 || nygrid <= 0 || nzgrid <= 0) - error->all(FLERR,"Fix ttm grid sizes must be > 0"); - + error->all(FLERR, Error::NOPOINTER, "Fix ttm grid sizes must be > 0"); // grid OFFSET to perform // SHIFT to map atom to nearest or lower-left grid point @@ -127,24 +111,23 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : // initialize Marsaglia RNG with processor-unique seed - random = new RanMars(lmp,seed + comm->me); + random = new RanMars(lmp, seed + comm->me); // allocate per-type arrays for force prefactors - gfactor1 = new double[atom->ntypes+1]; - gfactor2 = new double[atom->ntypes+1]; + gfactor1 = new double[atom->ntypes + 1]; + gfactor2 = new double[atom->ntypes + 1]; // check for allowed maximum number of total grid points bigint totalgrid = (bigint) nxgrid * nygrid * nzgrid; if (totalgrid > MAXSMALLINT) - error->all(FLERR,"Too many grid points in fix ttm"); + error->all(FLERR, Error::NOPOINTER, "Too many grid points in fix ttm/thermal"); ngridtotal = totalgrid; // allocate per-atom flangevin and zero it - flangevin = nullptr; - FixTTMThermal::grow_arrays(atom->nmax); + FixTTM::grow_arrays(atom->nmax); for (int i = 0; i < atom->nmax; i++) { flangevin[i][0] = 0.0; @@ -156,132 +139,47 @@ FixTTMThermal::FixTTMThermal(LAMMPS *lmp, int narg, char **arg) : atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); - - // determines which class deallocate_grid() is called from - - deallocate_flag = 0; - } /* ---------------------------------------------------------------------- */ FixTTMThermal::~FixTTMThermal() { - delete[] infile; - delete[] e_property_file; - - delete random; - - delete[] gfactor1; - delete[] gfactor2; - - memory->destroy(flangevin); - - if (!deallocate_flag) FixTTMThermal::deallocate_grid(); + FixTTMThermal::deallocate_grid(); } /* 
---------------------------------------------------------------------- */ - inline double safe_effective_kappa(double a, double b) { - if (a == 0 || b == 0) return 0; - return 2.0 * a * b / (a + b); - } +static inline double safe_effective_kappa(double a, double b) +{ + if (a == 0 || b == 0) return 0; + return 2.0 * a * b / (a + b); +} /* ---------------------------------------------------------------------- */ void FixTTMThermal::post_constructor() { - // allocate global grid on each proc - // needs to be done in post_contructor() beccause is virtual method - - allocate_grid(); - - // initialize electron temperatures on grid - - int ix,iy,iz; - for (iz = 0; iz < nzgrid; iz++) - for (iy = 0; iy < nygrid; iy++) - for (ix = 0; ix < nxgrid; ix++) - T_electron[iz][iy][ix] = tinit; - - // zero net_energy_transfer_all - // in case compute_vector accesses it on timestep 0 - - outflag = 0; - memset(&net_energy_transfer_all[0][0][0],0,ngridtotal*sizeof(double)); + FixTTM::post_constructor(); // set electron grid properties from file read_electron_properties(e_property_file); - - // set initial electron temperatures from user input file - - if (infile) read_electron_temperatures(infile); -} - -/* ---------------------------------------------------------------------- */ - -int FixTTMThermal::setmask() -{ - int mask = 0; - mask |= POST_FORCE; - mask |= POST_FORCE_RESPA; - mask |= END_OF_STEP; - return mask; } /* ---------------------------------------------------------------------- */ void FixTTMThermal::init() { - if (domain->dimension == 2) - error->all(FLERR,"Cannot use fix ttm with 2d simulation"); - if (domain->nonperiodic != 0) - error->all(FLERR,"Cannot use non-periodic boundares with fix ttm"); - if (domain->triclinic) - error->all(FLERR,"Cannot use fix ttm with triclinic box"); + FixTTM::init(); // to allow this, would have to reset grid bounds dynamically // for RCB balancing would have to reassign grid pts to procs // and create a new GridComm, and pass old GC data 
to new GC if (domain->box_change) - error->all(FLERR,"Cannot use fix ttm with changing box shape, size, or sub-domains"); - - // set force prefactors - - if (utils::strmatch(update->integrate_style,"^respa")) - nlevels_respa = (dynamic_cast(update->integrate))->nlevels; + error->all(FLERR, Error::NOLASTLINE, + "Cannot use fix ttm/thermal with changing box shape, size, or sub-domains"); } -/* ---------------------------------------------------------------------- */ - -void FixTTMThermal::setup(int vflag) -{ - if (utils::strmatch(update->integrate_style,"^verlet")) { - post_force_setup(vflag); - } else { - (dynamic_cast(update->integrate))->copy_flevel_f(nlevels_respa-1); - post_force_respa_setup(vflag,nlevels_respa-1,0); - (dynamic_cast(update->integrate))->copy_f_flevel(nlevels_respa-1); - } -} - -/* ---------------------------------------------------------------------- */ - -void FixTTMThermal::post_force_setup(int /*vflag*/) -{ - double **f = atom->f; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - // apply langevin forces that have been stored from previous run - - for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) { - f[i][0] += flangevin[i][0]; - f[i][1] += flangevin[i][1]; - f[i][2] += flangevin[i][2]; - } - } -} +// clang-format off /* ---------------------------------------------------------------------- */ @@ -317,46 +215,31 @@ void FixTTMThermal::post_force(int /*vflag*/) if (iz >= nzgrid) iz -= nzgrid; if (T_electron[iz][iy][ix] < 0) - error->one(FLERR,"Electronic temperature dropped below zero"); - //Come back and check this for scaling + error->one(FLERR, Error::NOLASTLINE, "Electronic temperature dropped below zero"); + //Come back and check this for scaling for (int i = 1; i <= atom->ntypes; i++) { - gfactor1[i] = - gamma_p_grid[iz][iy][ix] / force->ftm2v; - gfactor2[i] = sqrt(24.0*force->boltz*gamma_p_grid[iz][iy][ix]/update->dt/force->mvv2e) / force->ftm2v; - } + gfactor1[i] = - gamma_p_grid[iz][iy][ix] / force->ftm2v; + 
gfactor2[i] = sqrt(24.0*force->boltz*gamma_p_grid[iz][iy][ix]/update->dt/force->mvv2e) / force->ftm2v; + } double tsqrt = sqrt(T_electron[iz][iy][ix]); - gamma1 = gfactor1[type[i]]; gamma2 = gfactor2[type[i]] * tsqrt; if (T_electron[iz][iy][ix] > 0) { - flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); - flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); - flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); - - f[i][0] += flangevin[i][0]; - f[i][1] += flangevin[i][1]; - f[i][2] += flangevin[i][2]; - } + flangevin[i][0] = gamma1*v[i][0] + gamma2*(random->uniform()-0.5); + flangevin[i][1] = gamma1*v[i][1] + gamma2*(random->uniform()-0.5); + flangevin[i][2] = gamma1*v[i][2] + gamma2*(random->uniform()-0.5); + + f[i][0] += flangevin[i][0]; + f[i][1] += flangevin[i][1]; + f[i][2] += flangevin[i][2]; + } } } } /* ---------------------------------------------------------------------- */ -void FixTTMThermal::post_force_respa_setup(int vflag, int ilevel, int /*iloop*/) -{ - if (ilevel == nlevels_respa-1) post_force_setup(vflag); -} - -/* ---------------------------------------------------------------------- */ - -void FixTTMThermal::post_force_respa(int vflag, int ilevel, int /*iloop*/) -{ - if (ilevel == nlevels_respa-1) post_force(vflag); -} - -/* ---------------------------------------------------------------------- */ - void FixTTMThermal::end_of_step() { int ix,iy,iz; @@ -497,11 +380,10 @@ void FixTTMThermal::end_of_step() +(inductive_power*inductive_response_grid[iz][iy][ix])); } } - } // output of grid electron temperatures to file - if (outfile && (update->ntimestep % outevery == 0)) + if (!outfile.empty() && (update->ntimestep % outevery == 0)) write_electron_temperatures(fmt::format("{}.{}",outfile,update->ntimestep)); } @@ -537,24 +419,22 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) double gamma_p_tmp = values.next_double(); double ind_tmp = values.next_double(); - - // check 
correctness of input data if ((ix < 0) || (ix >= nxgrid) || (iy < 0) || (iy >= nygrid) || (iz < 0) || (iz >= nzgrid)) - throw TokenizerException("Fix ttm invalid grid index in fix ttm grid file",""); + throw TokenizerException("Fix ttm/thermal invalid grid index in grid file",""); if (c_e_tmp < 0.0) - throw TokenizerException("Fix ttm electron specific heat must be > 0.0",""); + throw TokenizerException("Fix ttm/thermal electron specific heat must be > 0.0",""); if (k_e_tmp < 0.0) - throw TokenizerException("Fix ttm electron conductivity must be > 0.0",""); + throw TokenizerException("Fix ttm/thermal electron conductivity must be > 0.0",""); if (gamma_p_tmp < 0.0) - throw TokenizerException("Fix ttm electron coupling must be > 0.0",""); + throw TokenizerException("Fix ttm/thermal electron coupling must be > 0.0",""); if (ind_tmp < 0.0) - throw TokenizerException("Fix ttm electron inductive response must be >= 0.0",""); + throw TokenizerException("Fix ttm/thermal electron inductive response must be >= 0.0",""); c_e_grid[iz][iy][ix] = c_e_tmp; k_e_grid[iz][iy][ix] = k_e_tmp; @@ -563,7 +443,7 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) prop_initial_set[iz][iy][ix] = 1; } } catch (std::exception &e) { - error->one(FLERR, e.what()); + error->one(FLERR, Error::NOLASTLINE, e.what()); } // check completeness of input data @@ -572,7 +452,8 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) for (int iy = 0; iy < nygrid; iy++) for (int ix = 0; ix < nxgrid; ix++) if (prop_initial_set[iz][iy][ix] == 0) - error->all(FLERR,"Fix ttm infile did not set all properties"); + error->all(FLERR, Error::NOLASTLINE, + "Fix ttm/thermal infile did not set all properties"); memory->destroy(prop_initial_set); } @@ -581,212 +462,6 @@ void FixTTMThermal::read_electron_properties(const std::string &filename) MPI_Bcast(&gamma_p_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); 
MPI_Bcast(&inductive_response_grid[0][0][0],ngridtotal,MPI_DOUBLE,0,world); } -/* ---------------------------------------------------------------------- - read in initial electron temperatures from a user-specified file - only read by proc 0, grid values are Bcast to other procs -------------------------------------------------------------------------- */ - -void FixTTMThermal::read_electron_temperatures(const std::string &filename) -{ - if (comm->me == 0) { - - int ***T_initial_set; - memory->create(T_initial_set,nzgrid,nygrid,nxgrid,"ttm:T_initial_set"); - memset(&T_initial_set[0][0][0],0,ngridtotal*sizeof(int)); - - // read initial electron temperature values from file - bigint nread = 0; - - try { - PotentialFileReader reader(lmp, filename, "electron temperature grid"); - - while (nread < ngridtotal) { - // reader will skip over comment-only lines - auto values = reader.next_values(4); - ++nread; - - int ix = values.next_int() - 1; - int iy = values.next_int() - 1; - int iz = values.next_int() - 1; - double T_tmp = values.next_double(); - - // check correctness of input data - - if ((ix < 0) || (ix >= nxgrid) || (iy < 0) || (iy >= nygrid) || (iz < 0) || (iz >= nzgrid)) - throw TokenizerException("Fix ttm invalid grid index in fix ttm grid file",""); - - if (T_tmp < 0.0) - throw TokenizerException("Fix ttm electron temperatures must be > 0.0",""); - - T_electron[iz][iy][ix] = T_tmp; - T_initial_set[iz][iy][ix] = 1; - } - } catch (std::exception &e) { - error->one(FLERR, e.what()); - } - - // check completeness of input data - - for (int iz = 0; iz < nzgrid; iz++) - for (int iy = 0; iy < nygrid; iy++) - for (int ix = 0; ix < nxgrid; ix++) - if (T_initial_set[iz][iy][ix] == 0) - error->all(FLERR,"Fix ttm infile did not set all temperatures"); - - memory->destroy(T_initial_set); - } - - MPI_Bcast(&T_electron[0][0][0],ngridtotal,MPI_DOUBLE,0,world); -} - -/* ---------------------------------------------------------------------- - write out current electron 
temperatures to user-specified file - only written by proc 0 -------------------------------------------------------------------------- */ - -void FixTTMThermal::write_electron_temperatures(const std::string &filename) -{ - if (comm->me) return; - - FILE *fp = fopen(filename.c_str(),"w"); - if (!fp) error->one(FLERR,"Fix ttm could not open output file {}: {}", - filename,utils::getsyserror()); - utils::print(fp,"# DATE: {} UNITS: {} COMMENT: Electron temperature " - "{}x{}x{} grid at step {}. Created by fix {}\n #Grid X,Y,Z Temperature\n", utils::current_date(), - update->unit_style, nxgrid, nygrid, nzgrid, update->ntimestep, style); - - int ix,iy,iz; - - for (iz = 0; iz < nzgrid; iz++) - for (iy = 0; iy < nygrid; iy++) - for (ix = 0; ix < nxgrid; ix++) - fprintf(fp,"%d %d %d %20.16g\n",ix+1,iy+1,iz+1,T_electron[iz][iy][ix]); - - fclose(fp); -} - -/* ---------------------------------------------------------------------- */ -void FixTTMThermal::grow_arrays(int ngrow) -{ - memory->grow(flangevin,ngrow,3,"ttm:flangevin"); -} - -/* ---------------------------------------------------------------------- - pack entire state of Fix into one write -------------------------------------------------------------------------- */ - -void FixTTMThermal::write_restart(FILE *fp) -{ - double *rlist; - memory->create(rlist,nxgrid*nygrid*nzgrid+4,"ttm:rlist"); - - int n = 0; - rlist[n++] = nxgrid; - rlist[n++] = nygrid; - rlist[n++] = nzgrid; - rlist[n++] = seed; - - // store global grid values - - for (int iz = 0; iz < nzgrid; iz++) - for (int iy = 0; iy < nygrid; iy++) - for (int ix = 0; ix < nxgrid; ix++) - rlist[n++] = T_electron[iz][iy][ix]; - - if (comm->me == 0) { - int size = n * sizeof(double); - fwrite(&size,sizeof(int),1,fp); - fwrite(rlist,sizeof(double),n,fp); - } - - memory->destroy(rlist); -} - -/* ---------------------------------------------------------------------- - use state info from restart file to restart the Fix 
-------------------------------------------------------------------------- */ - -void FixTTMThermal::restart(char *buf) -{ - int n = 0; - auto *rlist = (double *) buf; - - // check that restart grid size is same as current grid size - - int nxgrid_old = static_cast (rlist[n++]); - int nygrid_old = static_cast (rlist[n++]); - int nzgrid_old = static_cast (rlist[n++]); - - if (nxgrid_old != nxgrid || nygrid_old != nygrid || nzgrid_old != nzgrid) - error->all(FLERR,"Must restart fix ttm with same grid size"); - - // change RN seed from initial seed, to avoid same Langevin factors - // just increment by 1, since for RanMars that is a new RN stream - - seed = static_cast (rlist[n++]) + 1; - delete random; - random = new RanMars(lmp,seed+comm->me); - - // restore global grid values - - for (int iz = 0; iz < nzgrid; iz++) - for (int iy = 0; iy < nygrid; iy++) - for (int ix = 0; ix < nxgrid; ix++) - T_electron[iz][iy][ix] = rlist[n++]; -} - -/* ---------------------------------------------------------------------- - pack values in local atom-based arrays for restart file -------------------------------------------------------------------------- */ - -int FixTTMThermal::pack_restart(int i, double *buf) -{ - // pack buf[0] this way because other fixes unpack it - - buf[0] = 4; - buf[1] = flangevin[i][0]; - buf[2] = flangevin[i][1]; - buf[3] = flangevin[i][2]; - return 4; -} - -/* ---------------------------------------------------------------------- - unpack values from atom->extra array to restart the fix -------------------------------------------------------------------------- */ - -void FixTTMThermal::unpack_restart(int nlocal, int nth) -{ - double **extra = atom->extra; - - // skip to Nth set of extra values - // unpack the Nth first values this way because other fixes pack them - - int m = 0; - for (int i = 0; i < nth; i++) m += static_cast (extra[nlocal][m]); - m++; - - flangevin[nlocal][0] = extra[nlocal][m++]; - flangevin[nlocal][1] = extra[nlocal][m++]; - 
flangevin[nlocal][2] = extra[nlocal][m++]; -} - -/* ---------------------------------------------------------------------- - size of atom nlocal's restart data -------------------------------------------------------------------------- */ - -int FixTTMThermal::size_restart(int /*nlocal*/) -{ - return 4; -} - -/* ---------------------------------------------------------------------- - maxsize of any atom's restart data -------------------------------------------------------------------------- */ - -int FixTTMThermal::maxsize_restart() -{ - return 4; -} /* ---------------------------------------------------------------------- return the energy of the electronic subsystem or the net_energy transfer @@ -806,16 +481,14 @@ double FixTTMThermal::compute_vector(int n) double dz = domain->zprd/nzgrid; double del_vol = dx*dy*dz; - for (iz = 0; iz < nzgrid; iz++) - for (iy = 0; iy < nygrid; iy++) - for (ix = 0; ix < nxgrid; ix++) { - e_energy += - T_electron[iz][iy][ix]*c_e_grid[iz][iy][ix]*del_vol; - transfer_energy += - net_energy_transfer_all[iz][iy][ix]*update->dt; + for (int iz = 0; iz < nzgrid; iz++) { + for (int iy = 0; iy < nygrid; iy++) { + for (int ix = 0; ix < nxgrid; ix++) { + e_energy += T_electron[iz][iy][ix]*c_e_grid[iz][iy][ix]*del_vol; + transfer_energy += net_energy_transfer_all[iz][iy][ix]*update->dt; } - - + } + } outflag = 1; } @@ -830,9 +503,8 @@ double FixTTMThermal::compute_vector(int n) double FixTTMThermal::memory_usage() { - double bytes = 0.0; - bytes += (double) atom->nmax * 3 * sizeof(double); - bytes += (double) 4*ngridtotal * sizeof(int); + double bytes = FixTTM::memory_usage(); + bytes += (double) 4*ngridtotal * sizeof(double); return bytes; } @@ -842,16 +514,11 @@ double FixTTMThermal::memory_usage() void FixTTMThermal::allocate_grid() { - memory->create(T_electron_old,nzgrid,nygrid,nxgrid,"ttm:T_electron_old"); - memory->create(T_electron,nzgrid,nygrid,nxgrid,"ttm:T_electron"); + FixTTM::allocate_grid(); 
memory->create(c_e_grid,nzgrid,nygrid,nxgrid,"ttm:c_e_grid"); memory->create(k_e_grid,nzgrid,nygrid,nxgrid,"ttm:k_e_grid"); memory->create(gamma_p_grid,nzgrid,nygrid,nxgrid,"ttm:gamma_p_grid"); memory->create(inductive_response_grid,nzgrid,nygrid,nxgrid,"ttm:gamma_p_grid"); - memory->create(net_energy_transfer,nzgrid,nygrid,nxgrid, - "ttm:net_energy_transfer"); - memory->create(net_energy_transfer_all,nzgrid,nygrid,nxgrid, - "ttm:net_energy_transfer_all"); } /* ---------------------------------------------------------------------- @@ -860,12 +527,8 @@ void FixTTMThermal::allocate_grid() void FixTTMThermal::deallocate_grid() { - memory->destroy(T_electron_old); - memory->destroy(T_electron); memory->destroy(c_e_grid); memory->destroy(k_e_grid); memory->destroy(gamma_p_grid); memory->destroy(inductive_response_grid); - memory->destroy(net_energy_transfer); - memory->destroy(net_energy_transfer_all); } diff --git a/src/EXTRA-FIX/fix_ttm_thermal.h b/src/EXTRA-FIX/fix_ttm_thermal.h index d6e32e6067b..6eba0e6796f 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.h +++ b/src/EXTRA-FIX/fix_ttm_thermal.h @@ -20,65 +20,37 @@ FixStyle(ttm/thermal,FixTTMThermal); #ifndef LMP_FIX_TTM_THERMAL_H #define LMP_FIX_TTM_THERMAL_H -#include "fix.h" +#include "fix_ttm.h" namespace LAMMPS_NS { -class FixTTMThermal : public Fix { +class FixTTMThermal : public FixTTM { public: FixTTMThermal(class LAMMPS *, int, char **); ~FixTTMThermal() override; + void post_constructor() override; - int setmask() override; void init() override; - void setup(int) override; - void post_force_setup(int); + void post_force(int) override; - void post_force_respa_setup(int, int, int); - void post_force_respa(int, int, int) override; void end_of_step() override; - void grow_arrays(int) override; - void write_restart(FILE *) override; - void restart(char *) override; - int pack_restart(int, double *) override; - void unpack_restart(int, int) override; - int size_restart(int) override; - int maxsize_restart() override; 
+ double compute_vector(int) override; double memory_usage() override; protected: - int nlevels_respa; - int seed; - int nxgrid, nygrid, nzgrid; // size of global grid - int ngridtotal; // total size of global grid - int deallocate_flag; - int outflag, outevery; - double shift, tinit; - double e_energy, transfer_energy; - char *infile, *outfile, *e_property_file; - - class RanMars *random; double inductive_power; - double *gfactor1, *gfactor2, *ratio, **flangevin; - double ***T_electron, ***T_electron_old; - double ***net_energy_transfer, ***net_energy_transfer_all; - double ***T_atomic; - int ***nsum, ***nsum_all; - double ***sum_vsq, ***sum_vsq_all; - double ***sum_mass_vsq, ***sum_mass_vsq_all; + std::string e_property_file; + double ***gamma_p_grid; double ***inductive_response_grid; double ***c_e_grid; double ***k_e_grid; - - virtual void allocate_grid(); - virtual void deallocate_grid(); + void allocate_grid() override; + void deallocate_grid() override; virtual void read_electron_properties(const std::string &); - virtual void read_electron_temperatures(const std::string &); - virtual void write_electron_temperatures(const std::string &); }; } // namespace LAMMPS_NS From 1fac30e018c24fc86f3ff40ef379647396b0092f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 11:42:57 -0500 Subject: [PATCH 170/604] update logs for fix ttm and fix ttm/grid after changing EAM/fs potential --- examples/ttm/in.ttm | 24 ++--- examples/ttm/in.ttm.grid | 24 ++--- examples/ttm/log.1Dec22.ttm.grid.g++.1 | 130 ------------------------ examples/ttm/log.1Dec22.ttm.grid.g++.4 | 131 ------------------------ examples/ttm/log.26Aug21.ttm.g++.1 | 113 --------------------- examples/ttm/log.26Aug21.ttm.g++.4 | 113 --------------------- examples/ttm/log.5Nov25.ttm.g++.1 | 115 +++++++++++++++++++++ examples/ttm/log.5Nov25.ttm.g++.4 | 115 +++++++++++++++++++++ examples/ttm/log.5Nov25.ttm.grid.g++.1 | 132 +++++++++++++++++++++++++ examples/ttm/log.5Nov25.ttm.grid.g++.4 | 132 
+++++++++++++++++++++++++ 10 files changed, 518 insertions(+), 511 deletions(-) delete mode 100644 examples/ttm/log.1Dec22.ttm.grid.g++.1 delete mode 100644 examples/ttm/log.1Dec22.ttm.grid.g++.4 delete mode 100644 examples/ttm/log.26Aug21.ttm.g++.1 delete mode 100644 examples/ttm/log.26Aug21.ttm.g++.4 create mode 100644 examples/ttm/log.5Nov25.ttm.g++.1 create mode 100644 examples/ttm/log.5Nov25.ttm.g++.4 create mode 100644 examples/ttm/log.5Nov25.ttm.grid.g++.1 create mode 100644 examples/ttm/log.5Nov25.ttm.grid.g++.4 diff --git a/examples/ttm/in.ttm b/examples/ttm/in.ttm index 1b259cfc198..83ae5019f86 100644 --- a/examples/ttm/in.ttm +++ b/examples/ttm/in.ttm @@ -4,24 +4,24 @@ boundary p p p variable latc equal 2.87 lattice bcc ${latc} -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_box 1 sim_box -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +create_box 1 sim_box +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_atoms 1 region atom_box +create_atoms 1 region atom_box mass 1 55.845 pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe +pair_coeff * * Fe_mm.eam.fs Fe neighbor 2.0 bin neigh_modify every 5 delay 0 check yes diff --git a/examples/ttm/in.ttm.grid b/examples/ttm/in.ttm.grid index 5c3b9c41fbc..27549017c4f 100644 --- a/examples/ttm/in.ttm.grid +++ b/examples/ttm/in.ttm.grid @@ -4,24 +4,24 @@ boundary p p p variable latc equal 2.87 lattice bcc ${latc} -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin 
equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_box 1 sim_box -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & +create_box 1 sim_box +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} & units lattice -create_atoms 1 region atom_box +create_atoms 1 region atom_box mass 1 55.845 pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe +pair_coeff * * Fe_mm.eam.fs Fe neighbor 2.0 bin neigh_modify every 5 delay 0 check yes diff --git a/examples/ttm/log.1Dec22.ttm.grid.g++.1 b/examples/ttm/log.1Dec22.ttm.grid.g++.1 deleted file mode 100644 index 9d4a91cf55f..00000000000 --- a/examples/ttm/log.1Dec22.ttm.grid.g++.1 +++ /dev/null @@ -1,130 +0,0 @@ -LAMMPS (3 Nov 2022) -units metal -atom_style atomic -boundary p p p - -variable latc equal 2.87 -lattice bcc ${latc} -lattice bcc 2.87 -Lattice spacing in x,y,z = 2.87 2.87 2.87 -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 10 units lattice -create_box 1 sim_box -Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) - 1 by 1 by 1 MPI processor grid 
-region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 10 units lattice -create_atoms 1 region atom_box -Created 16000 atoms - using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) - create_atoms CPU = 0.002 seconds - -mass 1 55.845 - -pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe -Reading eam/fs potential file FeVoter-ChenRecheck.fs with DATE: 2016-04-04 - -neighbor 2.0 bin -neigh_modify every 5 delay 0 check yes - -fix 1 all nve - -fix twotemp all ttm/grid 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 - -compute pe all pe/atom -compute ke all ke/atom - -timestep 0.0001 -thermo 100 - -thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] - thermo_modify format float "%20.16g" - -# dump output - -#compute 1 all property/grid 10 10 10 id ix iy iz -#dump 1 all grid 100 tmp.dump c_1:grid:data[*] f_twotemp:grid:data -#dump_modify 1 sort 1 - -#compute 10 all property/grid 10 10 10 id xc yc zc -#fix ave all ave/grid 10 10 100 10 10 10 f_twotemp:grid:data - -#dump ave all grid 100 tmp.dump.ave # c_10:grid:data[*] f_ave:grid:data -#dump_modify ave sort id - -#dump vtk all grid/vtk 100 tmp.dump.*.vtk f_ave:grid:data -#dump_modify vtk sort id buffer no - -# run simulation - -run 1000 -Neighbor list info ... 
- update: every = 5 steps, delay = 0 steps, check = yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 6.524 - ghost atom cutoff = 6.524 - binsize = 3.262, bins = 18 18 18 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair eam/fs, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 10.99 | 10.99 | 10.99 Mbytes - Step Temp TotEng f_twotemp[1] f_twotemp[2] - 0 0 -68483.52254543516 371.9188105082105 0 - 100 17.01353086098387 -68446.50228930202 334.6217068813629 0.3763710887774046 - 200 27.91331236535322 -68413.16008042906 301.3181773007303 0.3165912892484031 - 300 32.20115656493125 -68383.19634217303 271.3756381838045 0.2901111802983097 - 400 33.46056398887347 -68355.73057141017 243.9344715501159 0.2548133388123378 - 500 35.5346204243821 -68331.63060947017 219.84946888619 0.2388591367999414 - 600 40.61692458441596 -68309.36124792948 197.5527667607886 0.3056696014124338 - 700 46.20303146200326 -68290.12727395598 178.3775768561405 0.1982123493608406 - 800 50.4375018189932 -68272.72651051797 160.995046695269 0.1708386295858845 - 900 52.17011714635106 -68257.85059865142 146.1567281868867 0.1032829304640776 - 1000 53.49296457217382 -68244.38715993936 132.7166474251702 0.06428993394665769 -Loop time of 5.99191 on 1 procs for 1000 steps with 16000 atoms - -Performance: 1.442 ns/day, 16.644 hours/ns, 166.892 timesteps/s, 2.670 Matom-step/s -100.0% CPU use with 1 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 5.3136 | 5.3136 | 5.3136 | 0.0 | 88.68 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.035738 | 0.035738 | 0.035738 | 0.0 | 0.60 -Output | 0.00049119 | 0.00049119 | 0.00049119 | 0.0 | 0.01 -Modify | 0.62365 | 0.62365 | 0.62365 | 0.0 | 10.41 -Other | | 0.01844 | | | 0.31 - 
-Nlocal: 16000 ave 16000 max 16000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 13449 ave 13449 max 13449 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 896000 ave 896000 max 896000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 896000 -Ave neighs/atom = 56 -Neighbor list builds = 0 -Dangerous builds = 0 -Total wall time: 0:00:06 diff --git a/examples/ttm/log.1Dec22.ttm.grid.g++.4 b/examples/ttm/log.1Dec22.ttm.grid.g++.4 deleted file mode 100644 index 4bcce5365e0..00000000000 --- a/examples/ttm/log.1Dec22.ttm.grid.g++.4 +++ /dev/null @@ -1,131 +0,0 @@ -LAMMPS (3 Nov 2022) -WARNING: Using I/O redirection is unreliable with parallel runs. Better use -in switch to read input file. (../lammps.cpp:528) -units metal -atom_style atomic -boundary p p p - -variable latc equal 2.87 -lattice bcc ${latc} -lattice bcc 2.87 -Lattice spacing in x,y,z = 2.87 2.87 2.87 -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 10 units lattice -create_box 1 sim_box -Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) - 1 by 2 by 2 MPI processor grid -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 
-10 10 ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 10 units lattice -create_atoms 1 region atom_box -Created 16000 atoms - using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) - create_atoms CPU = 0.001 seconds - -mass 1 55.845 - -pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe -Reading eam/fs potential file FeVoter-ChenRecheck.fs with DATE: 2016-04-04 - -neighbor 2.0 bin -neigh_modify every 5 delay 0 check yes - -fix 1 all nve - -fix twotemp all ttm/grid 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 - -compute pe all pe/atom -compute ke all ke/atom - -timestep 0.0001 -thermo 100 - -thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] - thermo_modify format float "%20.16g" - -# dump output - -#compute 1 all property/grid 10 10 10 id ix iy iz -#dump 1 all grid 100 tmp.dump c_1:grid:data[*] f_twotemp:grid:data -#dump_modify 1 sort 1 - -#compute 10 all property/grid 10 10 10 id xc yc zc -#fix ave all ave/grid 10 10 100 10 10 10 f_twotemp:grid:data - -#dump ave all grid 100 tmp.dump.ave # c_10:grid:data[*] f_ave:grid:data -#dump_modify ave sort id - -#dump vtk all grid/vtk 100 tmp.dump.*.vtk f_ave:grid:data -#dump_modify vtk sort id buffer no - -# run simulation - -run 1000 -Neighbor list info ... 
- update: every = 5 steps, delay = 0 steps, check = yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 6.524 - ghost atom cutoff = 6.524 - binsize = 3.262, bins = 18 18 18 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair eam/fs, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 4.843 | 4.843 | 4.843 Mbytes - Step Temp TotEng f_twotemp[1] f_twotemp[2] - 0 0 -68483.52254530673 371.9188105082186 0 - 100 16.95536995775684 -68446.64765713879 334.7745598327931 0.3602932995006087 - 200 27.82619298359641 -68413.48663012494 301.6568409464847 0.2922875754523593 - 300 32.28660976355901 -68383.41369945828 271.6030085280586 0.26987388247804 - 400 33.33119316198579 -68356.74598240001 244.9747750036311 0.2061586600914003 - 500 35.14534756499593 -68332.73504057307 220.9328922343961 0.2800368538794578 - 600 39.58922469808521 -68311.03191758461 199.2602622784512 0.2310303196166884 - 700 45.34652315787151 -68291.65247941404 179.9297699858464 0.1438135463248855 - 800 49.66707856481075 -68274.98092841901 163.2540575286425 0.1600890300738265 - 900 52.17692450487317 -68259.8031091165 148.1017576370548 0.1177316234407941 - 1000 54.24228199265479 -68245.58589458198 133.8816957314364 0.1314999893461343 -Loop time of 2.16917 on 4 procs for 1000 steps with 16000 atoms - -Performance: 3.983 ns/day, 6.025 hours/ns, 461.006 timesteps/s, 7.376 Matom-step/s -97.7% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 1.8638 | 1.8826 | 1.9014 | 1.0 | 86.79 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.057938 | 0.076258 | 0.094755 | 4.8 | 3.52 -Output | 0.00020722 | 0.00022911 | 0.00029186 | 0.0 | 0.01 -Modify | 0.20222 | 0.20238 | 0.20256 | 0.0 | 9.33 -Other | | 0.007738 | | | 0.36 - 
-Nlocal: 4000 ave 4000 max 4000 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 6329 ave 6329 max 6329 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 224000 ave 227436 max 220450 min -Histogram: 1 0 1 0 0 0 0 1 0 1 - -Total # of neighbors = 896000 -Ave neighs/atom = 56 -Neighbor list builds = 0 -Dangerous builds = 0 -Total wall time: 0:00:02 diff --git a/examples/ttm/log.26Aug21.ttm.g++.1 b/examples/ttm/log.26Aug21.ttm.g++.1 deleted file mode 100644 index a0e8ca6f1f5..00000000000 --- a/examples/ttm/log.26Aug21.ttm.g++.1 +++ /dev/null @@ -1,113 +0,0 @@ -LAMMPS (30 Jul 2021) -units metal -atom_style atomic -boundary p p p - -variable latc equal 2.87 -lattice bcc ${latc} -lattice bcc 2.87 -Lattice spacing in x,y,z = 2.8700000 2.8700000 2.8700000 -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 10 units lattice -create_box 1 sim_box -Created orthogonal box = (-28.700000 -28.700000 -28.700000) to (28.700000 28.700000 28.700000) - 1 by 1 by 1 MPI processor grid -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 ${zmax} units lattice 
-region atom_box block -10 10 -10 10 -10 10 units lattice -create_atoms 1 region atom_box -Created 16000 atoms - using lattice units in orthogonal box = (-28.700000 -28.700000 -28.700000) to (28.700000 28.700000 28.700000) - create_atoms CPU = 0.005 seconds - -mass 1 55.845 - -include pot_iron.mod -pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe - -neighbor 2.0 bin -neigh_modify every 5 delay 0 check yes - -fix 1 all nve - -fix twotemp all ttm 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 - -compute pe all pe/atom -compute ke all ke/atom - -timestep 0.0001 -thermo 100 - -thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] - thermo_modify format float "%20.16g" - -run 1000 -Neighbor list info ... - update every 5 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 6.524 - ghost atom cutoff = 6.524 - binsize = 3.262, bins = 18 18 18 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair eam/fs, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 10.97 | 10.97 | 10.97 Mbytes -Step Temp TotEng f_twotemp[1] f_twotemp[2] - 0 0 -68483.52254543516 371.9188105082105 0 - 100 17.01353086098387 -68446.50228930202 334.6217068813629 0.3763710887774045 - 200 27.91331236535322 -68413.16008042906 301.3181773007303 0.3165912892484031 - 300 32.20115656493127 -68383.19634217303 271.3756381838044 0.2901111802983097 - 400 33.46056398887347 -68355.73057141017 243.9344715501159 0.2548133388123376 - 500 35.5346204243821 -68331.63060947017 219.84946888619 0.2388591367999412 - 600 40.61692458441595 -68309.36124792948 197.5527667607885 0.3056696014124333 - 700 46.20303146200327 -68290.12727395596 178.3775768561404 0.1982123493608405 - 800 50.43750181899325 -68272.72651051797 160.995046695269 0.1708386295858842 - 900 52.1701171463511 -68257.85059865141 
146.1567281868866 0.1032829304640773 - 1000 53.49296457217385 -68244.38715993935 132.7166474251701 0.06428993394665879 -Loop time of 17.1447 on 1 procs for 1000 steps with 16000 atoms - -Performance: 0.504 ns/day, 47.624 hours/ns, 58.327 timesteps/s -99.9% CPU use with 1 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 15.811 | 15.811 | 15.811 | 0.0 | 92.22 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.094539 | 0.094539 | 0.094539 | 0.0 | 0.55 -Output | 0.00093974 | 0.00093974 | 0.00093974 | 0.0 | 0.01 -Modify | 1.1898 | 1.1898 | 1.1898 | 0.0 | 6.94 -Other | | 0.04797 | | | 0.28 - -Nlocal: 16000.0 ave 16000 max 16000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 13449.0 ave 13449 max 13449 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 896000.0 ave 896000 max 896000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 896000 -Ave neighs/atom = 56.000000 -Neighbor list builds = 0 -Dangerous builds = 0 -Total wall time: 0:00:17 diff --git a/examples/ttm/log.26Aug21.ttm.g++.4 b/examples/ttm/log.26Aug21.ttm.g++.4 deleted file mode 100644 index 3cc6615ed91..00000000000 --- a/examples/ttm/log.26Aug21.ttm.g++.4 +++ /dev/null @@ -1,113 +0,0 @@ -LAMMPS (30 Jul 2021) -units metal -atom_style atomic -boundary p p p - -variable latc equal 2.87 -lattice bcc ${latc} -lattice bcc 2.87 -Lattice spacing in x,y,z = 2.8700000 2.8700000 2.8700000 -variable xmax equal 10.0 -variable xmin equal -10.0 -variable ymax equal 10.0 -variable ymin equal -10.0 -variable zmax equal 10.0 -variable zmin equal -10.0 - -region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 
10 ${zmin} ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 ${zmax} units lattice -region sim_box block -10 10 -10 10 -10 10 units lattice -create_box 1 sim_box -Created orthogonal box = (-28.700000 -28.700000 -28.700000) to (28.700000 28.700000 28.700000) - 1 by 2 by 2 MPI processor grid -region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 ${zmax} units lattice -region atom_box block -10 10 -10 10 -10 10 units lattice -create_atoms 1 region atom_box -Created 16000 atoms - using lattice units in orthogonal box = (-28.700000 -28.700000 -28.700000) to (28.700000 28.700000 28.700000) - create_atoms CPU = 0.002 seconds - -mass 1 55.845 - -include pot_iron.mod -pair_style eam/fs -pair_coeff * * FeVoter-ChenRecheck.fs Fe - -neighbor 2.0 bin -neigh_modify every 5 delay 0 check yes - -fix 1 all nve - -fix twotemp all ttm 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 - -compute pe all pe/atom -compute ke all ke/atom - -timestep 0.0001 -thermo 100 - -thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] - thermo_modify format float "%20.16g" - -run 1000 -Neighbor list info ... 
- update every 5 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 6.524 - ghost atom cutoff = 6.524 - binsize = 3.262, bins = 18 18 18 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair eam/fs, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d - bin: standard -Per MPI rank memory allocation (min/avg/max) = 4.845 | 4.845 | 4.845 Mbytes -Step Temp TotEng f_twotemp[1] f_twotemp[2] - 0 0 -68483.52254530673 371.9188105082105 0 - 100 16.95536995775683 -68446.64765713879 334.7745598327934 0.3602932995006091 - 200 27.82619298359641 -68413.48663012494 301.6568409464845 0.2922875754523596 - 300 32.286609763559 -68383.41369945828 271.6030085280584 0.2698738824780399 - 400 33.33119316198579 -68356.74598240001 244.9747750036312 0.2061586600914007 - 500 35.14534756499593 -68332.73504057307 220.9328922343961 0.2800368538794571 - 600 39.58922469808519 -68311.03191758461 199.2602622784512 0.231030319616688 - 700 45.34652315787152 -68291.65247941406 179.9297699858465 0.1438135463248857 - 800 49.66707856481077 -68274.98092841901 163.2540575286428 0.1600890300738259 - 900 52.17692450487316 -68259.8031091165 148.1017576370546 0.1177316234407944 - 1000 54.24228199265477 -68245.58589458199 133.8816957314364 0.1314999893461338 -Loop time of 5.03135 on 4 procs for 1000 steps with 16000 atoms - -Performance: 1.717 ns/day, 13.976 hours/ns, 198.754 timesteps/s -98.8% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.1173 | 4.2634 | 4.3858 | 5.4 | 84.74 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.2218 | 0.34547 | 0.49422 | 19.4 | 6.87 -Output | 0.00031185 | 0.00036952 | 0.00044986 | 0.0 | 0.01 -Modify | 0.39294 | 0.39605 | 0.39877 | 0.4 | 7.87 -Other | | 0.02604 | | | 0.52 - -Nlocal: 4000.00 ave 4000 max 4000 min 
-Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 6329.00 ave 6329 max 6329 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 224000.0 ave 227436 max 220450 min -Histogram: 1 0 1 0 0 0 0 1 0 1 - -Total # of neighbors = 896000 -Ave neighs/atom = 56.000000 -Neighbor list builds = 0 -Dangerous builds = 0 -Total wall time: 0:00:05 diff --git a/examples/ttm/log.5Nov25.ttm.g++.1 b/examples/ttm/log.5Nov25.ttm.g++.1 new file mode 100644 index 00000000000..a6dc744b274 --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 1 by 1 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 ${zmin} 
${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.002 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +run 1000 +Neighbor list info ... + update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 14.9 | 14.9 | 14.9 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.45357526305 371.9188105082105 0 + 100 16.85926180175369 -65897.42810516304 334.6158453608917 0.3776557424305327 + 200 26.91738143787221 -65864.07903628545 301.307260183661 0.3142967408573022 + 300 30.10335494902046 -65833.92817996511 271.1760102822967 0.2911744217418918 + 400 30.6821460942467 -65806.37794178465 243.6402654262073 0.2761347453692315 + 500 32.46441845318886 -65782.36077145382 219.6412749061558 0.2537002138088616 + 600 37.39842632210484 -65760.12348770068 197.3872590242496 0.2993079650453979 + 700 43.00572093059723 -65740.92227142058 178.2231041079152 0.2353050993586863 + 800 47.91585893000679 
-65723.58668950901 160.9301123360765 0.1592282246939696 + 900 51.32805378793761 -65708.72997152478 146.07875754393 0.1560849243086589 + 1000 54.63740064122083 -65695.88617173544 133.2527989169113 0.12775960609359 +Loop time of 11.0016 on 1 procs for 1000 steps with 16000 atoms + +Performance: 0.785 ns/day, 30.560 hours/ns, 90.896 timesteps/s, 1.454 Matom-step/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 10.203 | 10.203 | 10.203 | 0.0 | 92.75 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.039828 | 0.039828 | 0.039828 | 0.0 | 0.36 +Output | 0.00062765 | 0.00062765 | 0.00062765 | 0.0 | 0.01 +Modify | 0.74728 | 0.74728 | 0.74728 | 0.0 | 6.79 +Other | | 0.01042 | | | 0.09 + +Nlocal: 16000 ave 16000 max 16000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 17201 ave 17201 max 17201 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 1.088e+06 ave 1.088e+06 max 1.088e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:11 diff --git a/examples/ttm/log.5Nov25.ttm.g++.4 b/examples/ttm/log.5Nov25.ttm.g++.4 new file mode 100644 index 00000000000..04dc2caf603 --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
+ using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 2 by 2 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.001 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke 
all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +run 1000 +Neighbor list info ... + update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.037 | 5.037 | 5.037 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.4535754628 371.9188105082105 0 + 100 16.80016730030894 -65897.57625261409 334.7711997056044 0.3621418151154537 + 200 26.84848103084207 -65864.39605153851 301.6347266008477 0.2933241048760688 + 300 30.00882217757777 -65834.42707325556 271.6786568408265 0.2832796758014579 + 400 30.20781334990041 -65807.71375650795 245.00773539838 0.2138026627784766 + 500 32.09861077113437 -65783.42013908036 220.6900170423646 0.2741656726246444 + 600 36.79157815724962 -65761.79596939837 199.1113034240578 0.1952249742260572 + 700 42.72984514810628 -65742.51439832698 179.8612024539893 0.1428818964794121 + 800 47.69532275574724 -65725.53833811254 162.8676245424384 0.1860499472400562 + 900 51.39235313856362 -65710.37089637665 147.7307930108209 0.1335083658114848 + 1000 55.24706882539561 -65695.7753536183 133.1365828777173 0.1391135652700599 +Loop time of 3.40992 on 4 procs for 1000 steps with 16000 atoms + +Performance: 2.534 ns/day, 9.472 hours/ns, 293.262 timesteps/s, 4.692 Matom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.0296 | 3.0961 | 3.1457 | 2.4 | 90.80 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.053256 | 0.10333 
| 0.17004 | 13.4 | 3.03 +Output | 0.00021219 | 0.00023882 | 0.00031399 | 0.0 | 0.01 +Modify | 0.20415 | 0.20463 | 0.20533 | 0.1 | 6.00 +Other | | 0.0056 | | | 0.16 + +Nlocal: 4000 ave 4000 max 4000 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 8281 ave 8281 max 8281 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 272000 ave 276365 max 267580 min +Histogram: 1 0 1 0 0 0 0 1 0 1 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:03 diff --git a/examples/ttm/log.5Nov25.ttm.grid.g++.1 b/examples/ttm/log.5Nov25.ttm.grid.g++.1 new file mode 100644 index 00000000000..3ea466edbd5 --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.grid.g++.1 @@ -0,0 +1,132 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 1 by 1 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} 
units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.001 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm/grid 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +# dump output + +#compute 1 all property/grid 10 10 10 id ix iy iz +#dump 1 all grid 100 tmp.dump c_1:grid:data[*] f_twotemp:grid:data +#dump_modify 1 sort 1 + +#compute 10 all property/grid 10 10 10 id xc yc zc +#fix ave all ave/grid 10 10 100 10 10 10 f_twotemp:grid:data + +#dump ave all grid 100 tmp.dump.ave # c_10:grid:data[*] f_ave:grid:data +#dump_modify ave sort id + +#dump vtk all grid/vtk 100 tmp.dump.*.vtk f_ave:grid:data +#dump_modify vtk sort id buffer no + +# run simulation + +run 1000 +Neighbor list info ... 
+ update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 14.91 | 14.91 | 14.91 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.45357526305 371.9188105082105 0 + 100 16.85926180175369 -65897.42810516304 334.6158453608917 0.3776557424305327 + 200 26.91738143787221 -65864.07903628545 301.307260183661 0.3142967408573023 + 300 30.10335494902047 -65833.92817996511 271.1760102822967 0.2911744217418918 + 400 30.68214609424669 -65806.37794178465 243.6402654262073 0.2761347453692315 + 500 32.46441845318886 -65782.36077145384 219.6412749061558 0.2537002138088612 + 600 37.39842632210481 -65760.12348770069 197.3872590242496 0.299307965045398 + 700 43.00572093059721 -65740.92227142058 178.2231041079152 0.2353050993586867 + 800 47.91585893000681 -65723.58668950901 160.9301123360766 0.1592282246939695 + 900 51.32805378793766 -65708.72997152479 146.0787575439301 0.1560849243086592 + 1000 54.63740064122084 -65695.88617173549 133.2527989169113 0.1277596060935903 +Loop time of 10.9683 on 1 procs for 1000 steps with 16000 atoms + +Performance: 0.788 ns/day, 30.468 hours/ns, 91.172 timesteps/s, 1.459 Matom-step/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 10.181 | 10.181 | 10.181 | 0.0 | 92.82 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.040187 | 0.040187 | 0.040187 | 0.0 | 0.37 +Output | 0.00061242 | 0.00061242 | 0.00061242 | 0.0 | 0.01 +Modify | 0.73547 | 0.73547 | 0.73547 | 0.0 | 6.71 +Other | | 0.01077 | | | 0.10 + +Nlocal: 
16000 ave 16000 max 16000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 17201 ave 17201 max 17201 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 1.088e+06 ave 1.088e+06 max 1.088e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:11 diff --git a/examples/ttm/log.5Nov25.ttm.grid.g++.4 b/examples/ttm/log.5Nov25.ttm.grid.g++.4 new file mode 100644 index 00000000000..e271c122137 --- /dev/null +++ b/examples/ttm/log.5Nov25.ttm.grid.g++.4 @@ -0,0 +1,132 @@ +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-340-ga6cfcad4bf-modified) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. + using 1 OpenMP thread(s) per MPI task +units metal +atom_style atomic +boundary p p p + +variable latc equal 2.87 +lattice bcc ${latc} +lattice bcc 2.87 +Lattice spacing in x,y,z = 2.87 2.87 2.87 +variable xmax equal 10.0 +variable xmin equal -10.0 +variable ymax equal 10.0 +variable ymin equal -10.0 +variable zmax equal 10.0 +variable zmin equal -10.0 + +region sim_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 ${zmax} units lattice +region sim_box block -10 10 -10 10 -10 10 units lattice +create_box 1 sim_box +Created orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + 1 by 2 by 2 MPI processor grid +region atom_box block ${xmin} ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 ${xmax} ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 ${ymin} ${ymax} ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 ${ymax} ${zmin} ${zmax} units 
lattice +region atom_box block -10 10 -10 10 ${zmin} ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 ${zmax} units lattice +region atom_box block -10 10 -10 10 -10 10 units lattice +create_atoms 1 region atom_box +Created 16000 atoms + using lattice units in orthogonal box = (-28.7 -28.7 -28.7) to (28.7 28.7 28.7) + create_atoms CPU = 0.001 seconds + +mass 1 55.845 + +pair_style eam/fs +pair_coeff * * Fe_mm.eam.fs Fe +Reading eam/fs potential file Fe_mm.eam.fs with DATE: 2007-06-11 + +neighbor 2.0 bin +neigh_modify every 5 delay 0 check yes + +fix 1 all nve + +fix twotemp all ttm/grid 342785 1.2470e-5 0.087614 0.005365 29.5917 47.5679 58.4613 10 10 10 set 1800.0 + +compute pe all pe/atom +compute ke all ke/atom + +timestep 0.0001 +thermo 100 + +thermo_style custom step temp etotal f_twotemp[1] f_twotemp[2] + thermo_modify format float "%20.16g" + +# dump output + +#compute 1 all property/grid 10 10 10 id ix iy iz +#dump 1 all grid 100 tmp.dump c_1:grid:data[*] f_twotemp:grid:data +#dump_modify 1 sort 1 + +#compute 10 all property/grid 10 10 10 id xc yc zc +#fix ave all ave/grid 10 10 100 10 10 10 f_twotemp:grid:data + +#dump ave all grid 100 tmp.dump.ave # c_10:grid:data[*] f_ave:grid:data +#dump_modify ave sort id + +#dump vtk all grid/vtk 100 tmp.dump.*.vtk f_ave:grid:data +#dump_modify vtk sort id buffer no + +# run simulation + +run 1000 +Neighbor list info ... 
+ update: every = 5 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.3 + ghost atom cutoff = 7.3 + binsize = 3.65, bins = 16 16 16 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair eam/fs, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.02 | 5.02 | 5.02 Mbytes + Step Temp TotEng f_twotemp[1] f_twotemp[2] + 0 0 -65934.4535754628 371.9188105082186 0 + 100 16.80016730030895 -65897.57625261409 334.7711997056047 0.3621418151154534 + 200 26.84848103084207 -65864.39605153851 301.6347266008474 0.2933241048760692 + 300 30.00882217757776 -65834.42707325556 271.6786568408268 0.283279675801458 + 400 30.20781334990041 -65807.71375650795 245.0077353983801 0.2138026627784766 + 500 32.09861077113437 -65783.42013908038 220.6900170423649 0.2741656726246446 + 600 36.79157815724962 -65761.79596939837 199.1113034240578 0.1952249742260569 + 700 42.72984514810626 -65742.51439832698 179.8612024539894 0.142881896479412 + 800 47.69532275574723 -65725.53833811254 162.8676245424386 0.1860499472400557 + 900 51.39235313856362 -65710.37089637667 147.730793010821 0.1335083658114838 + 1000 55.24706882539561 -65695.77535361829 133.1365828777173 0.1391135652700604 +Loop time of 3.3997 on 4 procs for 1000 steps with 16000 atoms + +Performance: 2.541 ns/day, 9.444 hours/ns, 294.143 timesteps/s, 4.706 Matom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.0418 | 3.0975 | 3.1392 | 2.0 | 91.11 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.056279 | 0.097861 | 0.15328 | 11.2 | 2.88 +Output | 0.00022399 | 0.0002448 | 0.00030547 | 0.0 | 0.01 +Modify | 0.19792 | 0.1982 | 0.1986 | 0.1 | 5.83 +Other | | 0.005918 | | | 0.17 + +Nlocal: 4000 ave 
4000 max 4000 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 8281 ave 8281 max 8281 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 272000 ave 276365 max 267580 min +Histogram: 1 0 1 0 0 0 0 1 0 1 + +Total # of neighbors = 1088000 +Ave neighs/atom = 68 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:03 From da5ada568ffccd9d6c4438357ac9ac1a1f1d6adf Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 11:46:46 -0500 Subject: [PATCH 171/604] whitespace --- src/EXTRA-FIX/fix_ttm_thermal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EXTRA-FIX/fix_ttm_thermal.cpp b/src/EXTRA-FIX/fix_ttm_thermal.cpp index 034e538eaab..296405c818d 100644 --- a/src/EXTRA-FIX/fix_ttm_thermal.cpp +++ b/src/EXTRA-FIX/fix_ttm_thermal.cpp @@ -356,7 +356,7 @@ void FixTTMThermal::end_of_step() if (T_electron[iz][iy][ix] > 0) { T_electron[iz][iy][ix] = - T_electron_old[iz][iy][ix] + + T_electron_old[iz][iy][ix] + inner_dt/c_e_grid[iz][iy][ix] * ((safe_effective_kappa(k_e_grid[iz][iy][xleft],k_e_grid[iz][iy][ix]))* (T_electron_old[iz][iy][xleft]-T_electron_old[iz][iy][ix])/dx/dx*left + From 25ac2a19baeed53c48afebdd24e17e2286008469 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Wed, 5 Nov 2025 12:52:25 -0700 Subject: [PATCH 172/604] Unnecessary asterisk --- doc/src/fix_heat_flow.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/src/fix_heat_flow.rst b/doc/src/fix_heat_flow.rst index aa2b3fbc061..aa7954d1e2a 100644 --- a/doc/src/fix_heat_flow.rst +++ b/doc/src/fix_heat_flow.rst @@ -22,8 +22,6 @@ Syntax *type* = cp1 ... 
cpN cpN = value of specifc heat for type N (energy/(mass * temperature) units) -* - Examples """""""" From 332e3ccc3b06bfcc816769e7d5fe47c10a215012 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 16:15:53 -0500 Subject: [PATCH 173/604] make fix pimd/* docs consistent with the code and reformat --- doc/src/fix_pimd.rst | 271 ++++++++++++++++++++++++++----------------- 1 file changed, 162 insertions(+), 109 deletions(-) diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 1b7f63ed7e2..5132cc0a2cb 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -40,7 +40,7 @@ Syntax .. parsed-literal:: - *keywords* = *method* or *integrator* or *ensemble* or *fmmode* or *fmass* or *scale* or *temp* or *thermostat* or *tau* or *iso* or *aniso* or *barostat* or *taup* or *fixcom* or *lj* + *keywords* = *method* or *integrator* or *ensemble* or *fmmode* or *fmass* or *scale* or *sp* or *temp* or *thermostat* or *tau* or *iso* or *aniso* or *barostat* or *taup* or *fixcom* or *esynch* *method* value = *nmpimd* (default) or *pimd* *integrator* value = *obabo* or *baoab* *ensemble* value = *nvt* or *nve* or *nph* or *npt* @@ -59,13 +59,7 @@ Syntax *barostat* value = *BZP* or *MTTK* *taup* value = barostat damping parameter (time unit) *fixcom* value = *yes* or *no* - *lj* values = epsilon sigma mass planck mvv2e - epsilon = energy scale for reduced units (energy units) - sigma = length scale for reduced units (length units) - mass = mass scale for reduced units (mass units) - planck = Planck's constant for other unit style - mvv2e = mass * velocity^2 to energy conversion factor for other unit style - *esynch* value = *yes* or *no* (only in *pimd/langevin/bosonic*) + *esynch* value = *yes* or *no* (only in *pimd/langevin/bosonic*) Examples """""""" @@ -196,18 +190,19 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics Mode *pimd* added to fix pimd/langevin. -Fix pimd/langevin supports the *method* values *nmpimd* and *pimd*. 
The default -value is *nmpimd*. If *method* is *nmpimd*, the normal mode representation is -used to integrate the equations of motion. The exact solution of harmonic -oscillator is used to propagate the free ring polymer part of the Hamiltonian. -If *method* is *pimd*, the Cartesian representation is used to integrate the -equations of motion. The harmonic force is added to the total force of the -system, and the numerical integrator is used to propagate the Hamiltonian. +Fix pimd/langevin supports the *method* values *nmpimd* and *pimd*. The +default value is *nmpimd*. If *method* is *nmpimd*, the normal mode +representation is used to integrate the equations of motion. The exact +solution of harmonic oscillator is used to propagate the free ring +polymer part of the Hamiltonian. If *method* is *pimd*, the Cartesian +representation is used to integrate the equations of motion. The +harmonic force is added to the total force of the system, and the +numerical integrator is used to propagate the Hamiltonian. -Fix *pimd/nvt/bosonic* only supports the *pimd* and *nmpimd* methods. Fix -*pimd/langevin/bosonic* only supports the *pimd* method, which is the default -in this fix. These restrictions are related to the use of normal -modes, which change in bosons. +Fix *pimd/nvt/bosonic* only supports the *pimd* and *nmpimd* +methods. Fix *pimd/langevin/bosonic* only supports the *pimd* method, +which is the default in this fix. These restrictions are related to the +use of normal modes, which change in bosons. The keyword *integrator* specifies the Trotter splitting method used by *fix pimd/langevin*. See :ref:`(Liu) ` for a discussion on the OBABO and BAOAB @@ -238,8 +233,9 @@ If *fmmode* is *normal*, then the fictitious mass is where :math:`\lambda_i` is the eigenvalue of the :math:`i`-th normal mode. 
-In *pimd/langevin/bosonic*, *fmmode* should not be used, and would raise an error if set to -a value other than *physical*, due to the lack of support for bosonic normal modes. +In *pimd/langevin/bosonic*, *fmmode* should not be used, and would raise +an error if set to a value other than *physical*, due to the lack of +support for bosonic normal modes. .. note:: @@ -248,72 +244,115 @@ a value other than *physical*, due to the lack of support for bosonic normal mod (:math:`\sum_{i=1}^P \frac{1}{2}m\omega_P^2(q_i - q_{i+1})^2`, :math:`m` is always the actual mass of the particles). -The keyword *sp* is a scaling factor on Planck's constant. Scaling the Planck's constant means modifying the "quantumness" of the PIMD simulation. Using the physical value of Planck's constant corresponds to a fully quantum simulation, and 0 corresponds to the classical limit. -For unit styles other than *lj*, the default value of 1.0 is appropriate for most situations. -For *lj* units, a fully quantum simulation translates into setting *sp* to the de Boer quantumness parameter :math:`\Lambda^{\ast}` (see :ref:`de Boer `): +.. versionchanged:: TBD + + *sp* keyword added to *fix pimd/langevin* + +The keyword *sp* is a scaling factor on Planck's constant. Scaling the +Planck's constant means modifying the "quantumness" of the PIMD +simulation. Using the physical value of Planck's constant corresponds to +a fully quantum simulation, and 0 corresponds to the classical limit. +For unit styles other than *lj*, the default value of 1.0 is appropriate +for most situations. For *lj* units, a fully quantum simulation +translates into setting *sp* to the de Boer quantumness parameter +:math:`\Lambda^{\ast}` (see :ref:`de Boer `): .. math:: \Lambda^{\ast}=h/\sigma\sqrt{m\varepsilon} -where :math:`h` is Planck's constant, :math:`\sigma` is the length scale, :math:`\epsilon` is the energy scale, and :math:`m` is the mass of the particles. 
-For example, for Neon, :math:`m = 20.1797` Dalton, :math:`\varepsilon = 3.0747 \times 10^{-3}` eV and :math:`\sigma = 2.7616` Å. Then we have +where :math:`h` is Planck's constant, :math:`\sigma` is the length +scale, :math:`\varepsilon` is the energy scale, and :math:`m` is the mass +of the particles. For example, for Neon, :math:`m = 20.1797` Dalton, +:math:`\varepsilon = 3.0747 \times 10^{-3}` eV and :math:`\sigma = +2.7616` Å. Then we have .. math:: \Lambda^{\ast} = \frac{4.135667403\times 10^{-3}\ \mathrm{eV} \cdot\ \mathrm{ps}}{2.7616\ \mathrm{Å}\times \sqrt{20.1797\ \mathrm{Dalton}\times\ 3.0747\times 10^{-3}\ \mathrm{eV}\times 1.0364269\times 10^{-4}\ \mathrm{eV}\cdot\mathrm{Dalton}^{-1}\cdot\mathrm{Å}^{-2}\cdot\mathrm{ps}^{2}}} = 0.600. -Thus for a fully quantum simulation of Neon using *lj* units, *sp* should be set to 0.600. -The modification of the quantumness should be done by scaling :math:`\Lambda^{\ast}`. +Thus for a fully quantum simulation of Neon using *lj* units, *sp* +should be set to 0.600. The modification of the quantumness should be +done by scaling :math:`\Lambda^{\ast}`. -The keyword *ensemble* for fix style *pimd/langevin* determines which ensemble is it -going to sample. The value can be *nve* (microcanonical), *nvt* (canonical), *nph* (isoenthalpic), -and *npt* (isothermal-isobaric). -Fix *pimd/langevin/bosonic* currently does not support *ensemble* other than *nve*, *nvt*. +The keyword *ensemble* for fix style *pimd/langevin* determines which +ensemble it is going to sample. The value can be *nve* (microcanonical), +*nvt* (canonical), *nph* (isoenthalpic), and *npt* +(isothermal-isobaric). Fix *pimd/langevin/bosonic* currently does not +support *ensemble* other than *nve*, *nvt*. -The keyword *temp* specifies temperature parameter for fix styles *pimd/nvt* and *pimd/langevin*. It should read -a positive floating-point number. +The keyword *temp* specifies temperature parameter for fix styles +*pimd/nvt* and *pimd/langevin*.
It should read a positive floating-point +number. .. note:: - For pimd simulations, a temperature values should be specified even for nve ensemble. Temperature will make a difference - for nve pimd, since the spring elastic frequency between the beads will be affected by the temperature. + For pimd simulations, a temperature value should be specified even + for nve ensemble. Temperature will make a difference for nve pimd, + since the spring elastic frequency between the beads will be affected + by the temperature. -The keyword *thermostat* reads *style* and *seed* of thermostat for fix style *pimd/langevin*. -*style* can only be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti `), and *seed* should a positive integer number, which serves as the seed of the pseudo random number generator. +The keyword *thermostat* reads *style* and *seed* of thermostat for fix +style *pimd/langevin*. *style* can only be *PILE_L* (path integral +Langevin equation local thermostat, as described in :ref:`Ceriotti +`), and *seed* should be a positive integer number, which serves +as the seed of the pseudo random number generator. .. note:: The fix style *pimd/langevin* uses the stochastic PILE_L thermostat to control temperature. This thermostat works on the normal modes of the ring polymer. The *tau* parameter controls the centroid mode, and the *scale* parameter controls the non-centroid modes. -The keyword *tau* specifies the thermostat damping time parameter for fix style *pimd/langevin*. It is in time unit. It only works on the centroid mode. - -The keyword *scale* specifies a scaling parameter for the damping times of the non-centroid modes for fix style *pimd/langevin*. The default -damping time of the non-centroid mode :math:`i` is :math:`\frac{P}{\beta\hbar}\sqrt{\lambda_i\times\mathrm{fmass}}` (*fmmode* is *physical*) or :math:`\frac{P}{\beta\hbar}\sqrt{\mathrm{fmass}}` (*fmmode* is *normal*).
The damping times of all non-centroid modes are the default values divided by *scale*. This keyword should be used only with *method*=*nmpimd*. - -The barostat parameters for fix style *pimd/langevin* with *npt* or *nph* ensemble is specified using one of *iso* and *aniso* -keywords. A *pressure* value should be given with pressure unit. The keyword *iso* means couple all 3 diagonal components together when pressure is computed (hydrostatic pressure), and dilate/contract the dimensions together. The keyword *aniso* means x, y, and z dimensions are controlled independently using the Pxx, Pyy, and Pzz components of the stress tensor as the driving forces, and the specified scalar external pressure. -These parameters are not supported in *pimd/langevin/bosonic*. - -The keyword *barostat* reads *style* of barostat for fix style *pimd/langevin*. *style* can -be *BZP* (Bussi-Zykova-Parrinello, as described in :ref:`Bussi `) or *MTTK* (Martyna-Tuckerman-Tobias-Klein, as described in :ref:`Martyna1 ` and :ref:`Martyna2 `). - -The keyword *taup* specifies the barostat damping time parameter for fix style *pimd/langevin*. It is in time unit. It is not supported in *pimd/langevin/bosonic*. - -The keyword *fixcom* specifies whether the center-of-mass of the extended ring-polymer system is fixed during the pimd simulation. -Once *fixcom* is set to be *yes*, the center-of-mass velocity will be distracted from the centroid-mode velocities in each step. - -The keyword *lj* should be used if :doc:`lj units ` is used for *fix pimd/langevin*. Typically one may want to use -reduced units to run the simulation, and then convert the results into some physical units (for example, :doc:`metal units `). In this case, the 5 quantities in the physical mass units are needed: epsilon (energy scale), sigma (length scale), mass, Planck's constant, mvv2e (mass * velocity^2 to energy conversion factor). Planck's constant and mvv2e can be found in src/update.cpp. 
If there is no need to convert reduced units to physical units, you can omit the keyword *lj* and these five values will be set to 1. - -Fix *pimd/langevin/bosonic* also has a keyword not available in fix *pimd/langevin*: *esynch*, with default *yes*. If set to *no*, some time consuming synchronization of spring energies and the primitive kinetic energy estimator between processors is avoided. +The keyword *tau* specifies the thermostat damping time parameter for +fix style *pimd/langevin*. It is in time unit. It only works on the +centroid mode. + +The keyword *scale* specifies a scaling parameter for the damping times +of the non-centroid modes for fix style *pimd/langevin*. The default +damping time of the non-centroid mode :math:`i` is +:math:`\frac{P}{\beta\hbar}\sqrt{\lambda_i\times\mathrm{fmass}}` +(*fmmode* is *physical*) or +:math:`\frac{P}{\beta\hbar}\sqrt{\mathrm{fmass}}` (*fmmode* is +*normal*). The damping times of all non-centroid modes are the default +values divided by *scale*. This keyword should be used only with +*method*=*nmpimd*. + +The barostat parameters for fix style *pimd/langevin* with *npt* or +*nph* ensemble is specified using one of *iso* and *aniso* keywords. A +*pressure* value should be given with pressure unit. The keyword *iso* +means couple all 3 diagonal components together when pressure is +computed (hydrostatic pressure), and dilate/contract the dimensions +together. The keyword *aniso* means x, y, and z dimensions are +controlled independently using the Pxx, Pyy, and Pzz components of the +stress tensor as the driving forces, and the specified scalar external +pressure. These parameters are not supported in +*pimd/langevin/bosonic*. + +The keyword *barostat* reads *style* of barostat for fix style +*pimd/langevin*. *style* can be *BZP* (Bussi-Zykova-Parrinello, as +described in :ref:`Bussi `) or *MTTK* +(Martyna-Tuckerman-Tobias-Klein, as described in :ref:`Martyna1 +` and :ref:`Martyna2 `). 
+ +The keyword *taup* specifies the barostat damping time parameter for fix +style *pimd/langevin*. It is in time unit. It is not supported in +*pimd/langevin/bosonic*. + +The keyword *fixcom* specifies whether the center-of-mass of the +extended ring-polymer system is fixed during the pimd simulation. Once +*fixcom* is set to be *yes*, the center-of-mass velocity will be +subtracted from the centroid-mode velocities in each step. + +Fix *pimd/langevin/bosonic* also has a keyword not available in fix +*pimd/langevin*: *esynch*, with default *yes*. If set to *no*, some time +consuming synchronization of spring energies and the primitive kinetic +energy estimator between processors is avoided. The PIMD algorithm in LAMMPS is implemented as a hyper-parallel scheme -as described in :ref:`Calhoun `. In LAMMPS this is done by using -:doc:`multi-replica feature ` in LAMMPS, where each -quasi-particle system is stored and simulated on a separate partition -of processors. The following diagram illustrates this approach. The +as described in :ref:`Calhoun `. In LAMMPS this is done by +using :doc:`multi-replica feature ` in LAMMPS, where each +quasi-particle system is stored and simulated on a separate partition of +processors. The following diagram illustrates this approach. The original system with 2 ring polymers is shown in red. Since each ring has 4 quasi-beads (imaginary time slices), there are 4 replicas of the system, each running on one of the 4 partitions of processors. Each @@ -322,24 +361,25 @@ replica (shown in green) owns one quasi-bead in each ring. ..
image:: JPG/pimd.jpg :align: center -To run a PIMD simulation with M quasi-beads in each ring polymer using -N MPI tasks for each partition's domain-decomposition, you would use P -= MxN processors (cores) and run the simulation as follows: +To run a PIMD simulation with M quasi-beads in each ring polymer using N +MPI tasks for each partition's domain-decomposition, you would use P = +MxN processors (cores) and run the simulation as follows: .. code-block:: bash mpirun -np P lmp_mpi -partition MxN -in script Note that in the LAMMPS input script for a multi-partition simulation, -it is often very useful to define a :doc:`uloop-style variable ` such as +it is often very useful to define a :doc:`uloop-style variable +` such as .. code-block:: LAMMPS variable ibead uloop M pad where M is the number of quasi-beads (partitions) used in the -calculation. The uloop variable can then be used to manage I/O -related tasks for each of the partitions, e.g. +calculation. The uloop variable can then be used to manage I/O related +tasks for each of the partitions, e.g. .. code-block:: LAMMPS @@ -350,12 +390,14 @@ related tasks for each of the partitions, e.g. .. note:: - Fix *pimd/langevin* dumps the Cartesian coordinates, but dumps the velocities and - forces in the normal mode representation. If the Cartesian velocities and forces are - needed, it is easy to perform the transformation when doing post-processing. + Fix *pimd/langevin* dumps the Cartesian coordinates, but dumps the + velocities and forces in the normal mode representation. If the + Cartesian velocities and forces are needed, it is easy to perform the + transformation when doing post-processing. - It is recommended to dump the image flags (*ix iy iz*) for fix *pimd/langevin*. It - will be useful if you want to calculate some estimators during post-processing. + It is recommended to dump the image flags (*ix iy iz*) for fix + *pimd/langevin*. 
It will be useful if you want to calculate some + estimators during post-processing. Major differences of *fix pimd/nvt* and *fix pimd/langevin* are: @@ -366,8 +408,9 @@ Major differences of *fix pimd/nvt* and *fix pimd/langevin* are: #. *Fix pimd/langevin* allows multiple processes for each bead. For *fix pimd/nvt*, there is a large chance that multi-process tasks for each bead may fail. #. The dump of *fix pimd/nvt* are all Cartesian. *Fix pimd/langevin* dumps normal-mode velocities and forces, and Cartesian coordinates. -Initially, the inter-replica communication and normal mode transformation parts of *fix pimd/langevin* are written based on -those of *fix pimd/nvt*, but are significantly revised. +Initially, the inter-replica communication and normal mode +transformation parts of *fix pimd/langevin* are written based on those +of *fix pimd/nvt*, but are significantly revised. Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -379,13 +422,13 @@ a fix in an input script that reads a restart file, so that the operation of the fix continues in an uninterrupted fashion. Fix *pimd/langevin* writes the state of the barostat overall beads to -:doc:`binary restart files `. Since it uses a stochastic thermostat, -the state of the thermostat is not written. However, the state of the system -can be restored by reading the restart file, except that it will re-initialize -the random number generator. +:doc:`binary restart files `. Since it uses a stochastic +thermostat, the state of the thermostat is not written. However, the +state of the system can be restored by reading the restart file, except +that it will re-initialize the random number generator. -None of the :doc:`fix_modify ` options -are relevant to fix pimd/nvt. +None of the :doc:`fix_modify ` options are relevant to fix +pimd/nvt. Fix *pimd/nvt* computes a global 3-vector, which can be accessed by various :doc:`output commands `. 
The three quantities in @@ -399,13 +442,18 @@ the global vector are: The vector values calculated by fix *pimd/nvt* are "extensive", except for the temperature, which is "intensive". -Fix *pimd/nvt/bosonic* computes a global 4-vector. The first three are the same as in *pimd/nvt* (the justification for the correctness of the virial estimator for bosons appears in the supporting information of :ref:`(Hirshberg2) `). The fourth is the current value of the scalar primitive estimator for the kinetic energy of the quantum system :ref:`(Hirshberg1) `. +Fix *pimd/nvt/bosonic* computes a global 4-vector. The first three are +the same as in *pimd/nvt* (the justification for the correctness of the +virial estimator for bosons appears in the supporting information of +:ref:`(Hirshberg2) `). The fourth is the current +value of the scalar primitive estimator for the kinetic energy of the +quantum system :ref:`(Hirshberg1) `. -Fix *pimd/langevin* computes a global vector of quantities, which -can be accessed by various :doc:`output commands `. Note that -it outputs multiple log files, and different log files contain information -about different beads or modes (see detailed explanations below). If *ensemble* -is *nve* or *nvt*, the vector has 10 values: +Fix *pimd/langevin* computes a global vector of quantities, which can be +accessed by various :doc:`output commands `. Note that it +outputs multiple log files, and different log files contain information +about different beads or modes (see detailed explanations below). If +*ensemble* is *nve* or *nvt*, the vector has 10 values: #. kinetic energy of the bead (if *method*=*pimd*) or normal mode (if *method*=*nmpimd*) #. spring elastic energy of the bead (if *method*=*pimd*) or normal mode (if *method*=*nmpimd*) @@ -418,10 +466,11 @@ is *nve* or *nvt*, the vector has 10 values: #. thermodynamic pressure estimator #. 
centroid-virial pressure estimator -The first 3 are different for different log files, and the others are the same for different log files. +The first 3 are different for different log files, and the others are +the same for different log files. -If *ensemble* is *nph* or *npt*, the vector stores internal variables of the barostat. If *iso* is used, -the vector has 15 values: +If *ensemble* is *nph* or *npt*, the vector stores internal variables of +the barostat. If *iso* is used, the vector has 15 values: #. kinetic energy of the normal mode #. spring elastic energy of the normal mode @@ -439,7 +488,8 @@ the vector has 15 values: #. barostat cell Jacobian #. enthalpy of the extended system (sum of 4, 12, 13, and 14; conserved if *ensemble* is *nph*) -If *aniso* or *x* or *y* or *z* is used for the barostat, the vector has 17 values: +If *aniso* or *x* or *y* or *z* is used for the barostat, the vector has +17 values: #. kinetic energy of the normal mode #. spring elastic energy of the normal mode @@ -459,8 +509,8 @@ If *aniso* or *x* or *y* or *z* is used for the barostat, the vector has 17 valu #. barostat cell Jacobian #. enthalpy of the extended system (sum of 4, 14, 15, and 16; conserved if *ensemble* is *nph*) -Fix *pimd/langevin/bosonic* computes a global 6-vector. The quantities in the -global vector are: +Fix *pimd/langevin/bosonic* computes a global 6-vector. The quantities +in the global vector are: #. kinetic energy of the beads, #. spring elastic energy of the beads, @@ -479,16 +529,17 @@ classical energy can then be obtained by adding the sum of second output over all log files. All vector values calculated by fix *pimd/langevin/bosonic* are "extensive". -For both *pimd/nvt/bosonic* and *pimd/langevin/bosonic*, the contribution of the -exterior spring to the primitive estimator is printed to the first log -file. The contribution of the :math:`P-1` interior springs is printed -to the other :math:`P-1` log files. 
The contribution of the constant -:math:`\frac{PdN}{2 \beta}` (with :math:`d` being the dimensionality) is -equally divided over log files. +For both *pimd/nvt/bosonic* and *pimd/langevin/bosonic*, the +contribution of the exterior spring to the primitive estimator is +printed to the first log file. The contribution of the :math:`P-1` +interior springs is printed to the other :math:`P-1` log files. The +contribution of the constant :math:`\frac{PdN}{2 \beta}` (with :math:`d` +being the dimensionality) is equally divided over log files. -No parameter of fix *pimd/nvt* or *pimd/langevin* can be used with the *start/stop* keywords -of the :doc:`run ` command. Fix *pimd/nvt* or *pimd/langevin* is not invoked during -:doc:`energy minimization `. +No parameter of fix *pimd/nvt* or *pimd/langevin* can be used with the +*start/stop* keywords of the :doc:`run ` command. Fix *pimd/nvt* +or *pimd/langevin* is not invoked during :doc:`energy minimization +`. Restrictions """""""""""" @@ -502,8 +553,8 @@ Fix *pimd/langevin* can be used with :doc:`lj units `. See the documentation above for how to use it. Only some combinations of fix styles and their options support -partitions with multiple processors. LAMMPS will stop with an -error if multi-processor partitions are not supported. +partitions with multiple processors. LAMMPS will stop with an error if +multi-processor partitions are not supported. A PIMD simulation can be initialized with a single data file read via the :doc:`read_data ` command. However, this means all @@ -525,11 +576,13 @@ Related commands Default """"""" -The keyword defaults for fix *pimd/nvt* are method = pimd, fmass = 1.0, sp -= 1.0, temp = 300.0, and nhc = 2. +The keyword defaults for fix *pimd/nvt* are method = pimd, fmass = 1.0, +sp = 1.0, temp = 300.0, and nhc = 2. 
-The keyword defaults for fix *pimd/langevin* are integrator = obabo, method = nmpimd, ensemble = nvt, fmmode = physical, fmass = 1.0, -scale = 1, temp = 298.15, thermostat = PILE_L, tau = 1.0, iso = 1.0, taup = 1.0, barostat = BZP, fixcom = yes, and lj = 1 for all its arguments. +The keyword defaults for fix *pimd/langevin* are integrator = obabo, +method = nmpimd, ensemble = nvt, fmmode = physical, fmass = 1.0, scale = +1, temp = 298.15, thermostat = PILE_L, tau = 1.0, iso = 1.0, taup = 1.0, +barostat = BZP, fixcom = yes, and sp = 1.0. ---------- From 153ad7b65a0dd4db642d029bbcd57784ea5bc3ee Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 5 Nov 2025 20:26:23 -0500 Subject: [PATCH 174/604] run some GitHub workflow automated tests also on the maintenance branch --- .github/workflows/check-cpp23.yml | 1 + .github/workflows/check-vla.yml | 1 + .github/workflows/compile-msvc.yml | 1 + .github/workflows/style-check.yml | 1 + .github/workflows/unittest-linux.yml | 1 + .github/workflows/unittest-macos.yml | 1 + 6 files changed, 6 insertions(+) diff --git a/.github/workflows/check-cpp23.yml b/.github/workflows/check-cpp23.yml index dfda1a4da83..91e13401a1a 100644 --- a/.github/workflows/check-cpp23.yml +++ b/.github/workflows/check-cpp23.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop diff --git a/.github/workflows/check-vla.yml b/.github/workflows/check-vla.yml index b08985442f6..dfb9a096b02 100644 --- a/.github/workflows/check-vla.yml +++ b/.github/workflows/check-vla.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop diff --git a/.github/workflows/compile-msvc.yml b/.github/workflows/compile-msvc.yml index 5e525678acc..cfb223f2fa1 100644 --- a/.github/workflows/compile-msvc.yml +++ b/.github/workflows/compile-msvc.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop diff --git
a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 12b4c6e2ab7..838f9790987 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop diff --git a/.github/workflows/unittest-linux.yml b/.github/workflows/unittest-linux.yml index e79c715d130..5d77f29d77b 100644 --- a/.github/workflows/unittest-linux.yml +++ b/.github/workflows/unittest-linux.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml index bb3695fd67b..e9fc21a0878 100644 --- a/.github/workflows/unittest-macos.yml +++ b/.github/workflows/unittest-macos.yml @@ -5,6 +5,7 @@ on: push: branches: - develop + - maintenance pull_request: branches: - develop From dd559e4d07ed03d2d49b991b414d793b64bc226c Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Wed, 5 Nov 2025 21:33:55 -0500 Subject: [PATCH 175/604] update example langevin_reduced_units_group --- examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp b/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp index 59660226b94..a9291bcc17d 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/in.lmp @@ -18,7 +18,7 @@ group virtual_atom type 2 timestep 0.00044905847 -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} thermo_style custom step temp f_1[*] vol press thermo 10 From 74ce8fa1444d46e0037923d06fdbd49a8c6c44d0 Mon Sep 17 00:00:00 2001 From: Yifan Li Date: 
Wed, 5 Nov 2025 21:38:21 -0500 Subject: [PATCH 176/604] update log files --- .../log.31Oct25.langevin.reduced.group.g++ | 2 +- .../log.31Oct25.langevin.reduced.group.g++.0 | 51 ++++++++++--------- .../log.31Oct25.langevin.reduced.group.g++.1 | 47 ++++++++--------- .../log.31Oct25.langevin.reduced.group.g++.2 | 45 ++++++++-------- .../log.31Oct25.langevin.reduced.group.g++.3 | 47 ++++++++--------- 5 files changed, 98 insertions(+), 94 deletions(-) diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ index 6bbb1e127f0..b280a3db209 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++ @@ -1,2 +1,2 @@ -LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-385-g4714d8fc87) Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 index ee04f26d22a..ef01b2ca59c 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.0 @@ -1,5 +1,6 @@ -LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-385-g4714d8fc87) Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. using 1 OpenMP thread(s) per MPI task variable ibead uloop 4 pad @@ -14,7 +15,7 @@ Reading data file ... 1 by 1 by 1 MPI processor grid reading atoms ... 
200 atoms - read_data CPU = 0.002 seconds + read_data CPU = 0.001 seconds pair_coeff * * 1.0 1.0 pair_modify shift yes @@ -29,8 +30,8 @@ group virtual_atom type 2 timestep 0.00044905847 -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 1 +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 1 thermo_style custom step temp f_1[*] vol press thermo 10 @@ -45,9 +46,9 @@ Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule Initializing PI Langevin equation thermostat... Bead ID | omega | tau | c1 | c2 0 0.00000000e+00 1.00000000e+00 9.99775496e-01 2.11886210e-02 - 1 1.86362182e+02 2.68294777e-03 9.19718608e-01 3.92578249e-01 - 2 2.63555925e+02 1.89713056e-03 8.88383268e-01 4.59102569e-01 - 3 1.86362182e+02 2.68294777e-03 9.19718608e-01 3.92578249e-01 + 1 1.86375706e+02 2.68275308e-03 9.19713023e-01 3.92591335e-01 + 2 2.63575052e+02 1.89699289e-03 8.88375638e-01 4.59117333e-01 + 3 1.86375706e+02 2.68275308e-03 9.19713023e-01 3.92591335e-01 PILE_L thermostat successfully initialized! Neighbor list info ... @@ -65,30 +66,30 @@ Neighbor list info ... 
Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press 0 0 0 0 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 - 10 0.038411443 11.465816 0 -875.32113 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 -1.3493346 - 20 0.081832749 24.427076 0 -874.86623 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 -1.3008247 - 30 0.13467173 40.199511 0 -876.21109 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 -1.3087995 - 40 0.18759421 55.996873 0 -876.90255 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 -1.2986964 - 50 0.24981124 74.568656 0 -876.85585 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 -1.2560492 - 60 0.30820757 91.999959 0 -878.23845 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 -1.2551723 - 70 0.37793499 112.81359 0 -881.46154 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 -1.2724164 - 80 0.43487408 129.80991 0 -884.65404 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 -1.2869644 - 90 0.50355318 150.31062 0 -887.38385 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 -1.2951988 - 100 0.57003618 170.1558 0 -891.05656 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 -1.3254094 -Loop time of 0.134782 on 1 procs for 100 steps with 200 atoms + 10 0.038411443 11.465816 0 -875.32114 607.35164 882.79327 -320.26623 288.2661 0.15779338 -2.5239404 -1.3657413 341.38937 -1.3493345 + 20 0.081832744 24.427074 0 -874.86638 2124.5945 531.89382 -307.45159 289.26032 -2.5831586 -2.3962011 -1.3530669 341.38937 -1.3008277 + 30 0.1346717 40.199503 0 -876.21138 2754.5415 
400.34522 -299.15173 289.68836 -3.6107134 -2.3455211 -1.3579805 341.38937 -1.3088054 + 40 0.18759415 55.996852 0 -876.90281 3083.9611 337.74565 -297.11938 289.80669 -4.0996923 -2.3657142 -1.3758725 341.38937 -1.298701 + 50 0.2498111 74.568614 0 -876.85624 3326.3374 307.07562 -295.85336 289.91582 -4.3392625 -2.376709 -1.4004763 341.38937 -1.2560075 + 60 0.30820734 91.999891 0 -878.23892 3365.2012 333.37849 -293.70956 289.91798 -4.1338051 -2.3839596 -1.4311286 341.38937 -1.2551804 + 70 0.37793463 112.81349 0 -881.46213 3584.46 288.23481 -276.80315 290.05161 -4.4864321 -2.5011786 -1.4616792 341.38937 -1.2724259 + 80 0.43487362 129.80977 0 -884.6546 3284.7027 302.99827 -249.81444 289.95097 -4.3711116 -2.9102009 -1.4995513 341.38937 -1.2869731 + 90 0.50355257 150.31044 0 -887.38441 3359.4223 309.73339 -213.62144 289.97647 -4.3185021 -2.9826863 -1.5347979 341.38937 -1.295208 + 100 0.5700354 170.15557 0 -891.05699 3139.839 355.2448 -170.81276 289.79697 -3.9630025 -3.1960291 -1.5721301 341.38937 -1.3254168 +Loop time of 0.0572354 on 1 procs for 100 steps with 200 atoms -Performance: 28786.267 tau/day, 741.940 timesteps/s, 148.388 katom-step/s -95.5% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 67787.883 tau/day, 1747.171 timesteps/s, 349.434 katom-step/s +97.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.016075 | 0.016075 | 0.016075 | 0.0 | 11.93 +Pair | 0.0092236 | 0.0092236 | 0.0092236 | 0.0 | 16.12 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00062061 | 0.00062061 | 0.00062061 | 0.0 | 0.46 -Output | 0.09909 | 0.09909 | 0.09909 | 0.0 | 73.52 -Modify | 0.018774 | 0.018774 | 0.018774 | 0.0 | 13.93 -Other | | 0.0002219 | | | 0.16 +Comm | 0.00028943 | 0.00028943 | 0.00028943 | 0.0 | 0.51 +Output | 0.036748 | 0.036748 | 0.036748 | 0.0 | 64.20 +Modify | 0.010874 | 0.010874 | 0.010874 | 0.0 | 19.00 +Other | | 
0.0001005 | | | 0.18 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 index c80a127b1ce..2db8609de0e 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.1 @@ -1,5 +1,6 @@ -LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-385-g4714d8fc87) Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. using 1 OpenMP thread(s) per MPI task variable ibead uloop 4 pad @@ -14,7 +15,7 @@ Reading data file ... 1 by 1 by 1 MPI processor grid reading atoms ... 200 atoms - read_data CPU = 0.003 seconds + read_data CPU = 0.001 seconds pair_coeff * * 1.0 1.0 pair_modify shift yes @@ -29,8 +30,8 @@ group virtual_atom type 2 timestep 0.00044905847 -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 2 +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 2 thermo_style custom step temp f_1[*] vol press thermo 10 @@ -56,31 +57,31 @@ Neighbor list info ... 
bin: standard Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press - 0 0 0 4.6054235e-24 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 - 10 3.6888879 1101.1331 285.67287 -874.75939 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.78530277 - 20 3.4940756 1042.9816 790.68515 -876.17255 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.64433263 - 30 3.352259 1000.6493 988.97192 -876.91853 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.55573019 - 40 3.7408936 1116.6567 1007.0387 -877.86197 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.77082307 - 50 3.7919069 1131.8842 1072.1488 -879.36819 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.77700475 - 60 3.7086239 1107.0242 1050.5562 -881.73393 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.6957508 - 70 3.8176119 1139.5572 1054.2695 -882.29721 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.74600469 - 80 3.3021658 985.69649 1087.6174 -884.90368 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.38965467 - 90 3.8785892 1157.7589 1017.7424 -887.88222 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.67569569 - 100 3.7316631 1113.9014 996.70261 -890.75765 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.51681218 -Loop time of 0.134755 on 1 procs for 100 steps with 200 atoms + 0 0 0 4.6060919e-24 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 3.6889033 1101.1376 285.71122 -874.7594 607.35164 882.79327 -320.26623 288.2661 0.15779338 
-2.5239404 -1.3657413 341.38937 0.78531143 + 20 3.4940988 1042.9885 790.74267 -876.17269 2124.5945 531.89382 -307.45159 289.26032 -2.5831586 -2.3962011 -1.3530669 341.38937 0.64434378 + 30 3.3522964 1000.6605 989.00222 -876.91868 2754.5415 400.34522 -299.15173 289.68836 -3.6107134 -2.3455211 -1.3579805 341.38937 0.55574961 + 40 3.740928 1116.667 1007.0484 -877.86218 3083.9611 337.74565 -297.11938 289.80669 -4.0996923 -2.3657142 -1.3758725 341.38937 0.77083963 + 50 3.7919129 1131.886 1072.1596 -879.36846 3326.3374 307.07562 -295.85336 289.91582 -4.3392625 -2.376709 -1.4004763 341.38937 0.77700325 + 60 3.7085955 1107.0158 1050.5649 -881.73422 3365.2012 333.37849 -293.70956 289.91798 -4.1338051 -2.3839596 -1.4311286 341.38937 0.69572857 + 70 3.8176135 1139.5576 1054.2782 -882.29755 3584.46 288.23481 -276.80315 290.05161 -4.4864321 -2.5011786 -1.4616792 341.38937 0.74599832 + 80 3.3021595 985.6946 1087.6232 -884.90411 3284.7027 302.99827 -249.81444 289.95097 -4.3711116 -2.9102009 -1.4995513 341.38937 0.38964214 + 90 3.8786041 1157.7633 1017.7405 -887.88267 3359.4223 309.73339 -213.62144 289.97647 -4.3185021 -2.9826863 -1.5347979 341.38937 0.67569558 + 100 3.7316837 1113.9076 996.70742 -890.75805 3139.839 355.2448 -170.81276 289.79697 -3.9630025 -3.1960291 -1.5721301 341.38937 0.51681647 +Loop time of 0.0572386 on 1 procs for 100 steps with 200 atoms -Performance: 28791.973 tau/day, 742.087 timesteps/s, 148.417 katom-step/s -95.0% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 67784.045 tau/day, 1747.072 timesteps/s, 349.414 katom-step/s +99.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.015986 | 0.015986 | 0.015986 | 0.0 | 11.86 +Pair | 0.0091238 | 0.0091238 | 0.0091238 | 0.0 | 15.94 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00061376 | 0.00061376 | 0.00061376 | 0.0 | 0.46 -Output | 0.095962 | 0.095962 | 
0.095962 | 0.0 | 71.21 -Modify | 0.021985 | 0.021985 | 0.021985 | 0.0 | 16.31 -Other | | 0.0002084 | | | 0.15 +Comm | 0.00028425 | 0.00028425 | 0.00028425 | 0.0 | 0.50 +Output | 0.034156 | 0.034156 | 0.034156 | 0.0 | 59.67 +Modify | 0.013586 | 0.013586 | 0.013586 | 0.0 | 23.74 +Other | | 8.885e-05 | | | 0.16 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 index f3937759c1b..117ee0db6e7 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.2 @@ -1,5 +1,6 @@ -LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-385-g4714d8fc87) Processor partition = 2 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. using 1 OpenMP thread(s) per MPI task variable ibead uloop 4 pad @@ -14,7 +15,7 @@ Reading data file ... 1 by 1 by 1 MPI processor grid reading atoms ... 
200 atoms - read_data CPU = 0.003 seconds + read_data CPU = 0.001 seconds pair_coeff * * 1.0 1.0 pair_modify shift yes @@ -29,8 +30,8 @@ group virtual_atom type 2 timestep 0.00044905847 -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 3 +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 3 thermo_style custom step temp f_1[*] vol press thermo 10 @@ -57,30 +58,30 @@ Neighbor list info ... Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press 0 0 0 0 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 - 10 3.2100461 958.19875 487.33842 -874.71922 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.49377608 - 20 3.7003852 1104.565 863.96941 -872.98129 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.79081263 - 30 3.8240127 1141.4678 1032.9144 -873.28849 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.86681413 - 40 3.5116515 1048.228 1123.5034 -876.6311 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.65012938 - 50 3.6895722 1101.3373 1143.8268 -880.66063 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.68747857 - 60 4.0395555 1205.8073 1105.4001 -883.62527 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.84147104 - 70 4.0847536 1219.2989 1195.2014 -886.86715 3584.4298 288.23959 
-276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.80491283 - 80 3.6472645 1088.7085 1187.7184 -888.21493 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.51747741 - 90 3.6311826 1083.908 1173.2906 -889.91457 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.47096547 - 100 4.0525189 1209.6769 997.93435 -892.34535 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.6877229 -Loop time of 0.134911 on 1 procs for 100 steps with 200 atoms + 10 3.2100516 958.2004 487.3899 -874.71924 607.35164 882.79327 -320.26623 288.2661 0.15779338 -2.5239404 -1.3657413 341.38937 0.49377893 + 20 3.7004281 1104.5778 864.00196 -872.98147 2124.5945 531.89382 -307.45159 289.26032 -2.5831586 -2.3962011 -1.3530669 341.38937 0.79083579 + 30 3.8240183 1141.4695 1032.9297 -873.28891 2754.5415 400.34522 -299.15173 289.68836 -3.6107134 -2.3455211 -1.3579805 341.38937 0.86681287 + 40 3.5116357 1048.2233 1123.5156 -876.63162 3083.9611 337.74565 -297.11938 289.80669 -4.0996923 -2.3657142 -1.3758725 341.38937 0.65011359 + 50 3.6896088 1101.3482 1143.8222 -880.66105 3326.3374 307.07562 -295.85336 289.91582 -4.3392625 -2.376709 -1.4004763 341.38937 0.68749352 + 60 4.0395542 1205.8069 1105.3957 -883.62552 3365.2012 333.37849 -293.70956 289.91798 -4.1338051 -2.3839596 -1.4311286 341.38937 0.84146614 + 70 4.0847702 1219.3039 1195.2086 -886.86728 3584.46 288.23481 -276.80315 290.05161 -4.4864321 -2.5011786 -1.4616792 341.38937 0.8049205 + 80 3.6472589 1088.7068 1187.7165 -888.21501 3284.7027 302.99827 -249.81444 289.95097 -4.3711116 -2.9102009 -1.4995513 341.38937 0.51747319 + 90 3.6312106 1083.9164 1173.2819 -889.9147 3359.4223 309.73339 -213.62144 289.97647 -4.3185021 -2.9826863 -1.5347979 341.38937 0.47098068 + 100 4.0525008 1209.6715 997.92904 -892.34552 3139.839 355.2448 -170.81276 289.79697 -3.9630025 -3.1960291 -1.5721301 341.38937 0.68771054 +Loop time of 0.0572731 on 1 procs for 100 
steps with 200 atoms -Performance: 28758.748 tau/day, 741.231 timesteps/s, 148.246 katom-step/s -92.3% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 67743.209 tau/day, 1746.020 timesteps/s, 349.204 katom-step/s +95.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.015957 | 0.015957 | 0.015957 | 0.0 | 11.83 +Pair | 0.0091062 | 0.0091062 | 0.0091062 | 0.0 | 15.90 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00061673 | 0.00061673 | 0.00061673 | 0.0 | 0.46 -Output | 0.095947 | 0.095947 | 0.095947 | 0.0 | 71.12 -Modify | 0.022177 | 0.022177 | 0.022177 | 0.0 | 16.44 -Other | | 0.0002129 | | | 0.16 +Comm | 0.00030389 | 0.00030389 | 0.00030389 | 0.0 | 0.53 +Output | 0.034873 | 0.034873 | 0.034873 | 0.0 | 60.89 +Modify | 0.0129 | 0.0129 | 0.0129 | 0.0 | 22.52 +Other | | 8.977e-05 | | | 0.16 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 index f7c1d899f0c..7a371ecb1dd 100644 --- a/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 +++ b/examples/PACKAGES/pimd/langevin_reduced_units_group/log.31Oct25.langevin.reduced.group.g++.3 @@ -1,5 +1,6 @@ -LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-15-gdec0919f2b-modified) +LAMMPS (10 Sep 2025 - Development - patch_10Sep2025-385-g4714d8fc87) Processor partition = 3 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. using 1 OpenMP thread(s) per MPI task variable ibead uloop 4 pad @@ -14,7 +15,7 @@ Reading data file ... 1 by 1 by 1 MPI processor grid reading atoms ... 
200 atoms - read_data CPU = 0.003 seconds + read_data CPU = 0.001 seconds pair_coeff * * 1.0 1.0 pair_modify shift yes @@ -29,8 +30,8 @@ group virtual_atom type 2 timestep 0.00044905847 -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L ${ibead} -fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 lj 0.00965188 3.4 39.948 4.135667403e-3 1.03646168908e-4 thermostat PILE_L 4 +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L ${ibead} +fix 1 real_atom pimd/langevin ensemble nvt integrator obabo temp 1.00888 sp 0.1924 thermostat PILE_L 4 thermo_style custom step temp f_1[*] vol press thermo 10 @@ -56,31 +57,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.117 | 3.117 | 3.117 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press - 0 0 0 2.2103965e-23 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 - 10 3.2431068 968.06738 296.17847 -876.03789 607.21712 882.82576 -320.26623 288.26611 0.15804719 -2.5239438 -1.365741 341.38937 0.51346639 - 20 3.2818989 979.64681 818.11437 -875.95192 2124.4174 531.93097 -307.45149 289.26046 -2.5828684 -2.3962201 -1.3530643 341.38937 0.54670855 - 30 3.6057557 1076.3181 977.15059 -876.80428 2754.4492 400.36397 -299.15054 289.68861 -3.6105669 -2.3455224 -1.3579759 341.38937 0.72247086 - 40 3.7618847 1122.9226 1118.9264 -877.96302 3083.914 337.75607 -297.11804 289.80699 -4.0996108 -2.3657005 -1.3758669 341.38937 0.77694116 - 50 3.8979994 1163.5528 1156.2033 -880.32726 3326.3099 307.07848 -295.83069 289.91613 -4.3392402 -2.3767521 -1.4004822 341.38937 0.80424078 - 60 4.0856511 1219.5669 1111.0176 -882.56588 3365.2087 333.37974 -293.70987 289.91831 -4.1337953 -2.3839273 -1.4311227 341.38937 0.87289367 - 70 4.0217913 1200.5047 1198.0635 
-884.65317 3584.4298 288.23959 -276.80409 290.05196 -4.4863948 -2.5011684 -1.4616728 341.38937 0.80584971 - 80 4.1454238 1237.409 1113.1635 -887.63937 3284.7112 302.99837 -249.81432 289.95132 -4.3711108 -2.9101708 -1.4995449 341.38937 0.83007064 - 90 3.8894042 1160.9871 1170.5358 -889.93317 3359.4197 309.73099 -213.621 289.97683 -4.3185208 -2.982677 -1.5347915 341.38937 0.63498319 - 100 3.4570929 1031.9422 1184.8761 -891.18508 3139.8448 355.24492 -170.81232 289.7973 -3.9630016 -3.1960037 -1.5721242 341.38937 0.36734878 -Loop time of 0.134747 on 1 procs for 100 steps with 200 atoms + 0 0 0 2.2107173e-23 -875.67022 -3502.6809 1150.1232 -318.66378 287.5308 2.2459657 -5.5241869 -1.3810467 341.38937 -1.3810467 + 10 3.2431014 968.06577 296.21861 -876.03791 607.35164 882.79327 -320.26623 288.2661 0.15779338 -2.5239404 -1.3657413 341.38937 0.51346275 + 20 3.2819301 979.65614 818.17287 -875.952 2124.5945 531.89382 -307.45159 289.26032 -2.5831586 -2.3962011 -1.3530669 341.38937 0.54672464 + 30 3.605774 1076.3235 977.18002 -876.8044 2754.5415 400.34522 -299.15173 289.68836 -3.6107134 -2.3455211 -1.3579805 341.38937 0.72247782 + 40 3.7618881 1122.9236 1118.9462 -877.96323 3083.9611 337.74565 -297.11938 289.80669 -4.0996923 -2.3657142 -1.3758725 341.38937 0.7769378 + 50 3.8980153 1163.5576 1156.2085 -880.32749 3326.3374 307.07562 -295.85336 289.91582 -4.3392625 -2.376709 -1.4004763 341.38937 0.80424562 + 60 4.0856433 1219.5645 1111.0183 -882.5661 3365.2012 333.37849 -293.70956 289.91798 -4.1338051 -2.3839596 -1.4311286 341.38937 0.87288567 + 70 4.0218152 1200.5118 1198.0667 -884.65344 3584.46 288.23481 -276.80315 290.05161 -4.4864321 -2.5011786 -1.4616792 341.38937 0.80585984 + 80 4.1454112 1237.4052 1113.16 -887.63967 3284.7027 302.99827 -249.81444 289.95097 -4.3711116 -2.9102009 -1.4995513 341.38937 0.83005872 + 90 3.8894075 1160.9881 1170.5368 -889.9334 3359.4223 309.73339 -213.62144 289.97647 -4.3185021 -2.9826863 -1.5347979 341.38937 0.63498128 + 100 3.4570744 1031.9367 
1184.8771 -891.18533 3139.839 355.2448 -170.81276 289.79697 -3.9630025 -3.1960291 -1.5721301 341.38937 0.36733397 +Loop time of 0.0572246 on 1 procs for 100 steps with 200 atoms -Performance: 28793.701 tau/day, 742.131 timesteps/s, 148.426 katom-step/s -95.4% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 67800.640 tau/day, 1747.500 timesteps/s, 349.500 katom-step/s +97.0% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.016175 | 0.016175 | 0.016175 | 0.0 | 12.00 +Pair | 0.0090429 | 0.0090429 | 0.0090429 | 0.0 | 15.80 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.0006406 | 0.0006406 | 0.0006406 | 0.0 | 0.48 -Output | 0.097484 | 0.097484 | 0.097484 | 0.0 | 72.35 -Modify | 0.020224 | 0.020224 | 0.020224 | 0.0 | 15.01 -Other | | 0.0002238 | | | 0.17 +Comm | 0.00028691 | 0.00028691 | 0.00028691 | 0.0 | 0.50 +Output | 0.034074 | 0.034074 | 0.034074 | 0.0 | 59.54 +Modify | 0.013731 | 0.013731 | 0.013731 | 0.0 | 23.99 +Other | | 9.008e-05 | | | 0.16 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 From 4fd91c0023b8f199dc911e940711404cfb291a20 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 6 Nov 2025 12:27:59 -0700 Subject: [PATCH 177/604] Restore expected behavior for thermo variables ecouple and econserve with thermo_modify norm yes --- src/thermo.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/thermo.cpp b/src/thermo.cpp index 5798a50ea8f..fb52da2db62 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -2122,6 +2122,7 @@ void Thermo::compute_enthalpy() void Thermo::compute_ecouple() { dvalue = modify->energy_couple(); + if (normflag) dvalue /= natoms; } /* ---------------------------------------------------------------------- */ From 8d666de3f4cb7eebf99b5f12190b2a5b21f17b1d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 6 Nov 2025 12:37:44 -0700 Subject: [PATCH 178/604] 
Port compute temp/com to KOKKOS --- doc/src/Commands_compute.rst | 2 +- doc/src/compute_temp_com.rst | 9 + src/KOKKOS/Install.sh | 2 + src/KOKKOS/compute_temp_com_kokkos.cpp | 246 ++++++++++++++++++++++ src/KOKKOS/compute_temp_com_kokkos.h | 101 +++++++++ src/KOKKOS/compute_temp_deform_kokkos.cpp | 7 +- src/KOKKOS/compute_temp_deform_kokkos.h | 8 +- src/KOKKOS/compute_temp_kokkos.h | 3 +- src/compute_temp_com.cpp | 3 +- src/compute_temp_com.h | 2 +- 10 files changed, 371 insertions(+), 12 deletions(-) create mode 100644 src/KOKKOS/compute_temp_com_kokkos.cpp create mode 100644 src/KOKKOS/compute_temp_com_kokkos.h diff --git a/doc/src/Commands_compute.rst b/doc/src/Commands_compute.rst index c61501e693f..62cb2b219ac 100644 --- a/doc/src/Commands_compute.rst +++ b/doc/src/Commands_compute.rst @@ -161,7 +161,7 @@ KOKKOS, o = OPENMP, t = OPT. * :doc:`temp/asphere ` * :doc:`temp/body ` * :doc:`temp/chunk ` - * :doc:`temp/com ` + * :doc:`temp/com (k) ` * :doc:`temp/cs ` * :doc:`temp/deform (k) ` * :doc:`temp/deform/eff ` diff --git a/doc/src/compute_temp_com.rst b/doc/src/compute_temp_com.rst index e94c911dfa4..c2044491063 100644 --- a/doc/src/compute_temp_com.rst +++ b/doc/src/compute_temp_com.rst @@ -1,8 +1,11 @@ .. index:: compute temp/com +.. index:: compute temp/com/kk compute temp/com command ======================== +Accelerator Variants: *temp/com/kk* + Syntax """""" @@ -82,6 +85,12 @@ See the :doc:`Howto thermostat ` page for a discussion of different ways to compute temperature and perform thermostatting. +---------- + +.. 
include:: accel_styles.rst + +---------- + Output info """"""""""" diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index b9dc332afe3..2c6f9b5e0be 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -114,6 +114,8 @@ action compute_sna_grid_local_kokkos.h compute_sna_grid_local.h action compute_sna_grid_local_kokkos_impl.h compute_sna_grid_local.cpp action compute_gaussian_grid_local_kokkos.cpp compute_gaussian_grid_local.cpp action compute_gaussian_grid_local_kokkos.h compute_gaussian_grid_local.h +action compute_temp_com_kokkos.cpp +action compute_temp_com_kokkos.h action compute_temp_deform_kokkos.cpp action compute_temp_deform_kokkos.h action compute_temp_kokkos.cpp diff --git a/src/KOKKOS/compute_temp_com_kokkos.cpp b/src/KOKKOS/compute_temp_com_kokkos.cpp new file mode 100644 index 00000000000..78857b3fada --- /dev/null +++ b/src/KOKKOS/compute_temp_com_kokkos.cpp @@ -0,0 +1,246 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "compute_temp_com_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "group_kokkos.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +ComputeTempCOMKokkos::ComputeTempCOMKokkos(LAMMPS *lmp, int narg, char **arg) : + ComputeTempCOM(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + groupKK = (GroupKokkos *) group; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = V_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +double ComputeTempCOMKokkos::compute_scalar() +{ + atomKK->sync(execution_space,datamask_read); + atomKK->k_mass.sync(); + + invoked_scalar = update->ntimestep; + + if (dynamic) masstotal = groupKK->mass_kk(igroup); + groupKK->vcm_kk(igroup,masstotal,vbias); + + v = atomKK->k_v.view(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view(); + else + mass = atomKK->k_mass.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + double t = 0.0; + CTEMP t_kk; + + copymode = 1; + if (atomKK->rmass) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + else + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + copymode = 0; + + t = t_kk.t0; + + MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world); + if (dynamic) dof_compute(); + if (dof < 0.0 && natoms_temp > 0.0) + error->all(FLERR,"Temperature compute degrees of freedom < 0"); + scalar *= tfactor; + + return 
scalar; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ComputeTempCOMKokkos::operator()(TagComputeTempCOMScalar, const int &i, CTEMP& t_kk) const { + + KK_FLOAT vthermal[3]; + + vthermal[0] = v(i,0) - vbias[0]; + vthermal[1] = v(i,1) - vbias[1]; + vthermal[2] = v(i,2) - vbias[2]; + if (RMASS) { + if (mask[i] & groupbit) + t_kk.t0 += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + vthermal[2]*vthermal[2]) * rmass[i]; + } else { + if (mask[i] & groupbit) + t_kk.t0 += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + vthermal[2]*vthermal[2]) * mass[type[i]]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void ComputeTempCOMKokkos::compute_vector() +{ + atomKK->sync(execution_space,datamask_read); + atomKK->k_mass.sync(); + + int i; + + invoked_vector = update->ntimestep; + + if (dynamic) masstotal = groupKK->mass_kk(igroup); + groupKK->vcm_kk(igroup,masstotal,vbias); + + v = atomKK->k_v.view(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view(); + else + mass = atomKK->k_mass.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + double t[6]; + for (i = 0; i < 6; i++) t[i] = 0.0; + CTEMP t_kk; + + copymode = 1; + if (atomKK->rmass) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + else + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + copymode = 0; + + t[0] = t_kk.t0; + t[1] = t_kk.t1; + t[2] = t_kk.t2; + t[3] = t_kk.t3; + t[4] = t_kk.t4; + t[5] = t_kk.t5; + + MPI_Allreduce(t,vector,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) vector[i] *= force->mvv2e; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ComputeTempCOMKokkos::operator()(TagComputeTempCOMVector, const int &i, CTEMP& t_kk) const { + + KK_FLOAT vthermal[3]; + + vthermal[0] = v(i,0) - vbias[0]; + vthermal[1] = v(i,1) - vbias[1]; + vthermal[2] = v(i,2) - vbias[2]; + + if (mask[i] & groupbit) { + KK_FLOAT massone = 0.0; + if 
(RMASS) massone = rmass[i]; + else massone = mass[type[i]]; + t_kk.t0 += massone * vthermal[0]*vthermal[0]; + t_kk.t1 += massone * vthermal[1]*vthermal[1]; + t_kk.t2 += massone * vthermal[2]*vthermal[2]; + t_kk.t3 += massone * vthermal[0]*vthermal[1]; + t_kk.t4 += massone * vthermal[0]*vthermal[2]; + t_kk.t5 += massone * vthermal[1]*vthermal[2]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void ComputeTempCOMKokkos::remove_bias_all() +{ + remove_bias_all_kk(); + atomKK->sync(Host,V_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template +void ComputeTempCOMKokkos::remove_bias_all_kk() +{ + atomKK->sync(execution_space,V_MASK|MASK_MASK); + v = atomKK->k_v.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; + + atomKK->modified(execution_space,V_MASK); +} + +template +KOKKOS_INLINE_FUNCTION +void ComputeTempCOMKokkos::operator()(TagComputeTempCOMRemoveBias, const int &i) const { + if (mask[i] & groupbit) { + v(i,0) -= vbias[0]; + v(i,1) -= vbias[1]; + v(i,2) -= vbias[2]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void ComputeTempCOMKokkos::restore_bias_all() +{ + atomKK->sync(execution_space,V_MASK|MASK_MASK); + v = atomKK->k_v.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; + + atomKK->modified(execution_space,V_MASK); +} + +template +KOKKOS_INLINE_FUNCTION +void ComputeTempCOMKokkos::operator()(TagComputeTempCOMRestoreBias, const int &i) const { + if (mask[i] & groupbit) { + v(i,0) += vbias[0]; + v(i,1) += vbias[1]; + v(i,2) += vbias[2]; + } +} + +namespace LAMMPS_NS { +template class ComputeTempCOMKokkos; +#ifdef LMP_KOKKOS_GPU +template class ComputeTempCOMKokkos; +#endif +} diff 
--git a/src/KOKKOS/compute_temp_com_kokkos.h b/src/KOKKOS/compute_temp_com_kokkos.h new file mode 100644 index 00000000000..1c4e03ddcef --- /dev/null +++ b/src/KOKKOS/compute_temp_com_kokkos.h @@ -0,0 +1,101 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(temp/com/kk,ComputeTempCOMKokkos); +ComputeStyle(temp/com/kk/device,ComputeTempCOMKokkos); +ComputeStyle(temp/com/kk/host,ComputeTempCOMKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_COMPUTE_TEMP_COM_KOKKOS_H +#define LMP_COMPUTE_TEMP_COM_KOKKOS_H + +#include "compute_temp_com.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +struct TagComputeTempCOMScalar{}; + +template +struct TagComputeTempCOMVector{}; + +struct TagComputeTempCOMRemoveBias{}; +struct TagComputeTempCOMRestoreBias{}; + +template +class ComputeTempCOMKokkos : public ComputeTempCOM { + public: + + struct s_CTEMP { + double t0, t1, t2, t3, t4, t5; + KOKKOS_INLINE_FUNCTION + s_CTEMP() { + t0 = t1 = t2 = t3 = t4 = t5 = 0.0; + } + KOKKOS_INLINE_FUNCTION + s_CTEMP& operator+=(const s_CTEMP &rhs) { + t0 += rhs.t0; + t1 += rhs.t1; + t2 += rhs.t2; + t3 += rhs.t3; + t4 += rhs.t4; + t5 += rhs.t5; + return *this; + } + }; + + typedef s_CTEMP CTEMP; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef CTEMP value_type; + + ComputeTempCOMKokkos(class LAMMPS *, int, char 
**); + double compute_scalar() override; + void compute_vector() override; + void remove_bias_all() override; + void remove_bias_all_kk() override; + void restore_bias_all() override; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempCOMScalar, const int&, CTEMP&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempCOMVector, const int&, CTEMP&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempCOMRemoveBias, const int &i) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempCOMRestoreBias, const int &i) const; + + protected: + typename AT::t_kkfloat_1d_3 v; + typename AT::t_kkfloat_1d_randomread rmass; + typename AT::t_kkfloat_1d_randomread mass; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread mask; + + class GroupKokkos *groupKK; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 5a630538bd8..d6698dc43e9 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -39,7 +39,7 @@ ComputeTempDeformKokkos::ComputeTempDeformKokkos(LAMMPS *lmp, int na domainKK = (DomainKokkos *) domain; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = V_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; + datamask_read = X_MASK | V_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; datamask_modify = EMPTY_MASK; maxbias = 0; @@ -119,6 +119,7 @@ template void ComputeTempDeformKokkos::compute_vector() { atomKK->sync(execution_space,datamask_read); + atomKK->k_mass.sync(); int i; @@ -203,7 +204,7 @@ void ComputeTempDeformKokkos::remove_bias_all() template void ComputeTempDeformKokkos::remove_bias_all_kk() { - atomKK->sync(execution_space,X_MASK|V_MASK); + atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK); v = atomKK->k_v.view(); x = atomKK->k_x.view(); mask = atomKK->k_mask.view(); @@ -246,7 +247,7 @@ void 
ComputeTempDeformKokkos::operator()(TagComputeTempDeformRemoveB template void ComputeTempDeformKokkos::restore_bias_all() { - atomKK->sync(execution_space,V_MASK); + atomKK->sync(execution_space,V_MASK|MASK_MASK); v = atomKK->k_v.view(); mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; diff --git a/src/KOKKOS/compute_temp_deform_kokkos.h b/src/KOKKOS/compute_temp_deform_kokkos.h index 1c4f9597e47..f70df8ba944 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.h +++ b/src/KOKKOS/compute_temp_deform_kokkos.h @@ -42,6 +42,7 @@ struct TagComputeTempDeformRestoreBias{}; template class ComputeTempDeformKokkos: public ComputeTempDeform { public: + struct s_CTEMP { double t0, t1, t2, t3, t4, t5; KOKKOS_INLINE_FUNCTION @@ -61,9 +62,9 @@ class ComputeTempDeformKokkos: public ComputeTempDeform { }; typedef s_CTEMP CTEMP; - typedef CTEMP value_type; typedef DeviceType device_type; typedef ArrayTypes AT; + typedef CTEMP value_type; ComputeTempDeformKokkos(class LAMMPS *, int, char **); double compute_scalar() override; @@ -99,10 +100,9 @@ class ComputeTempDeformKokkos: public ComputeTempDeform { Few h_rate, h_ratelo; - }; +}; -} +} // namespace LAMMPS_NS #endif #endif - diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index e56eea5ba5c..f7699bb23c9 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -62,7 +62,6 @@ class ComputeTempKokkos : public ComputeTemp { typedef CTEMP value_type; ComputeTempKokkos(class LAMMPS *, int, char **); - double compute_scalar() override; void compute_vector() override; @@ -82,7 +81,7 @@ class ComputeTempKokkos : public ComputeTemp { typename AT::t_int_1d_randomread mask; }; -} +} // namespace LAMMPS_NS #endif #endif diff --git a/src/compute_temp_com.cpp b/src/compute_temp_com.cpp index a6d38a81675..08d82dab356 100644 --- a/src/compute_temp_com.cpp +++ b/src/compute_temp_com.cpp @@ -44,7 +44,8 @@ ComputeTempCOM::ComputeTempCOM(LAMMPS *lmp, int narg, char **arg) : 
ComputeTempCOM::~ComputeTempCOM() { - delete [] vector; + if (!copymode) + delete [] vector; } /* ---------------------------------------------------------------------- */ diff --git a/src/compute_temp_com.h b/src/compute_temp_com.h index 5ccefda99c8..46044bb6679 100644 --- a/src/compute_temp_com.h +++ b/src/compute_temp_com.h @@ -40,7 +40,7 @@ class ComputeTempCOM : public Compute { void restore_bias_all() override; void restore_bias_thr(int, double *, double *) override; - private: + protected: double tfactor, masstotal; void dof_compute(); From 69ea54e5244018095f0fe3b30a24fe363af43b9e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 6 Nov 2025 12:37:56 -0700 Subject: [PATCH 179/604] Fix bug in fix langevin/kk as suggested by @coolchameleon42 --- src/KOKKOS/fix_langevin_kokkos.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 14374328dd1..f91ec3bfa1b 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -348,6 +348,9 @@ void FixLangevinKokkos::post_force(int /*vflag*/) Kokkos::parallel_for(nlocal,post_functor); } + // f is modified by post_force functor + atomKK->modified(execution_space,datamask_modify); + if (tbiasflag == BIAS) { if (temperature->kokkosable) temperature->restore_bias_all(); else { @@ -374,7 +377,7 @@ void FixLangevinKokkos::post_force(int /*vflag*/) FixLangevinKokkosZeroForceFunctor zero_functor(this); Kokkos::parallel_for(nlocal,zero_functor); } - // f is modified by both post_force and zero_force functors + // f is modified by zero_force functor atomKK->modified(execution_space,datamask_modify); // thermostat omega and angmom From 785cbdf8ee5619e74e8c044699fdbaa46d3b4905 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 6 Nov 2025 22:10:21 -0500 Subject: [PATCH 180/604] small doc update for fix gcmc --- doc/src/fix_gcmc.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/doc/src/fix_gcmc.rst b/doc/src/fix_gcmc.rst index 792f9a1ba94..84f482fc95b 100644 --- a/doc/src/fix_gcmc.rst +++ b/doc/src/fix_gcmc.rst @@ -460,8 +460,9 @@ of the specified chemical potential. Note that very lengthy simulations involving insertions/deletions of billions of gas molecules may run out of atom or molecule IDs and trigger an error, so it is better to run multiple shorter-duration -simulations. Likewise, very large molecules have not been tested and -may turn out to be problematic. +simulations. The :doc:`reset_atoms ` command can be used +to "compress" the atom and molecule IDs between runs. Likewise, very +large molecules have not been tested and may turn out to be problematic. Use of multiple *fix gcmc* commands in the same input script can be problematic if using a template molecule. The issue is that the From 3d4759e50c0337d84df8f6f8c1dc9be3325f2720 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 6 Nov 2025 22:11:54 -0500 Subject: [PATCH 181/604] improve error messages --- src/MC/fix_gcmc.cpp | 188 ++++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 96 deletions(-) diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index f724a865f6a..835d3486a19 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -77,7 +77,7 @@ FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) : if (narg < 11) utils::missing_cmd_args(FLERR, "fix gcmc", error); if (atom->molecular == Atom::TEMPLATE) - error->all(FLERR,"Fix gcmc does not (yet) work with atom_style template"); + error->all(FLERR, Error::NOPOINTER, "Fix gcmc does not (yet) work with atom_style template"); dynamic_group_allow = 1; @@ -103,34 +103,36 @@ FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) : chemical_potential = utils::numeric(FLERR, arg[9], false, lmp); displace = utils::numeric(FLERR, arg[10], false, lmp); - if (nevery <= 0) error->all(FLERR, "Illegal fix gcmc command"); - if (nexchanges < 0) error->all(FLERR, "Illegal fix gcmc command"); - if 
(nmcmoves < 0) error->all(FLERR, "Illegal fix gcmc command"); - if (seed <= 0) error->all(FLERR, "Illegal fix gcmc command"); + if (nevery <= 0) error->all(FLERR, 3, "Fix gcmc nevery value must be > 0"); + if (nexchanges < 0) error->all(FLERR, 4, "Fix gcmc nexchanges value must be >= 0"); + if (nmcmoves < 0) error->all(FLERR, 5, "Fix gcmc nmcmoves value must be >= 0"); + if (seed <= 0) error->all(FLERR, 7, "Fix gcmc random seed must be > 0"); if (reservoir_temperature < 0.0) - error->all(FLERR, "Illegal fix gcmc command"); - if (displace < 0.0) error->all(FLERR, "Illegal fix gcmc command"); + error->all(FLERR, 8, "Fix gcmc gas reservoir temperature must be >= 0"); + if (displace < 0.0) + error->all(FLERR, 10, "Fix gcmc translation displacement distance must be >= 0"); // read options from end of input line - options(narg-11,&arg[11]); + options(narg - 11, &arg[11]); // random number generator, same for all procs - random_equal = new RanPark(lmp,seed); + random_equal = new RanPark(lmp, seed); // random number generator, not the same for all procs - random_unequal = new RanPark(lmp,seed); + random_unequal = new RanPark(lmp, seed); // error checks on region and its extent being inside simulation box region_xlo = region_xhi = region_ylo = region_yhi = region_zlo = region_zhi = 0.0; if (region) { if (region->bboxflag == 0) - error->all(FLERR,"Fix gcmc region does not support a bounding box"); + error->all(FLERR, Error::NOPOINTER, "Fix gcmc region {} does not support a bounding box", + idregion); if (region->dynamic_check()) - error->all(FLERR,"Fix gcmc region cannot be dynamic"); + error->all(FLERR, Error::NOPOINTER, "Fix gcmc region {} cannot be dynamic", idregion); region_xlo = region->extent_xlo; region_xhi = region->extent_xhi; @@ -230,8 +232,6 @@ FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) : void FixGCMC::options(int narg, char **arg) { - if (narg < 0) error->all(FLERR,"Illegal fix gcmc command"); - // defaults exchmode = EXCHATOM; @@ -240,7 +240,7 @@ void 
FixGCMC::options(int narg, char **arg) pmoltrans = 0.0; pmolrotate = 0.0; pmctot = 0.0; - max_rotation_angle = 10*MY_PI/180; + max_rotation_angle = 10 * MY_PI / 180; region_volume = 0; max_region_attempts = 1000; molecule_group = 0; @@ -273,119 +273,117 @@ void FixGCMC::options(int narg, char **arg) int iarg = 0; while (iarg < narg) { - if (strcmp(arg[iarg],"mol") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - imol = atom->find_molecule(arg[iarg+1]); + if (strcmp(arg[iarg], "mol") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc", error); + imol = atom->find_molecule(arg[iarg + 1]); if (imol == -1) - error->all(FLERR,"Molecule template ID for fix gcmc does not exist"); + error->all(FLERR, iarg + 1, "Molecule template ID {} for fix gcmc does not exist", + arg[iarg + 1]); if (atom->molecules[imol]->nset > 1 && comm->me == 0) - error->warning(FLERR,"Molecule template for " - "fix gcmc has multiple molecules"); + error->warning(FLERR, "Molecule template for fix gcmc has multiple molecules"); exchmode = EXCHMOL; onemols = atom->molecules; nmol = onemols[imol]->nset; iarg += 2; - } else if (strcmp(arg[iarg],"mcmoves") == 0) { - if (iarg+4 > narg) error->all(FLERR,"Illegal fix gcmc command"); - patomtrans = utils::numeric(FLERR,arg[iarg+1],false,lmp); - pmoltrans = utils::numeric(FLERR,arg[iarg+2],false,lmp); - pmolrotate = utils::numeric(FLERR,arg[iarg+3],false,lmp); + } else if (strcmp(arg[iarg], "mcmoves") == 0) { + if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix gcmc mcmoves", error); + patomtrans = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + pmoltrans = utils::numeric(FLERR, arg[iarg + 2], false, lmp); + pmolrotate = utils::numeric(FLERR, arg[iarg + 3], false, lmp); if (patomtrans < 0 || pmoltrans < 0 || pmolrotate < 0) - error->all(FLERR,"Illegal fix gcmc command"); + error->all(FLERR, "Illegal fix gcmc mcmoves parameters"); pmctot = patomtrans + pmoltrans + pmolrotate; - if (pmctot <= 0) - 
error->all(FLERR,"Illegal fix gcmc command"); + if (pmctot <= 0) error->all(FLERR, "Illegal fix gcmc mcmoves parameters"); iarg += 4; - } else if (strcmp(arg[iarg],"region") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - region = domain->get_region_by_id(arg[iarg+1]); - if (!region) error->all(FLERR,"Region {} for fix gcmc does not exist",arg[iarg+1]); - idregion = utils::strdup(arg[iarg+1]); + } else if (strcmp(arg[iarg], "region") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc region", error); + region = domain->get_region_by_id(arg[iarg + 1]); + if (!region) + error->all(FLERR, iarg + 1, "Region {} for fix gcmc does not exist", arg[iarg + 1]); + idregion = utils::strdup(arg[iarg + 1]); iarg += 2; - } else if (strcmp(arg[iarg],"maxangle") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - max_rotation_angle = utils::numeric(FLERR,arg[iarg+1],false,lmp); - max_rotation_angle *= MY_PI/180; + } else if (strcmp(arg[iarg], "maxangle") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc maxangle", error); + max_rotation_angle = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + max_rotation_angle *= MY_PI / 180; iarg += 2; - } else if (strcmp(arg[iarg],"pressure") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - pressure = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "pressure") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc pressure", error); + pressure = utils::numeric(FLERR, arg[iarg + 1], false, lmp); pressure_flag = true; iarg += 2; - } else if (strcmp(arg[iarg],"fugacity_coeff") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - fugacity_coeff = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "fugacity_coeff") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc fugacity_coeff", error); + fugacity_coeff = 
utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"charge") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - charge = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "charge") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc charge", error); + charge = utils::numeric(FLERR, arg[iarg + 1], false, lmp); charge_flag = true; iarg += 2; - } else if (strcmp(arg[iarg],"rigid") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - delete [] idrigid; - idrigid = utils::strdup(arg[iarg+1]); + } else if (strcmp(arg[iarg], "rigid") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc rigid", error); + delete[] idrigid; + idrigid = utils::strdup(arg[iarg + 1]); rigidflag = 1; iarg += 2; - } else if (strcmp(arg[iarg],"shake") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - delete [] idshake; - idshake = utils::strdup(arg[iarg+1]); + } else if (strcmp(arg[iarg], "shake") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc shake", error); + delete[] idshake; + idshake = utils::strdup(arg[iarg + 1]); shakeflag = 1; iarg += 2; - } else if (strcmp(arg[iarg],"full_energy") == 0) { + } else if (strcmp(arg[iarg], "full_energy") == 0) { full_flag = true; iarg += 1; - } else if (strcmp(arg[iarg],"group") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); + } else if (strcmp(arg[iarg], "group") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc group", error); if (ngroups >= ngroupsmax) { - ngroupsmax = ngroups+1; - groupstrings = (char **) - memory->srealloc(groupstrings, - ngroupsmax*sizeof(char *), - "fix_gcmc:groupstrings"); + ngroupsmax = ngroups + 1; + groupstrings = (char **) memory->srealloc(groupstrings, ngroupsmax * sizeof(char *), + "fix_gcmc:groupstrings"); } - groupstrings[ngroups] = utils::strdup(arg[iarg+1]); + 
groupstrings[ngroups] = utils::strdup(arg[iarg + 1]); ngroups++; iarg += 2; - } else if (strcmp(arg[iarg],"grouptype") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix gcmc command"); + } else if (strcmp(arg[iarg], "grouptype") == 0) { + if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "fix gcmc grouptype", error); if (ngrouptypes >= ngrouptypesmax) { - ngrouptypesmax = ngrouptypes+1; - grouptypes = (int*) memory->srealloc(grouptypes,ngrouptypesmax*sizeof(int), - "fix_gcmc:grouptypes"); - grouptypestrings = (char**) - memory->srealloc(grouptypestrings, - ngrouptypesmax*sizeof(char *), - "fix_gcmc:grouptypestrings"); + ngrouptypesmax = ngrouptypes + 1; + grouptypes = (int *) memory->srealloc(grouptypes, ngrouptypesmax * sizeof(int), + "fix_gcmc:grouptypes"); + grouptypestrings = (char **) memory->srealloc( + grouptypestrings, ngrouptypesmax * sizeof(char *), "fix_gcmc:grouptypestrings"); } - grouptypes[ngrouptypes] = utils::expand_type_int(FLERR, arg[iarg+1], Atom::ATOM, lmp); - grouptypestrings[ngrouptypes] = utils::strdup(arg[iarg+2]); + grouptypes[ngrouptypes] = utils::expand_type_int(FLERR, arg[iarg + 1], Atom::ATOM, lmp); + grouptypestrings[ngrouptypes] = utils::strdup(arg[iarg + 2]); ngrouptypes++; iarg += 3; - } else if (strcmp(arg[iarg],"intra_energy") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - energy_intra = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "intra_energy") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc intra_energy", error); + energy_intra = utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"tfac_insert") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - tfac_insert = utils::numeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "tfac_insert") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc tfac_insert", error); + tfac_insert = 
utils::numeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"overlap_cutoff") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - double rtmp = utils::numeric(FLERR,arg[iarg+1],false,lmp); - overlap_cutoffsq = rtmp*rtmp; + } else if (strcmp(arg[iarg], "overlap_cutoff") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc overlap_cutoff", error); + double rtmp = utils::numeric(FLERR, arg[iarg + 1], false, lmp); + overlap_cutoffsq = rtmp * rtmp; overlap_flag = 1; iarg += 2; - } else if (strcmp(arg[iarg],"min") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - min_ngas = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "min") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc min", error); + min_ngas = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else if (strcmp(arg[iarg],"max") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command"); - max_ngas = utils::inumeric(FLERR,arg[iarg+1],false,lmp); + } else if (strcmp(arg[iarg], "max") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix gcmc max", error); + max_ngas = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); iarg += 2; - } else error->all(FLERR,"Illegal fix gcmc command"); + } else { + error->all(FLERR, iarg, "Unknown fix gcmc keyword {}", arg[iarg]); + } } } @@ -478,10 +476,8 @@ void FixGCMC::init() } if (region) { - if (region->bboxflag == 0) - error->all(FLERR,"Fix gcmc region does not support a bounding box"); - if (region->dynamic_check()) - error->all(FLERR,"Fix gcmc region cannot be dynamic"); + if (region->bboxflag == 0) error->all(FLERR, "Fix gcmc region does not support a bounding box"); + if (region->dynamic_check()) error->all(FLERR, "Fix gcmc region cannot be dynamic"); region_xlo = region->extent_xlo; region_xhi = region->extent_xhi; From e0e60c59e786b66a0ae0d3fb95e7d0dab3ea1205 Mon Sep 17 00:00:00 
2001 From: Axel Kohlmeyer Date: Thu, 6 Nov 2025 22:28:40 -0500 Subject: [PATCH 182/604] no need to define Particle struct in header, no need for typedef of struct in C++, too --- src/output.cpp | 12 ++++++++++-- src/output.h | 5 ----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/output.cpp b/src/output.cpp index 756682e9c7f..e40cf3e416e 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -40,11 +40,19 @@ using namespace LAMMPS_NS; -static constexpr int DELTA = 1; -static constexpr double EPSDT = 1.0e-6; +namespace { +constexpr int DELTA = 1; +constexpr double EPSDT = 1.0e-6; enum {SETUP, WRITE, RESET_DT}; +struct Particle { + int tag; + int type; + double x[3]; +}; +} + /* ---------------------------------------------------------------------- one instance per dump style in style_dump.h ------------------------------------------------------------------------- */ diff --git a/src/output.h b/src/output.h index 574c2566c9f..ec6ca1433ce 100644 --- a/src/output.h +++ b/src/output.h @@ -70,11 +70,6 @@ class Output : protected Pointers { using DumpCreatorMap = std::map; DumpCreatorMap *dump_map; - typedef struct Particle { - int tag; - int type; - double x[3]; - } Particle; MPI_Datatype createParticleStructType(); Output(class LAMMPS *); From 4f7f7ad33523ab731c1abd9a849c2a67d919f4f1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 6 Nov 2025 23:46:35 -0500 Subject: [PATCH 183/604] replace inefficient and redundant function with existing member --- src/label_map.h | 1 - src/output.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/label_map.h b/src/label_map.h index 845953f59d1..be7143ae18e 100644 --- a/src/label_map.h +++ b/src/label_map.h @@ -43,7 +43,6 @@ class LabelMap : protected Pointers { void write_data(FILE *); void read_restart(FILE *fp); void write_restart(FILE *); - inline auto getTypelabel() const { return typelabel; } protected: int natomtypes, nbondtypes, nangletypes, ndihedraltypes, nimpropertypes; diff 
--git a/src/output.cpp b/src/output.cpp index e40cf3e416e..55464bf41c3 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -724,7 +724,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i for (auto myatom : atoms_root) { int mytype = myatom.type; std::string typestr = std::to_string(mytype); - if (atom->labelmapflag) typestr = atom->lmap->getTypelabel()[mytype-1]; + if (atom->labelmapflag) typestr = atom->lmap->find(mytype, Atom::ATOM); utils::print(fp, "{}[{}, \"{}\"]", indent, myatom.tag, typestr); if (std::next(it) == atoms_root.end()) fprintf(fp, "\n"); else fprintf(fp, ",\n"); From 4113bdc3f8209fc41be2c36f2174fb0f0ba22ef3 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 6 Nov 2025 23:28:43 -0600 Subject: [PATCH 184/604] re-enabled binning on the GPU with the CUDA backend (CUDPP) --- cmake/Modules/Packages/GPU.cmake | 7 +++++-- lib/gpu/lal_neighbor.h | 4 ++++ lib/gpu/lal_neighbor_gpu.cu | 8 ++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 592b7eff2a0..9d688e7e0ad 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -154,8 +154,11 @@ if(GPU_API STREQUAL "CUDA") endif() endif() - cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC} - -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) + set(NVCC_FLAGS -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) + if(CUDPP_OPT) + string(APPEND NVCC_FLAGS " -DUSE_CUDPP") + endif() + cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC} ${NVCC_FLAGS}) cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC} -DUNIX -O3 --use_fast_math 
-Wno-deprecated-gpu-targets -allow-unsupported-compiler -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 313a1354765..e7f846fcdbd 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -37,6 +37,10 @@ #define LAL_USE_OLD_NEIGHBOR #endif +#ifdef USE_CUDPP +#define LAL_USE_OLD_NEIGHBOR +#endif + namespace LAMMPS_AL { class Neighbor { diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index ec22ebf5af0..6e12a49304c 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -35,8 +35,8 @@ _texture_2d( pos_tex,int4); #endif #ifdef NV_KERNEL -#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2) -// Issue with incorrect results in CUDA >= 11.2 +// Issue with incorrect results with CUDA >= 11.2 and pre-12.0 (same in lal_neighbor.h) +#if (CUDA_VERSION > 11019) && (CUDA_VERSION < 12000) #define LAL_USE_OLD_NEIGHBOR #endif #endif @@ -45,6 +45,10 @@ _texture_2d( pos_tex,int4); #define LAL_USE_OLD_NEIGHBOR #endif +#ifdef USE_CUDPP +#define LAL_USE_OLD_NEIGHBOR +#endif + /* compute the id of the cell where the atoms belong to x: atom coordinates From 99f63c0965d39cf640f0763473b6a3d02f9d93ce Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 7 Nov 2025 08:08:15 -0700 Subject: [PATCH 185/604] Fix some issues --- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 10 +++++++++ src/KOKKOS/atom_vec_atomic_kokkos.h | 1 + src/KOKKOS/atom_vec_kokkos.cpp | 31 +++++++++++++-------------- src/KOKKOS/atom_vec_kokkos.h | 1 - src/KOKKOS/comm_kokkos.cpp | 17 ++++++--------- src/atom_masks.h | 24 ++++++++++----------- src/atom_vec.h | 2 +- 7 files changed, 45 insertions(+), 41 deletions(-) diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 3ee66be1529..0364e0870a6 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -31,6 +31,16 @@ AtomVecKokkos(lmp), 
AtomVecAtomic(lmp) { } + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecAtomicKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 7ab96b87c62..43756c238c1 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { public: AtomVecAtomicKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index c6ab0ed8e38..3e485af7050 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -51,16 +51,6 @@ AtomVecKokkos::~AtomVecKokkos() ngrow = 0; } -/* ---------------------------------------------------------------------- - process field strings to initialize data structs for all other methods -------------------------------------------------------------------------- */ - -void AtomVecKokkos::setup_fields() -{ - AtomVec::setup_fields(); - set_atom_masks(); -} - /* ---------------------------------------------------------------------- */ template @@ -426,6 +416,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, } } } + atomKK->modified(HostKK,datamask_comm); } else { atomKK->sync(Device,datamask_comm); if (pbc_flag) { @@ -481,6 +472,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, } } } + atomKK->modified(Device,datamask_comm); } return n*size_forward; @@ -734,23 +726,21 @@ struct AtomVecKokkos_UnpackComm { void 
AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->forward_comm_on_host) { + atomKK->sync(HostKK,datamask_comm); if (comm_x_only) { - atomKK->sync(HostKK,datamask_comm); struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); } else { - atomKK->sync(HostKK,datamask_comm); struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); } atomKK->modified(HostKK,datamask_comm); } else { + atomKK->sync(Device,datamask_comm); if (comm_x_only) { - atomKK->sync(Device,datamask_comm); struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); } else { - atomKK->sync(Device,datamask_comm); struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); Kokkos::parallel_for(n,f); } @@ -1600,10 +1590,10 @@ struct AtomVecKokkos_UnpackBorder { void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf, ExecutionSpace space) { - atomKK->sync(space,datamask_border); - while (first+n >= nmax) grow(0); + atomKK->sync(space,datamask_border); + if (space == Host) { struct AtomVecKokkos_UnpackBorder f(atomKK,buf.view_host(),first,datamask_border); @@ -1945,7 +1935,11 @@ struct AtomVecKokkos_UnpackBorderVel { void AtomVecKokkos::unpack_border_vel_kokkos( const int &n, const int &first, const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { + while (first+n >= nmax) grow(0); + + atomKK->sync(space,datamask_border_vel); + if (space == Host) { struct AtomVecKokkos_UnpackBorderVel f( atomKK, @@ -2300,6 +2294,7 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr DAT::tdual_int_1d k_copylist, ExecutionSpace space) { + atomKK->sync(space,datamask_exchange); set_size_exchange(); if (nsend > (int) (k_buf.view_host().extent(0)* @@ -2552,6 +2547,8 @@ int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nr { while 
(nlocal + nrecv/size_exchange >= nmax) grow(0); + atomKK->sync(space,datamask_exchange); + if (space == HostKK) { if (k_indices.view_host().data()) { k_count.view_host()(0) = nlocal; @@ -2586,6 +2583,8 @@ int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nr } } + atomKK->modified(space,datamask_exchange); + return k_count.view_host()(0); } diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index cf90736aadc..59124c47df3 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -197,7 +197,6 @@ class AtomVecKokkos : virtual public AtomVec { DAT::tdual_int_1d k_count; - void setup_fields() override; uint64_t field2mask(std::string); int field2size(std::string); void set_atom_masks(); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 93a5d1fdef9..162608198e4 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -810,7 +810,7 @@ void CommKokkos::exchange_device() subhi = domain->subhi_lamda; } - atomKK->sync(ExecutionSpaceFromDevice::space,atomKK->avecKK->datamask_border_vel); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); // loop over dimensions for (int dim = 0; dim < 3; dim++) { @@ -996,7 +996,6 @@ void CommKokkos::exchange_device() } } } - atomKK->modified(ExecutionSpaceFromDevice::space,atomKK->avecKK->datamask_border_vel); } if (atom->firstgroupname) { @@ -1107,13 +1106,11 @@ void CommKokkos::borders_device() { int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax; int nsend,nrecv,sendflag,nfirst,nlast,ngroup; double lo,hi; - int *type; - double **x; double *mlo,*mhi; MPI_Request request; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - atomKK->sync(exec_space,atomKK->avecKK->datamask_border_vel); + atomKK->sync(exec_space,X_MASK); k_sendlist.sync(); @@ -1137,12 +1134,10 @@ void CommKokkos::borders_device() { // for later swaps in a dim, only check newly arrived ghosts // store sent atom indices in list for use in future 
timesteps - x = atom->x; if (mode == Comm::SINGLE) { lo = slablo[iswap]; hi = slabhi[iswap]; } else { - type = atom->type; mlo = multilo[iswap]; mhi = multihi[iswap]; } @@ -1203,19 +1198,19 @@ void CommKokkos::borders_device() { } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); - for (i = nfirst; i < nlast; i++) { + /*for (i = nfirst; i < nlast; i++) { itype = type[i]; if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } - } + }*/ } } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); - if (mode == Comm::SINGLE) { + /*if (mode == Comm::SINGLE) { ngroup = atom->nfirst; for (i = 0; i < ngroup; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { @@ -1243,7 +1238,7 @@ void CommKokkos::borders_device() { sendlist[iswap][nsend++] = i; } } - } + }*/ } } diff --git a/src/atom_masks.h b/src/atom_masks.h index acbab8938f1..009c5924d68 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -43,21 +43,21 @@ // SPIN -#define SP_MASK 0x0000000000040000 -#define FM_MASK 0x0000000000080000 -#define FML_MASK 0x0000000000100000 +#define SP_MASK 0x0000000000040000 +#define FM_MASK 0x0000000000080000 +#define FML_MASK 0x0000000000100000 // DPD -#define DPDRHO_MASK 0x0000000000200000 -#define DPDTHETA_MASK 0x0000000000400000 -#define UCOND_MASK 0x0000000000800000 -#define UMECH_MASK 0x0000000001000000 -#define UCHEM_MASK 0x0000000002000000 -#define UCG_MASK 0x0000000004000000 -#define UCGNEW_MASK 0x0000000008000000 -#define DUCHEM_MASK 0x0000000010000000 -#define DVECTOR_MASK 0x0000000020000000 +#define DPDRHO_MASK 0x0000000000200000 +#define DPDTHETA_MASK 0x0000000000400000 +#define UCOND_MASK 0x0000000000800000 +#define UMECH_MASK 0x0000000001000000 +#define UCHEM_MASK 0x0000000002000000 +#define UCG_MASK 0x0000000004000000 +#define UCGNEW_MASK 0x0000000008000000 +#define DUCHEM_MASK 0x0000000010000000 +#define DVECTOR_MASK 
0x0000000020000000 // granular diff --git a/src/atom_vec.h b/src/atom_vec.h index 535fc52e242..edbb3a3bebf 100644 --- a/src/atom_vec.h +++ b/src/atom_vec.h @@ -213,7 +213,7 @@ class AtomVec : protected Pointers { void grow_nmax(); int grow_nmax_bonus(int); - virtual void setup_fields(); + void setup_fields(); int process_fields(const std::vector &, const std::vector &, Method *); void init_method(int, Method *); }; From f40afd69b938f64b7e27f07e1871f1ec460c5cb6 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 7 Nov 2025 09:30:47 -0600 Subject: [PATCH 186/604] ensure consistent settings with regards to neighor sort --- lib/gpu/lal_device.cpp | 37 ++++++++++++++++++++++++++++--------- lib/gpu/lal_device.h | 3 ++- lib/gpu/lal_neighbor.h | 1 + lib/gpu/lal_neighbor_gpu.cu | 5 +++-- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index e9ef2294b2d..30f291dfb8b 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -59,7 +59,8 @@ namespace LAMMPS_AL { template DeviceT::Device() : _init_count(0), _device_init(false), _gpu_mode(GPU_FORCE), _first_device(0), - _last_device(0), _platform_id(-1), _compiled(false) { + _last_device(0), _platform_id(-1), _compiled(false), + _use_old_nbor_build(0), _use_device_sort(0) { } template @@ -370,6 +371,12 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu, _use_old_nbor_build = 1; #endif + #if defined(USE_CUDPP) || defined(USE_HIP_DEVICE_SORT) + _use_device_sort = 1; + #else + _use_device_sort = 0; + #endif + return flag; } @@ -520,11 +527,13 @@ int DeviceT::init(Answer &ans, const bool charge, // NOTE: enforce the hybrid mode (binning on the CPU) // when not using sorting on the device #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT) - if (gpu_nbor==1) gpu_nbor=2; + if (gpu_nbor==1) + gpu_nbor=2; #endif // or when the device supports subgroups #ifndef LAL_USE_OLD_NEIGHBOR - if (gpu_nbor==1) gpu_nbor=2; + if 
(gpu_nbor==1) + gpu_nbor=2; #endif if (_init_count==0) { @@ -596,14 +605,18 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal, if (_particle_split<1.0 && _particle_split>0.0) ef_nlocal=static_cast(_particle_split*nlocal); + // NOTE: enforce the hybrid mode (binning on the CPU) + // when not using sorting on the device int gpu_nbor=0; if (_gpu_mode==Device::GPU_NEIGH) gpu_nbor=1; else if (_gpu_mode==Device::GPU_HYB_NEIGH) gpu_nbor=2; #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT) - if (gpu_nbor==1) + if (gpu_nbor==1) { gpu_nbor=2; + _gpu_mode=Device::GPU_HYB_NEIGH; + } #endif #ifndef LAL_USE_OLD_NEIGHBOR if (gpu_nbor==1) @@ -913,14 +926,20 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, fprintf(screen,"Neigh block: %d.\n",_block_nbor_build); if (nbor.gpu_nbor()==2) { fprintf(screen,"Neigh mode: Hybrid (binning on host)"); - if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n"); - else fprintf(screen," with subgroup support\n"); + if (_use_old_nbor_build == 1) fprintf(screen," - legacy.\n"); + else fprintf(screen," with subgroup support.\n"); + if (_use_device_sort == 0) + fprintf(screen,"Neigh sorting: Unavailable or disabled.\n"); } else if (nbor.gpu_nbor()==1) { fprintf(screen,"Neigh mode: Device"); - if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n"); - else fprintf(screen," - with subgroup support\n"); + if (_use_old_nbor_build == 1) fprintf(screen," - legacy.\n"); + else fprintf(screen," - with subgroup support.\n"); + if (_use_device_sort == 1) + fprintf(screen,"Neigh sorting: Enabled.\n"); + else + fprintf(screen,"Neigh sorting: Unavailable or disabled.\n"); } else if (nbor.gpu_nbor()==0) - fprintf(screen,"Neigh mode: Host\n"); + fprintf(screen,"Neigh mode: Host.\n"); fprintf(screen,"-------------------------------------"); fprintf(screen,"--------------------------------\n\n"); diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index d6b52484f1a..04d81e406fa 100644 --- a/lib/gpu/lal_device.h +++ 
b/lib/gpu/lal_device.h @@ -347,7 +347,8 @@ class Device { int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id; int _max_shared_types, _max_bio_shared_types, _pppm_max_spline; int _nbor_prefetch; - int _use_old_nbor_build; + int _use_old_nbor_build; // 1 if using old/legacy neighbor build, 0 otherwise + int _use_device_sort; // 1 if using sorting particles using their cell IDs on the device, 0 otherwise UCL_Program *dev_program; UCL_Kernel k_zero, k_info; diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index e7f846fcdbd..80b1dd85467 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -24,6 +24,7 @@ #define IJ_SIZE 131072 +// same settings with lal_neighbor_gpu.cu #if !defined(USE_OPENCL) && !defined(USE_HIP) #ifndef LAL_USE_OLD_NEIGHBOR // Issue with incorrect results with CUDA >= 11.2 and pre-12.0 diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index 6e12a49304c..0a3064d6e23 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -34,14 +34,15 @@ _texture( pos_tex,float4); _texture_2d( pos_tex,int4); #endif +// same settings with lal_neighbor.h #ifdef NV_KERNEL -// Issue with incorrect results with CUDA >= 11.2 and pre-12.0 (same in lal_neighbor.h) +// Issue with incorrect results with CUDA >= 11.2 and pre-12.0 #if (CUDA_VERSION > 11019) && (CUDA_VERSION < 12000) #define LAL_USE_OLD_NEIGHBOR #endif #endif -#ifdef USE_HIP +#if defined(USE_HIP) || defined(__APPLE__) #define LAL_USE_OLD_NEIGHBOR #endif From 01a8acc4965570a6b7b419f95481e1ede5ddecef Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 7 Nov 2025 08:30:57 -0700 Subject: [PATCH 187/604] More tweaks --- src/KOKKOS/atom_vec_kokkos.cpp | 17 ++++++++--------- src/KOKKOS/comm_kokkos.cpp | 22 ++++++++++++++++------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 3e485af7050..ecd654f9195 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_kokkos.cpp @@ -1455,7 +1455,7 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, dy = pbc[1]; dz = pbc[2]; } - if (space == Host) { + if (space == HostKK) { AtomVecKokkos_PackBorder f( atomKK,buf.view_host(), k_sendlist.view_host(), dx,dy,dz,datamask_border); @@ -1469,7 +1469,7 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, } else { dx = dy = dz = 0; - if (space == Host) { + if (space == HostKK) { AtomVecKokkos_PackBorder f( atomKK,buf.view_host(), k_sendlist.view_host(), dx,dy,dz,datamask_border); @@ -1594,7 +1594,7 @@ void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, atomKK->sync(space,datamask_border); - if (space == Host) { + if (space == HostKK) { struct AtomVecKokkos_UnpackBorder f(atomKK,buf.view_host(),first,datamask_border); Kokkos::parallel_for(n,f); @@ -1760,7 +1760,7 @@ int AtomVecKokkos::pack_border_vel_kokkos( dz = pbc[2]; } if (!deform_vremap) { - if (space == Host) { + if (space == HostKK) { AtomVecKokkos_PackBorderVel f( atomKK, buf.view_host(), k_sendlist.view_host(), @@ -1780,7 +1780,7 @@ int AtomVecKokkos::pack_border_vel_kokkos( dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; dvz = pbc[2]*h_rate[2]; - if (space == Host) { + if (space == HostKK) { AtomVecKokkos_PackBorderVel f( atomKK, buf.view_host(), k_sendlist.view_host(), @@ -1797,7 +1797,7 @@ int AtomVecKokkos::pack_border_vel_kokkos( } } } else { - if (space == Host) { + if (space == HostKK) { AtomVecKokkos_PackBorderVel f( atomKK, buf.view_host(), k_sendlist.view_host(), @@ -1814,8 +1814,6 @@ int AtomVecKokkos::pack_border_vel_kokkos( } } - atomKK->modified(space,datamask_border_vel); - return n*(size_border + size_velocity); } @@ -1940,7 +1938,7 @@ void AtomVecKokkos::unpack_border_vel_kokkos( atomKK->sync(space,datamask_border_vel); - if (space == Host) { + if (space == HostKK) { struct AtomVecKokkos_UnpackBorderVel f( atomKK, 
buf.view_host(), @@ -2311,6 +2309,7 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr AtomVecKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); Kokkos::parallel_for(nsend,f); + return nsend*size_exchange; } } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 162608198e4..0a8ad813f85 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -132,9 +132,15 @@ void CommKokkos::forward_comm(int dummy) k_sendlist.sync_host(); - atomKK->sync(Host,atomKK->avecKK->datamask_comm_vel); + if (ghost_velocity) + atomKK->sync(Host,atomKK->avecKK->datamask_comm_vel); + else + atomKK->sync(Host,atomKK->avecKK->datamask_comm); CommBrick::forward_comm(dummy); - atomKK->modified(Host,atomKK->avecKK->datamask_comm_vel); + if (ghost_velocity) + atomKK->modified(Host,atomKK->avecKK->datamask_comm_vel); + else + atomKK->modified(Host,atomKK->avecKK->datamask_comm); } /* ---------------------------------------------------------------------- */ @@ -1035,14 +1041,20 @@ void CommKokkos::borders() if (exchange_comm_on_host) borders_device(); else borders_device(); } else { - atomKK->sync(Host,atomKK->avecKK->datamask_border_vel); + if (ghost_velocity) + atomKK->sync(Host,atomKK->avecKK->datamask_border_vel); + else + atomKK->sync(Host,atomKK->avecKK->datamask_border); k_sendlist.sync_host(); int prev_auto_sync = lmp->kokkos->auto_sync; lmp->kokkos->auto_sync = 1; CommBrick::borders(); lmp->kokkos->auto_sync = prev_auto_sync; k_sendlist.modify_host(); - atomKK->modified(Host,atomKK->avecKK->datamask_border_vel); + if (ghost_velocity) + atomKK->modified(Host,atomKK->avecKK->datamask_border_vel); + else + atomKK->modified(Host,atomKK->avecKK->datamask_border); } if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_legacy) @@ -1324,8 +1336,6 @@ void CommKokkos::borders_device() { max = MAX(maxforward*rmax,maxreverse*smax); if (max > maxrecv) grow_recv_kokkos(max); - 
atomKK->modified(exec_space,atomKK->avecKK->datamask_border_vel); - // reset global->local map if (map_style != Atom::MAP_NONE) From f07d02fba5a48484cf3c116859779ca993f3d023 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 7 Nov 2025 10:54:27 -0500 Subject: [PATCH 188/604] silence warnings about ignored return values --- lib/gpu/geryon/hip_device.h | 2 +- lib/gpu/geryon/hip_macros.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/gpu/geryon/hip_device.h b/lib/gpu/geryon/hip_device.h index 66920786293..4d5d4ee3d63 100644 --- a/lib/gpu/geryon/hip_device.h +++ b/lib/gpu/geryon/hip_device.h @@ -443,7 +443,7 @@ void UCL_Device::clear() { // List all devices along with all properties void UCL_Device::print_all(std::ostream &out) { int driver_version; - hipDriverGetVersion(&driver_version); + (void)hipDriverGetVersion(&driver_version); out << "Driver Version: " << driver_version/1000 << "." << driver_version%100 << std::endl; diff --git a/lib/gpu/geryon/hip_macros.h b/lib/gpu/geryon/hip_macros.h index e16caf4944a..6e9446d4ba2 100644 --- a/lib/gpu/geryon/hip_macros.h +++ b/lib/gpu/geryon/hip_macros.h @@ -61,20 +61,20 @@ #else // not DEBUG // void macros for performance reasons -#define CU_SAFE_CALL_NS( call ) call -#define CU_SAFE_CALL( call) call +#define CU_SAFE_CALL_NS(call) (void)call +#define CU_SAFE_CALL(call) (void)call #endif #ifdef UCL_DESTRUCT_CHECK -#define CU_DESTRUCT_CALL( call) CU_SAFE_CALL( call) -#define CU_DESTRUCT_CALL_NS( call) CU_SAFE_CALL_NS( call) +#define CU_DESTRUCT_CALL(call) CU_SAFE_CALL(call) +#define CU_DESTRUCT_CALL_NS(call) CU_SAFE_CALL_NS(call) #else -#define CU_DESTRUCT_CALL( call) call -#define CU_DESTRUCT_CALL_NS( call) call +#define CU_DESTRUCT_CALL(call) (void)call +#define CU_DESTRUCT_CALL_NS(call) (void)call #endif From 56b514f5899f6a7fb15a54a7c8d46dde29c3ff8b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 7 Nov 2025 11:27:55 -0500 Subject: [PATCH 189/604] review, purge, and update 
CODEOWNERS file according to current GitHub docs --- .github/CODEOWNERS | 320 ++++++++++++++++++++++----------------------- 1 file changed, 154 insertions(+), 166 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a920a25f1d5..c3b3ddbce19 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -3,191 +3,179 @@ # Order matters, the last match has the highest precedence # library folders -lib/colvars/* @giacomofiorin -lib/compress/* @akohlmey -lib/kokkos/* @stanmoore1 -lib/molfile/* @akohlmey -lib/qmmm/* @akohlmey -lib/vtk/* @rbberger -lib/kim/* @ellio167 -lib/mesont/* @iafoss +/lib/colvars/ @giacomofiorin +/lib/gpu/ @ndtrung81 +/lib/kokkos/ @stanmoore1 +/lib/linalg/ @akohlmey +/lib/molfile/ @akohlmey +/lib/qmmm/ @akohlmey # whole packages -src/ADIOS/* @pnorbert -src/AMOEBA/* @sjplimp -src/BPM/* @jtclemm -src/BROWNIAN/* @samueljmcameron -src/CG-DNA/* @ohenrich -src/CG-SPICA/* @yskmiyazaki -src/COLVARS/* @giacomofiorin -src/COMPRESS/* @rbberger -src/DIELECTRIC/* @ndtrung81 -src/ELECTRODE/* @ludwig-ahrens -src/FEP/* @agiliopadua -src/GPU/* @ndtrung81 -src/GRANULAR/* @jtclemm @dsbolin -src/INTEL/* @wmbrownintel -src/KIM/* @ellio167 -src/KOKKOS/* @stanmoore1 -src/LATTE/* @cnegre -src/MANIFOLD/* @Pakketeretet2 -src/MDI/* @taylor-a-barnes @sjplimp -src/MEAM/* @martok -src/MESONT/* @iafoss -src/ML-HDNNP/* @singraber -src/ML-IAP/* @athomps -src/ML-PACE/* @yury-lysogorskiy -src/ML-POD/* @exapde -src/ML-UF3/* @monk-04 -src/MOFFF/* @hheenen -src/MOLFILE/* @akohlmey -src/NETCDF/* @pastewka -src/OPENMP/* @akohlmey -src/PHONON/* @lingtikong -src/PLUGIN/* @akohlmey -src/PLUMED/* @gtribello -src/PTM/* @pmla -src/QMMM/* @akohlmey -src/REACTION/* @jrgissing -src/REAXFF/* @hasanmetin @stanmoore1 -src/RHEO/* @jtclemm -src/SCAFACOS/* @rhalver -src/SNAP/* @athomps -src/SPIN/* @julient31 -src/TALLY/* @akohlmey -src/UEF/* @danicholson -src/VTK/* @rbberger +/src/ADIOS/ @pnorbert +/src/AMOEBA/ @sjplimp +/src/BPM/ @jtclemm +/src/BROWNIAN/ @samueljmcameron +/src/CG-DNA/ 
@ohenrich +/src/CG-SPICA/ @yskmiyazaki +/src/COLVARS/ @giacomofiorin +/src/DIELECTRIC/ @ndtrung81 +/src/ELECTRODE/ @ludwig-ahrens @srtee +/src/FEP/ @agiliopadua +/src/GPU/ @ndtrung81 +/src/GRANULAR/ @jtclemm @dsbolin +/src/INTEL/ @wmbrownintel +/src/KIM/ @ellio167 +/src/KOKKOS/ @stanmoore1 +/src/MANIFOLD/ @Pakketeretet2 +/src/MDI/ @taylor-a-barnes @sjplimp +/src/MEAM/ @martok +/src/MESONT/ @iafoss +/src/ML-HDNNP/ @singraber +/src/ML-IAP/ @athomps +/src/ML-PACE/ @yury-lysogorskiy +/src/ML-POD/ @exapde +/src/ML-UF3/ @monk-04 +/src/MOFFF/ @hheenen +/src/MOLFILE/ @akohlmey +/src/NETCDF/ @pastewka +/src/OPENMP/ @akohlmey +/src/PHONON/ @lingtikong +/src/PLUGIN/ @akohlmey +/src/PLUMED/ @gtribello +/src/PTM/ @pmla +/src/QMMM/ @akohlmey +/src/REACTION/ @jrgissing +/src/REAXFF/ @hasanmetin @stanmoore1 +/src/RHEO/ @jtclemm +/src/SCAFACOS/ @rhalver +/src/SNAP/ @athomps +/src/SPIN/ @julient31 +/src/TALLY/ @akohlmey +/src/UEF/ @danicholson # individual files in packages -src/GPU/pair_vashishta_gpu.* @andeplane -src/KOKKOS/pair_vashishta_kokkos.* @andeplane @stanmoore1 -src/KOSSOS/pair_pod_kokkos.* @exapde @stanmoore1 -src/MANYBODY/pair_vashishta_table.* @andeplane -src/MANYBODY/pair_atm.* @sergeylishchuk -src/MANYBODY/pair_nb3b_screened.* @flodesani -src/REPLICA/*_grem.* @dstelter92 -src/EXTRA-COMMAND/geturl.* @akohlmey -src/EXTRA-COMMAND/group_ndx.* @akohlmey -src/EXTRA-COMMAND/ndx_group.* @akohlmey -src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel -src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps -src/EXTRA-DUMP/dump_extxyz.* @fxcoudert -src/EXTRA-FIX/fix_deform_pressure.* @jtclemm -src/EXTRA-PAIR/pair_dispersion_d3.* @soniasalomoni @arthurfl -src/EXTRA-PAIR/d3_parameters.h @soniasalomoni @arthurfl -src/MISC/*_tracker.* @jtclemm -src/MC/fix_gcmc.* @athomps -src/MC/fix_sgcmc.* @athomps -src/REAXFF/compute_reaxff_atom.* @rbberger -src/KOKKOS/compute_reaxff_atom_kokkos.* @rbberger -src/REPLICA/fix_pimd_langevin.* @Yi-FanLi -src/DPD-BASIC/pair_dpd_coul_slater_long.* 
@Eddy-Barraud -src/GPU/pair_dpd_coul_slater_long.* @Eddy-Barraud +/src/GPU/pair_vashishta_gpu.* @andeplane +/src/KOKKOS/pair_vashishta_kokkos.* @andeplane @stanmoore1 +/src/KOSSOS/pair_pod_kokkos.* @exapde @stanmoore1 +/src/MANYBODY/pair_vashishta_table.* @andeplane +/src/MANYBODY/pair_atm.* @sergeylishchuk +/src/MANYBODY/pair_nb3b_screened.* @flodesani +/src/REPLICA/*_grem.* @dstelter92 +/src/EXTRA-COMMAND/geturl.* @akohlmey +/src/EXTRA-COMMAND/group_ndx.* @akohlmey +/src/EXTRA-COMMAND/ndx_group.* @akohlmey +/src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel +/src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps +/src/EXTRA-DUMP/dump_extxyz.* @fxcoudert @akohlmey +/src/EXTRA-FIX/fix_deform_pressure.* @jtclemm +/src/EXTRA-PAIR/pair_dispersion_d3.* @soniasalomoni @arthurfl +/src/EXTRA-PAIR/d3_parameters.h @soniasalomoni @arthurfl +/src/MISC/*_tracker.* @jtclemm +/src/MC/fix_gcmc.* @athomps +/src/MC/fix_sgcmc.* @athomps +/src/REAXFF/compute_reaxff_atom.* @rbberger +/src/KOKKOS/compute_reaxff_atom_kokkos.* @rbberger +/src/REPLICA/fix_pimd_langevin.* @Yi-FanLi +/src/DPD-BASIC/pair_dpd_coul_slater_long.* @Eddy-Barraud +/src/GPU/pair_dpd_coul_slater_long.* @Eddy-Barraud # core LAMMPS classes -src/lammps.* @sjplimp -src/pointers.h @sjplimp -src/atom.* @sjplimp -src/atom_vec.* @sjplimp -src/angle.* @sjplimp -src/bond.* @sjplimp -src/comm*.* @sjplimp -src/compute.* @sjplimp -src/dihedral.* @sjplimp -src/domain.* @sjplimp @stanmoore1 -src/dump*.* @sjplimp -src/error.* @sjplimp -src/finish.* @sjplimp -src/fix.* @sjplimp -src/force.* @sjplimp -src/group.* @sjplimp -src/improper.* @sjplimp -src/info.* @akohlmey -src/kspace.* @sjplimp -src/lmptype.h @sjplimp -src/label_map.* @jrgissing @akohlmey -src/library.* @sjplimp @akohlmey -src/main.cpp @sjplimp -src/min_*.* @sjplimp -src/memory.* @sjplimp -src/modify.* @sjplimp @stanmoore1 -src/molecule.* @sjplimp @akohlmey -src/my_page.h @sjplimp -src/my_pool_chunk.h @sjplimp -src/npair*.* @sjplimp @jtclemm -src/ntopo*.* 
@sjplimp @jtclemm -src/nstencil*.* @sjplimp @jtclemm -src/neighbor.* @sjplimp @jtclemm -src/nbin*.* @sjplimp @jtclemm -src/neigh_*.* @sjplimp @jtclemm -src/output.* @sjplimp -src/pair.* @sjplimp -src/rcb.* @sjplimp -src/random_*.* @sjplimp -src/region*.* @sjplimp -src/rcb.* @sjplimp -src/read*.* @sjplimp -src/rerun.* @sjplimp -src/run.* @sjplimp -src/respa.* @sjplimp -src/set.* @sjplimp -src/special.* @sjplimp -src/suffix.h @sjplimp -src/thermo.* @sjplimp -src/universe.* @sjplimp -src/update.* @sjplimp -src/variable.* @sjplimp -src/velocity.* @sjplimp -src/write_data.* @sjplimp -src/write_restart.* @sjplimp -src/write_molecule.* @akohlmey +/src/lammps.* @sjplimp +/src/pointers.h @sjplimp +/src/atom.* @sjplimp +/src/atom_vec.* @sjplimp +/src/angle.* @sjplimp +/src/bond.* @sjplimp +/src/comm*.* @sjplimp +/src/compute.* @sjplimp +/src/dihedral.* @sjplimp +/src/domain.* @sjplimp @stanmoore1 +/src/dump*.* @sjplimp +/src/error.* @sjplimp +/src/finish.* @sjplimp +/src/fix.* @sjplimp +/src/force.* @sjplimp +/src/group.* @sjplimp +/src/improper.* @sjplimp +/src/info.* @akohlmey +/src/kspace.* @sjplimp +/src/lmptype.h @sjplimp +/src/label_map.* @jrgissing @akohlmey +/src/library.* @sjplimp @akohlmey +/src/main.cpp @sjplimp +/src/min_*.* @sjplimp +/src/memory.* @sjplimp +/src/modify.* @sjplimp @stanmoore1 +/src/molecule.* @sjplimp @akohlmey +/src/my_page.h @sjplimp +/src/my_pool_chunk.h @sjplimp +/src/npair*.* @sjplimp @jtclemm +/src/ntopo*.* @sjplimp @jtclemm +/src/nstencil*.* @sjplimp @jtclemm +/src/neighbor.* @sjplimp @jtclemm +/src/nbin*.* @sjplimp @jtclemm +/src/neigh_*.* @sjplimp @jtclemm +/src/output.* @sjplimp +/src/pair.* @sjplimp +/src/rcb.* @sjplimp +/src/random_*.* @sjplimp +/src/region*.* @sjplimp +/src/rcb.* @sjplimp +/src/read*.* @sjplimp +/src/rerun.* @sjplimp +/src/run.* @sjplimp +/src/respa.* @sjplimp +/src/set.* @sjplimp +/src/special.* @sjplimp +/src/suffix.h @sjplimp +/src/thermo.* @sjplimp +/src/universe.* @sjplimp +/src/update.* @sjplimp 
+/src/variable.* @sjplimp +/src/velocity.* @sjplimp +/src/write_data.* @sjplimp +/src/write_restart.* @sjplimp +/src/write_molecule.* @akohlmey # overrides for specific files -src/dump_movie.* @akohlmey -src/exceptions.h @rbberger -src/fix_nh.* @athomps -src/info.* @akohlmey @rbberger -src/min* @sjplimp @stanmoore1 -src/platform.* @akohlmey -src/timer.* @akohlmey -src/utils.* @akohlmey @rbberger -src/verlet.* @sjplimp @stanmoore1 -src/math_eigen_impl.h @jewettaij -src/fix_press_langevin.* @Bibobu +/src/dump_movie.* @akohlmey +/src/fix_nh.* @athomps +/src/info.* @akohlmey +/src/min* @sjplimp @stanmoore1 +/src/platform.* @akohlmey +/src/timer.* @akohlmey +/src/utils.* @akohlmey +/src/verlet.* @sjplimp @stanmoore1 +/src/math_eigen_impl.h @jewettaij +/src/fix_press_langevin.* @Bibobu # tools -tools/coding_standard/* @akohlmey @rbberger -tools/emacs/* @HaoZeke -tools/lammps-shell/* @akohlmey -tools/msi2lmp/* @akohlmey -tools/offline/* @rbberger -tools/singularity/* @akohlmey @rbberger -tools/swig/* @akohlmey -tools/valgrind/* @akohlmey -tools/vim/* @hammondkd +/tools/coding_standard/ @akohlmey +/tools/emacs/ @HaoZeke +/tools/singularity/ @akohlmey +/tools/swig/ @akohlmey +/tools/valgrind/ @akohlmey +/tools/vim/ @hammondkd # tests -unittest/* @akohlmey +/unittest/ @akohlmey # cmake -cmake/* @akohlmey -cmake/Modules/LAMMPSInterfacePlugin.cmake @akohlmey -cmake/Modules/MPI4WIN.cmake @akohlmey -cmake/Modules/OpenCLLoader.cmake @akohlmey -cmake/Modules/Packages/COLVARS.cmake @giacomofiorin -cmake/Modules/Packages/KIM.cmake @ellio167 -cmake/presets/*.cmake @akohlmey +/cmake/ @akohlmey +/cmake/Modules/Packages/COLVARS.cmake @giacomofiorin +/cmake/Modules/Packages/KIM.cmake @ellio167 # python -python/* @rbberger +/python/ @akohlmey +/python/ipython/ @rbberger # fortran -fortran/* @akohlmey @hammondkd +/fortran/ @akohlmey @hammondkd # docs -doc/* @akohlmey -examples/plugin/* @akohlmey -examples/PACKAGES/pace/plugin/* @akohlmey +/doc/* @akohlmey +/examples/plugin/ @akohlmey 
+/examples/PACKAGES/pace/plugin/ @akohlmey # for releases -src/version.h @sjplimp +/src/version.h @sjplimp From 71d9f912383f2050c6246c9645a4e9acb4227701 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 7 Nov 2025 10:42:43 -0700 Subject: [PATCH 190/604] Fix some issues --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_angle_kokkos.h | 1 + src/KOKKOS/atom_vec_atomic_kokkos.cpp | 1 - src/KOKKOS/atom_vec_bond_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_bond_kokkos.h | 1 + src/KOKKOS/atom_vec_charge_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_charge_kokkos.h | 1 + src/KOKKOS/atom_vec_dipole_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_dipole_kokkos.h | 1 + src/KOKKOS/atom_vec_dpd_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_dpd_kokkos.h | 1 + src/KOKKOS/atom_vec_full_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_full_kokkos.h | 1 + src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_hybrid_kokkos.h | 1 + src/KOKKOS/atom_vec_kokkos.cpp | 157 ++++++++++++++--------- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_molecular_kokkos.h | 1 + src/KOKKOS/atom_vec_sphere_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_sphere_kokkos.h | 1 + src/KOKKOS/atom_vec_spin_kokkos.cpp | 9 ++ src/KOKKOS/atom_vec_spin_kokkos.h | 2 + 22 files changed, 198 insertions(+), 61 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 776eb3bbd9d..7115046adc5 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -33,6 +33,15 @@ AtomVecKokkos(lmp), AtomVecAngle(lmp) } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git 
a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 90cadf3aab4..9b28cf73fc0 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -31,6 +31,7 @@ namespace LAMMPS_NS { class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { public: AtomVecAngleKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 0364e0870a6..36ed9cd693b 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -31,7 +31,6 @@ AtomVecKokkos(lmp), AtomVecAtomic(lmp) { } - /* ---------------------------------------------------------------------- process field strings to initialize data structs for all other methods ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index cb49a05e03b..5119951c1d8 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -31,6 +31,15 @@ AtomVecKokkos(lmp), AtomVecBond(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecBondKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 3978e652c1a..7adea656dd2 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -31,6 +31,7 @@ namespace LAMMPS_NS { class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { public: AtomVecBondKokkos(class LAMMPS *); + void init() override; void grow(int) 
override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index f9176006bcc..228842c2f19 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -31,6 +31,15 @@ AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index df3416bd11d..ee2f5c19f8f 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { public: AtomVecChargeKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index dafdf5fdc18..1753941ba23 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -29,6 +29,15 @@ using namespace LAMMPS_NS; AtomVecDipoleKokkos::AtomVecDipoleKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecDipole(lmp), q(nullptr) {} +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git 
a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index ab5fcd11755..0c0972d4763 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { public: AtomVecDipoleKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index c8833dfeb64..262624f7f00 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -33,6 +33,15 @@ AtomVecKokkos(lmp), AtomVecDPD(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 1f6db4ef234..e522c0c6ce6 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { public: AtomVecDPDKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 17bc931f424..6a3d57a7535 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -31,6 +31,15 @@ AtomVecKokkos(lmp), AtomVecFull(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods 
+------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index af46e8d48d6..df7133638e5 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -31,6 +31,7 @@ namespace LAMMPS_NS { class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { public: AtomVecFullKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 9912cd41604..ee2c12aeb2c 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -29,6 +29,15 @@ AtomVecKokkos(lmp), AtomVecHybrid(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- */ void AtomVecHybridKokkos::grow(int n) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 12f8dbbb8f6..dd4691f0896 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { public: AtomVecHybridKokkos(class LAMMPS *); + void init() override; void grow(int) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index ecd654f9195..81c08607f8d 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_kokkos.cpp @@ -1105,7 +1105,7 @@ void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1121,9 +1121,9 @@ struct AtomVecKokkos_PackReverse { const typename DAT::tdual_double_2d_lr &buf, const int &first, const uint64_t &datamask): _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), _fm(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()), _first(first),_datamask(datamask) { const size_t elements = atomKK->avecKK->size_reverse; const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; @@ -1137,22 +1137,24 @@ struct AtomVecKokkos_PackReverse { _buf(i,m++) = _f(i+_first,1); _buf(i,m++) = _f(i+_first,2); - if (_datamask & FM_MASK) { - _buf(i,m++) = _fm(i+_first,0); - _buf(i,m++) = _fm(i+_first,1); - _buf(i,m++) = _fm(i+_first,2); - } + if constexpr (!DEFAULT) { + if (_datamask & TORQUE_MASK) { + _buf(i,m++) = _torque(i+_first,0); + _buf(i,m++) = _torque(i+_first,1); + _buf(i,m++) = _torque(i+_first,2); + } - if (_datamask & FML_MASK) { - _buf(i,m++) = _fm_long(i+_first,0); - _buf(i,m++) = _fm_long(i+_first,1); - _buf(i,m++) = _fm_long(i+_first,2); - } + if (_datamask & FM_MASK) { + _buf(i,m++) = _fm(i+_first,0); + _buf(i,m++) = _fm(i+_first,1); + _buf(i,m++) = _fm(i+_first,2); + } - if (_datamask & TORQUE_MASK) { - _buf(i,m++) = _torque(i+_first,0); - _buf(i,m++) = _torque(i+_first,1); - _buf(i,m++) = _torque(i+_first,2); + if (_datamask & FML_MASK) { + _buf(i,m++) = _fm_long(i+_first,0); + _buf(i,m++) = _fm_long(i+_first,1); + _buf(i,m++) = _fm_long(i+_first,2); + } } } }; @@ -1163,12 +1165,22 @@ int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->reverse_comm_on_host) { 
atomKK->sync(HostKK,datamask_reverse); - struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } } else { atomKK->sync(Device,datamask_reverse); - struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } } return n*size_reverse; @@ -1176,7 +1188,7 @@ int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnPackReverseSelf { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1193,9 +1205,9 @@ struct AtomVecKokkos_UnPackReverseSelf { const typename DAT::tdual_int_1d &list, const uint64_t &datamask): _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), _fm(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()), _nfirst(nfirst),_list(list.view()), _datamask(datamask) {}; @@ -1206,22 +1218,24 @@ struct AtomVecKokkos_UnPackReverseSelf { _f(j,1) += _f(i+_nfirst,1); _f(j,2) += _f(i+_nfirst,2); - if (_datamask & FM_MASK) { - _fm(j,0) += _fm(i+_nfirst,0); - _fm(j,1) += _fm(i+_nfirst,1); - _fm(j,2) += _fm(i+_nfirst,2); - } + if constexpr (!DEFAULT) { + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _torque(i+_nfirst,0); + _torque(j,1) += _torque(i+_nfirst,1); + _torque(j,2) += _torque(i+_nfirst,2); + } - if (_datamask & FML_MASK) { - _fm_long(j,0) += _fm_long(i+_nfirst,0); - _fm_long(j,1) += _fm_long(i+_nfirst,1); - _fm_long(j,2) += 
_fm_long(i+_nfirst,2); - } + if (_datamask & FM_MASK) { + _fm(j,0) += _fm(i+_nfirst,0); + _fm(j,1) += _fm(i+_nfirst,1); + _fm(j,2) += _fm(i+_nfirst,2); + } - if (_datamask & TORQUE_MASK) { - _torque(j,0) += _torque(i+_nfirst,0); - _torque(j,1) += _torque(i+_nfirst,1); - _torque(j,2) += _torque(i+_nfirst,2); + if (_datamask & FML_MASK) { + _fm_long(j,0) += _fm_long(i+_nfirst,0); + _fm_long(j,1) += _fm_long(i+_nfirst,1); + _fm_long(j,2) += _fm_long(i+_nfirst,2); + } } } }; @@ -1232,13 +1246,23 @@ int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list const int nfirst) { if (lmp->kokkos->reverse_comm_on_host) { atomKK->sync(HostKK,datamask_reverse); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } atomKK->modified(HostKK,datamask_reverse); } else { atomKK->sync(Device,datamask_reverse); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } atomKK->modified(Device,datamask_reverse); } @@ -1247,7 +1271,7 @@ int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnPackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1264,9 +1288,9 @@ struct AtomVecKokkos_UnPackReverse { const typename DAT::tdual_int_1d &list, const uint64_t datamask): _f(atomKK->k_f.view()), + 
_torque(atomKK->k_torque.view()), _fm(atomKK->k_fm.view()), _fm_long(atomKK->k_fm_long.view()), - _torque(atomKK->k_torque.view()), _list(list.view()), _datamask(datamask) { const size_t elements = atomKK->avecKK->size_reverse; @@ -1282,22 +1306,24 @@ struct AtomVecKokkos_UnPackReverse { _f(j,1) += _buf(i,m++); _f(j,2) += _buf(i,m++); - if (_datamask & FM_MASK) { - _fm(j,0) += _buf(i,m++); - _fm(j,1) += _buf(i,m++); - _fm(j,2) += _buf(i,m++); - } + if constexpr (!DEFAULT) { + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _buf(i,m++); + _torque(j,1) += _buf(i,m++); + _torque(j,2) += _buf(i,m++); + } - if (_datamask & FML_MASK) { - _fm_long(j,0) += _buf(i,m++); - _fm_long(j,1) += _buf(i,m++); - _fm_long(j,2) += _buf(i,m++); - } + if (_datamask & FM_MASK) { + _fm(j,0) += _buf(i,m++); + _fm(j,1) += _buf(i,m++); + _fm(j,2) += _buf(i,m++); + } - if (_datamask & TORQUE_MASK) { - _torque(j,0) += _buf(i,m++); - _torque(j,1) += _buf(i,m++); - _torque(j,2) += _buf(i,m++); + if (_datamask & FML_MASK) { + _fm_long(j,0) += _buf(i,m++); + _fm_long(j,1) += _buf(i,m++); + _fm_long(j,2) += _buf(i,m++); + } } } }; @@ -1313,13 +1339,23 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n, if (lmp->kokkos->reverse_comm_on_host) { atomKK->sync(HostKK,datamask_reverse); - struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } atomKK->modified(HostKK,datamask_reverse); } else { atomKK->sync(Device,datamask_reverse); - struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); - Kokkos::parallel_for(n,f); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverse 
f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } atomKK->modified(Device,datamask_reverse); } } @@ -2300,6 +2336,7 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; k_buf.resize(newsize,k_buf.view_host().extent(1)); } + if (space == HostKK) { AtomVecKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index cfaedf3804c..5dc14c87733 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -32,6 +32,15 @@ AtomVecKokkos(lmp), AtomVecMolecular(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index 00c6b625e14..5aa1b18bf59 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -31,6 +31,7 @@ namespace LAMMPS_NS { class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { public: AtomVecMolecularKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index e0b366fb50f..d16742b1251 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -35,6 +35,15 @@ AtomVecKokkos(lmp), AtomVecSphere(lmp) { } +/* ---------------------------------------------------------------------- + process field 
strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by a chunk diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 9381e68144a..7728c847dfc 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { public: AtomVecSphereKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index eebbef84e1d..cf4cf4b2c70 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -43,6 +43,15 @@ AtomVecKokkos(lmp), AtomVecSpin(lmp) { } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::init() +{ + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by a chunk diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 308a4d8ab37..75d5e77ea98 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -32,6 +32,8 @@ namespace LAMMPS_NS { class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { public: AtomVecSpinKokkos(class LAMMPS *); + void init() override; + void grow(int) override; void grow_pointers() override; void force_clear(int, size_t) override; From bb8d56d23c5cc61b4d4e1b97dfa151897bc34f2e Mon Sep 17 00:00:00 2001 From: 
Stan Moore Date: Fri, 7 Nov 2025 15:26:18 -0700 Subject: [PATCH 191/604] Fix issue --- src/KOKKOS/comm_kokkos.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 0a8ad813f85..271da5dfaff 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -720,14 +720,15 @@ void CommKokkos::exchange() break; } } + if (!flag) { if (comm->me == 0) { error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " "switching to legacy exchange/border communication"); } + exchange_comm_legacy = true; + lmp->kokkos->exchange_comm_legacy = 1; } - exchange_comm_legacy = true; - lmp->kokkos->exchange_comm_legacy = 1; } } From 865adcf5123ea493949235682f3cb25012e2e914 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 7 Nov 2025 22:03:35 -0500 Subject: [PATCH 192/604] add comment about libomp support for Fedora 43 --- cmake/presets/hip_amd.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/presets/hip_amd.cmake b/cmake/presets/hip_amd.cmake index 2cf28c05c41..858d6c066a7 100644 --- a/cmake/presets/hip_amd.cmake +++ b/cmake/presets/hip_amd.cmake @@ -24,6 +24,8 @@ set(MPI_C_COMPILER "mpicc" CACHE STRING "" FORCE) # change as needed. 
This is for Fedora Linux 41 and 42 set(_libomp_root "/usr/lib/clang/18") +# This is for Fedora Linux 43 +# set(_libomp_root "/usr/lib/clang/19") # we need to explicitly specify the include dir, since hipcc will # compile each file twice and doesn't find omp.h the second time From f3df39d661183590f644a69955c72930d9dc5771 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Fri, 7 Nov 2025 23:07:37 -0500 Subject: [PATCH 193/604] correct for bond/react refactor also update format to include fix ID --- src/REACTION/fix_bond_react.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index 9ab0926649d..25ce512d39b 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -4337,7 +4337,7 @@ double FixBondReact::compute_vector(int n) std::string FixBondReact::get_thermo_colname(int n) { - return rxn_name[n]; + return fmt::format("f_{}:{}", id, rxns[n].name); } /* ---------------------------------------------------------------------- */ From 032ab324a3d90cdae49c9f72341e874057bec0a3 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Fri, 7 Nov 2025 23:32:15 -0500 Subject: [PATCH 194/604] separate out colname auto code calling 'parse_fields' twice was causing memory issues. 
apparently can't use thermo's 'fixes' variable yet --- src/thermo.cpp | 31 +++++++++++++++++++++++-------- src/thermo.h | 3 ++- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/thermo.cpp b/src/thermo.cpp index 45798039b9f..1aecb58a9f6 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -117,7 +117,6 @@ Thermo::Thermo(LAMMPS *_lmp, int narg, char **arg) : lostflag = lostbond = Thermo::ERROR; lostbefore = warnbefore = 0; flushflag = 0; - autocolname = 0; triclinic_general = 0; firststep = 0; ntimestep = -1; @@ -696,8 +695,7 @@ void Thermo::modify_params(int narg, char **arg) for (auto &item : keyword_user) item.clear(); iarg += 2; } else if (strcmp(arg[iarg + 1], "auto") == 0) { - autocolname = 1; - parse_fields(line); + colname_auto(); iarg += 2; } else { if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, "thermo_modify colname", error); @@ -1143,8 +1141,6 @@ void Thermo::parse_fields(const std::string &str) error->all(FLERR, nfield + 1, "Thermo custom compute {} has unsupported format", icompute->id); } - if (autocolname && icompute->thermo_modify_colname) - keyword_user[nfield] = icompute->get_thermo_colname(argindex1[nfield]-1); addfield(word.c_str(), &Thermo::compute_compute, FLOAT); } else if (argi.get_type() == ArgInfo::FIX) { @@ -1179,9 +1175,6 @@ void Thermo::parse_fields(const std::string &str) error->all(FLERR, nfield + 1, "Thermo custom fix {} has unsupported format", ifix->id); } - if (autocolname && ifix->thermo_modify_colname) - keyword_user[nfield] = ifix->get_thermo_colname(argindex1[nfield]-1); - field2index[nfield] = add_fix(ifix->id); addfield(word.c_str(), &Thermo::compute_fix, FLOAT); @@ -1214,6 +1207,28 @@ void Thermo::parse_fields(const std::string &str) field_data.resize(nfield); } +/* ---------------------------------------------------------------------- + update auto-generated column names for computes, fixes +------------------------------------------------------------------------- */ + +void Thermo::colname_auto() +{ + 
for (ifield = 0; ifield < nfield; ifield++) { + std::string word = keyword[ifield]; + ArgInfo argi(word); + if (argi.get_type() == ArgInfo::COMPUTE) { + auto *icompute = modify->get_compute_by_id(argi.get_name()); + if (icompute->thermo_modify_colname) + keyword_user[ifield] = icompute->get_thermo_colname(argindex1[ifield]-1); + } + if (argi.get_type() == ArgInfo::FIX) { + auto *ifix = modify->get_fix_by_id(argi.get_name()); + if (ifix->thermo_modify_colname) + keyword_user[ifield] = ifix->get_thermo_colname(argindex1[ifield]-1); + } + } +} + /* ---------------------------------------------------------------------- add field to list of quantities to print ------------------------------------------------------------------------- */ diff --git a/src/thermo.h b/src/thermo.h index 5ec740e8c1e..2043c6d23bc 100644 --- a/src/thermo.h +++ b/src/thermo.h @@ -77,7 +77,7 @@ class Thermo : protected Pointers { int firststep; int lostbefore, warnbefore; - int flushflag, lineflag, autocolname; + int flushflag, lineflag; double last_tpcpu, last_spcpu; double last_time, last_cpu1, last_cpu2; @@ -128,6 +128,7 @@ class Thermo : protected Pointers { void deallocate(); void parse_fields(const std::string &); + void colname_auto(); int add_compute(const char *, int); int add_fix(const char *); int add_variable(const char *); From c7cc3adda47313c6a669f5edc1146a26eeba2c63 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 00:35:53 -0500 Subject: [PATCH 195/604] Update compute_reduce.cpp still need to test various compute_reduce options --- src/compute_reduce.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 40df194bcf3..0342478ea01 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -694,13 +694,13 @@ double ComputeReduce::compute_one(int m, int flag) /* ---------------------------------------------------------------------- */ std::string ComputeReduce::get_thermo_colname(int 
m) { - if (replace[m] >= 0) { + if (replace && replace[m] >= 0) { auto &val1 = values[m]; auto &val2 = values[replace[m]]; - return fmt::format("c_{}:c_{}[{}]<-{}(c_{})",id,val1.id,val1.argindex,modestr,val2.id); + return fmt::format("c_{}:c_{}[{}]<-{}(c_{})", id, val1.id, val1.argindex, modestr, val2.id); } else { auto &val = values[m]; - return fmt::format("c_{}:{}(c_{})",id,modestr,val.id); + return fmt::format("c_{}:{}(c_{})", id, modestr, val.id); } return "none"; } From 57e6724789cc463f555f61bd153d313a69fddaf9 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 00:38:42 -0500 Subject: [PATCH 196/604] whitespace --- src/compute_reduce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute_reduce.h b/src/compute_reduce.h index 85f207d1aec..ed95083892f 100644 --- a/src/compute_reduce.h +++ b/src/compute_reduce.h @@ -40,7 +40,7 @@ class ComputeReduce : public Compute { protected: int mode, nvalues, input_mode; std::string modestr; - + struct value_t { int which; int argindex; From f2412a3d2f79f2835a780064524b1f4b527b7bca Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 10:30:33 -0500 Subject: [PATCH 197/604] detect possible upcoming Windows 11 build 26H1 --- src/platform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/platform.cpp b/src/platform.cpp index c0728a34d0e..bedd716918f 100644 --- a/src/platform.cpp +++ b/src/platform.cpp @@ -258,6 +258,8 @@ std::string platform::os_info() buf = "Windows 11 24H2"; } else if (build == "26200") { buf = "Windows 11 25H2"; + } else if (build == "28000") { + buf = "Windows 11 26H1"; } else { buf = "Windows Build " + build; } From a484e7185bb868a25ebac6bc27e766a7a5d7b07e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 11:29:04 -0500 Subject: [PATCH 198/604] raise access protection for region internal functions --- src/region.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/region.h b/src/region.h 
index b14f966ff1b..8e4ec00b4e5 100644 --- a/src/region.h +++ b/src/region.h @@ -17,8 +17,15 @@ #include "pointers.h" // IWYU pragma: export namespace LAMMPS_NS { +class DumpImage; +class RegIntersect; +class RegUnion; class Region : protected Pointers { + friend DumpImage; + friend RegIntersect; + friend RegUnion; + public: enum { CONSTANT, VARIABLE, NONE }; @@ -88,17 +95,20 @@ class Region : protected Pointers { virtual void length_restart_string(int &); virtual void reset_vel(); - // implemented by each region, not called by other classes + protected: + + // implemented by each region, generally not called by other classes virtual int inside(double, double, double) = 0; virtual int surface_interior(double *, double) = 0; virtual int surface_exterior(double *, double) = 0; virtual void shape_update() {} + virtual void bbox_update() {} + virtual void pretransform(); virtual void set_velocity_shape() {} virtual void velocity_contact_shape(double *, double *) {} - protected: void add_contact(int, double *, double, double, double); void options(int, char **); void point_on_line_segment(double *, double *, double *, double *); From 1253b7feefdd2eab27f1b8af759c5dd05a8279db Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 12:23:20 -0500 Subject: [PATCH 199/604] use Region::forward_transform() to have general support for translated and rotated regions --- doc/src/dump_image.rst | 28 +-- src/dump_image.cpp | 423 ++++++++++++++++++++++++----------------- src/region.h | 6 +- 3 files changed, 263 insertions(+), 194 deletions(-) diff --git a/doc/src/dump_image.rst b/doc/src/dump_image.rst index f513630b93d..04300330c78 100644 --- a/doc/src/dump_image.rst +++ b/doc/src/dump_image.rst @@ -530,20 +530,20 @@ change this via the dump_modify command. .. versionadded:: 10Sep2025 -The *region* keyword can be used to create a graphical representation -of a :doc:`region <region>`.
This can be helpful in debugging the -location and extent of regions, especially when those have parameters -controlled by variables. Three styles of representing a region are -available: *filled*, *frame*, and *points*. With style *filled* the -surface of the region is drawn. For region styles that support open -faces, surfaces are not drawn for such open faces. Draw style *frame* -represents the region with a mesh of "wires" the diameter of which can -be set. Unlike with *filled*, you can look inside the region with this -draw style. The third draw style *points* generates a random point -cloud inside the simulation box and draws only those points that are -within the region. Draw styles *filled* and *frame* support only -"primitive" region style (no unions or intersections), but the *points* -draw style supports all region styles. +The *region* keyword can be used to create a graphical representation of +a :doc:`region <region>`. This can be helpful in debugging the location +and extent of regions, especially when those have parameters controlled +by variables. Three styles of representing a region are available: +*filled*, *frame*, and *points*. With style *filled* the surface of the +region is drawn. For region styles that support open faces, surfaces +are not drawn for such open faces. Draw style *frame* represents the +region with a mesh of "wires". The diameter of these "wires" can be +set. Unlike with the *filled* style, you can see what is *inside* the +region with this draw style. The third draw style *points* generates a +random point cloud inside the simulation box and draws only those points +that are within the region. Draw styles *filled* and *frame* support +only "primitive" region styles (no unions or intersections), but the +*points* draw style supports all region styles.
---------- diff --git a/src/dump_image.cpp b/src/dump_image.cpp index 1bddd0c16c3..bb958b46646 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -127,7 +127,7 @@ void scale_and_displace_triangle(triangle &tri, const double *radius, const vec3 } void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, double diameter, - const double *center, const double *radius) + const double *center, const double *radius, LAMMPS_NS::Region *reg) { vec3 offset = {center[0], center[1], center[2]}; @@ -148,6 +148,9 @@ void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, if (level <= 1) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_cylinder(tri[0].data(), tri[1].data(), color, diameter, 3); img->draw_cylinder(tri[0].data(), tri[2].data(), color, diameter, 3); img->draw_cylinder(tri[1].data(), tri[2].data(), color, diameter, 3); @@ -160,6 +163,9 @@ void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, if (level == 2) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_cylinder(tri[0].data(), tri[1].data(), color, diameter, 3); img->draw_cylinder(tri[0].data(), tri[2].data(), color, diameter, 3); img->draw_cylinder(tri[1].data(), tri[2].data(), color, diameter, 3); @@ -173,6 +179,9 @@ void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, if (level == 3) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); 
+ reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_cylinder(tri[0].data(), tri[1].data(), color, diameter, 3); img->draw_cylinder(tri[0].data(), tri[2].data(), color, diameter, 3); img->draw_cylinder(tri[1].data(), tri[2].data(), color, diameter, 3); @@ -185,6 +194,9 @@ void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_cylinder(tri[0].data(), tri[1].data(), color, diameter, 3); img->draw_cylinder(tri[0].data(), tri[2].data(), color, diameter, 3); img->draw_cylinder(tri[1].data(), tri[2].data(), color, diameter, 3); @@ -193,7 +205,7 @@ void ellipsoid2wireframe(LAMMPS_NS::Image *img, int level, const double *color, } void ellipsoid2filled(LAMMPS_NS::Image *img, int level, const double *color, - const double *center, const double *radius) + const double *center, const double *radius, LAMMPS_NS::Region *reg) { vec3 offset = {center[0], center[1], center[2]}; @@ -213,6 +225,9 @@ void ellipsoid2filled(LAMMPS_NS::Image *img, int level, const double *color, if (level <= 1) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_triangle(tri[0].data(), tri[1].data(), tri[2].data(), color); } } @@ -224,6 +239,9 @@ void ellipsoid2filled(LAMMPS_NS::Image *img, int level, const double *color, if (level == 2) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); 
img->draw_triangle(tri[0].data(), tri[1].data(), tri[2].data(), color); } } @@ -235,6 +253,9 @@ void ellipsoid2filled(LAMMPS_NS::Image *img, int level, const double *color, if (level == 3) { for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_triangle(tri[0].data(), tri[1].data(), tri[2].data(), color); } } @@ -245,6 +266,9 @@ void ellipsoid2filled(LAMMPS_NS::Image *img, int level, const double *color, for (auto &tri : trilist) { scale_and_displace_triangle(tri, radius, offset); + reg->forward_transform(tri[0][0], tri[0][1], tri[0][2]); + reg->forward_transform(tri[1][0], tri[1][1], tri[1][2]); + reg->forward_transform(tri[2][0], tri[2][1], tri[2][2]); img->draw_triangle(tri[0].data(), tri[1].data(), tri[2].data(), color); } } @@ -1704,24 +1728,11 @@ void DumpImage::create_image() if (!ptr) error->all(FLERR, "Dump image region {} does not exist", reg.id); reg.ptr = ptr; - if (reg.ptr->rotateflag) { - utils::logmesg(lmp, "Cannot (yet) handle rotating region {}. 
Skipping...\n", reg.ptr->id); - continue; - } - // update internal variables - reg.ptr->prematch(); - // compute position offset for moving regions + reg.ptr->prematch(); - double dx = 0.0; - double dy = 0.0; - double dz = 0.0; - if (reg.ptr->moveflag) { - dx = reg.ptr->dx; - dy = reg.ptr->dy; - dz = reg.ptr->dz; - } + // for POINTS style we have the same code for all region styles if (reg.style == POINTS) { int seed = (int)(platform::walltime()*1000000) % 1000000; @@ -1748,29 +1759,30 @@ void DumpImage::create_image() pos[1] = rand.uniform() * ylen + yoff; pos[2] = rand.uniform() * zlen + zoff; if (reg.ptr->inside(pos[0], pos[1], pos[2])) { - pos[0] += dx; - pos[1] += dy; - pos[2] += dz; + reg.ptr->forward_transform(pos[0], pos[1], pos[2]); image->draw_sphere(pos, reg.color, reg.diameter); } } - } else { + } else { std::string regstyle = reg.ptr->style; + if (regstyle == "block") { auto *myreg = dynamic_cast(reg.ptr); // inconsistent style. should not happen. if (!myreg) continue; double block[8][3]; - block[0][0] = myreg->xlo + dx; block[0][1] = myreg->ylo + dy; block[0][2] = myreg->zlo + dz; - block[1][0] = myreg->xlo + dx; block[1][1] = myreg->ylo + dy; block[1][2] = myreg->zhi + dz; - block[2][0] = myreg->xlo + dx; block[2][1] = myreg->yhi + dy; block[2][2] = myreg->zhi + dz; - block[3][0] = myreg->xlo + dx; block[3][1] = myreg->yhi + dy; block[3][2] = myreg->zlo + dz; - block[4][0] = myreg->xhi + dx; block[4][1] = myreg->ylo + dy; block[4][2] = myreg->zlo + dz; - block[5][0] = myreg->xhi + dx; block[5][1] = myreg->ylo + dy; block[5][2] = myreg->zhi + dz; - block[6][0] = myreg->xhi + dx; block[6][1] = myreg->yhi + dy; block[6][2] = myreg->zhi + dz; - block[7][0] = myreg->xhi + dx; block[7][1] = myreg->yhi + dy; block[7][2] = myreg->zlo + dz; + block[0][0] = myreg->xlo; block[0][1] = myreg->ylo; block[0][2] = myreg->zlo; + block[1][0] = myreg->xlo; block[1][1] = myreg->ylo; block[1][2] = myreg->zhi; + block[2][0] = myreg->xlo; block[2][1] = myreg->yhi; 
block[2][2] = myreg->zhi; + block[3][0] = myreg->xlo; block[3][1] = myreg->yhi; block[3][2] = myreg->zlo; + block[4][0] = myreg->xhi; block[4][1] = myreg->ylo; block[4][2] = myreg->zlo; + block[5][0] = myreg->xhi; block[5][1] = myreg->ylo; block[5][2] = myreg->zhi; + block[6][0] = myreg->xhi; block[6][1] = myreg->yhi; block[6][2] = myreg->zhi; + block[7][0] = myreg->xhi; block[7][1] = myreg->yhi; block[7][2] = myreg->zlo; + for (int i = 0; i < 8; ++i) + reg.ptr->forward_transform(block[i][0], block[i][1], block[i][2]); if (reg.style == FRAME) { image->draw_cylinder(block[0],block[1],reg.color,reg.diameter,3); @@ -1811,6 +1823,7 @@ void DumpImage::create_image() image->draw_triangle(block[6], block[7], block[3], reg.color); } } + } else if (regstyle == "cone") { auto *myreg = dynamic_cast(reg.ptr); // inconsistent style. should not happen. @@ -1818,72 +1831,84 @@ void DumpImage::create_image() double lo[3], hi[3]; if (myreg->axis == 'x') { - lo[0] = myreg->lo + dx; - lo[1] = myreg->c1 + dy; - lo[2] = myreg->c2 + dz; - hi[0] = myreg->hi + dx; - hi[1] = myreg->c1 + dy; - hi[2] = myreg->c2 + dz; + lo[0] = myreg->lo; + lo[1] = myreg->c1; + lo[2] = myreg->c2; + hi[0] = myreg->hi; + hi[1] = myreg->c1; + hi[2] = myreg->c2; } else if (myreg->axis == 'y') { - lo[0] = myreg->c1 + dx; - lo[1] = myreg->lo + dy; - lo[2] = myreg->c2 + dz; - hi[0] = myreg->c1 + dx; - hi[1] = myreg->hi + dy; - hi[2] = myreg->c2 + dz; + lo[0] = myreg->c1; + lo[1] = myreg->lo; + lo[2] = myreg->c2; + hi[0] = myreg->c1; + hi[1] = myreg->hi; + hi[2] = myreg->c2; } else { // myreg->axis == 'z' - lo[0] = myreg->c1 + dx; - lo[1] = myreg->c2 + dy; - lo[2] = myreg->lo + dz; - hi[0] = myreg->c1 + dx; - hi[1] = myreg->c2 + dy; - hi[2] = myreg->hi + dz; + lo[0] = myreg->c1; + lo[1] = myreg->c2; + lo[2] = myreg->lo; + hi[0] = myreg->c1; + hi[1] = myreg->c2; + hi[2] = myreg->hi; } double p1[3], p2[3], p3[3], p4[3]; if (reg.style == FRAME) { for (int i = 0; i < RESOLUTION; ++i) { if (myreg->axis == 'x') { - p1[0] 
= p2[0] = myreg->lo + dx; - p3[0] = p4[0] = myreg->hi + dx; - p1[1] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dy; - p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dz; - p2[1] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dy; - p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dz; - p3[1] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dy; - p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2 + dz; - p4[1] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dy; - p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[0] = p2[0] = myreg->lo; + p3[0] = p4[0] = myreg->hi; + p1[1] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[1] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2; + p3[1] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[1] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); image->draw_cylinder(p2, p4, reg.color, reg.diameter, 3); } else if (myreg->axis == 'y') { - p1[1] = p2[1] = myreg->lo + dy; - p3[1] = p4[1] = myreg->hi + dy; - p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dx; - p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dz; - p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dz; - p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dx; - p3[2] = myreg->radiushi * cos(RADINC * i) + 
myreg->c2 + dz; - p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dx; - p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[1] = p2[1] = myreg->lo; + p3[1] = p4[1] = myreg->hi; + p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2; + p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); image->draw_cylinder(p2, p4, reg.color, reg.diameter, 3); } else { // if (myreg->axis == 'z') - p1[2] = p2[2] = myreg->lo + dz; - p3[2] = p4[2] = myreg->hi + dz; - p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dx; - p1[1] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dy; - p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[1] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dy; - p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dx; - p3[1] = myreg->radiushi * cos(RADINC * i) + myreg->c2 + dy; - p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dx; - p4[1] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dy; + p1[2] = p2[2] = myreg->lo; + p3[2] = p4[2] = myreg->hi; + p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[1] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[1] = myreg->radiuslo * cos(RADINC * 
(i+1)) + myreg->c2; + p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[1] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[1] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); @@ -1893,16 +1918,20 @@ void DumpImage::create_image() } else if (reg.style == FILLED) { for (int i = 0; i < RESOLUTION; ++i) { if (myreg->axis == 'x') { - p1[0] = p2[0] = myreg->lo + dx; - p3[0] = p4[0] = myreg->hi + dx; - p1[1] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dy; - p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dz; - p2[1] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dy; - p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dz; - p3[1] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dy; - p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2 + dz; - p4[1] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dy; - p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[0] = p2[0] = myreg->lo; + p3[0] = p4[0] = myreg->hi; + p1[1] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[1] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2; + p3[1] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[1] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + 
myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -1911,16 +1940,20 @@ void DumpImage::create_image() } } else if (myreg->axis == 'y') { - p1[1] = p2[1] = myreg->lo + dy; - p3[1] = p4[1] = myreg->hi + dy; - p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dx; - p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dz; - p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dz; - p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dx; - p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2 + dz; - p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dx; - p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[1] = p2[1] = myreg->lo; + p3[1] = p4[1] = myreg->hi; + p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[2] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2; + p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[2] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[2] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -1928,16 +1961,20 @@ void DumpImage::create_image() image->draw_triangle(p2, p4, p3, reg.color); } } else { // if (myreg->axis == 'z') - p1[2] = p2[2] = 
myreg->lo + dz; - p3[2] = p4[2] = myreg->hi + dz; - p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1 + dx; - p1[1] = myreg->radiuslo * cos(RADINC * i) + myreg->c2 + dy; - p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[1] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2 + dy; - p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1 + dx; - p3[1] = myreg->radiushi * cos(RADINC * i) + myreg->c2 + dy; - p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1 + dx; - p4[1] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2 + dy; + p1[2] = p2[2] = myreg->lo; + p3[2] = p4[2] = myreg->hi; + p1[0] = myreg->radiuslo * sin(RADINC * i) + myreg->c1; + p1[1] = myreg->radiuslo * cos(RADINC * i) + myreg->c2; + p2[0] = myreg->radiuslo * sin(RADINC * (i+1)) + myreg->c1; + p2[1] = myreg->radiuslo * cos(RADINC * (i+1)) + myreg->c2; + p3[0] = myreg->radiushi * sin(RADINC * i) + myreg->c1; + p3[1] = myreg->radiushi * cos(RADINC * i) + myreg->c2; + p4[0] = myreg->radiushi * sin(RADINC * (i+1)) + myreg->c1; + p4[1] = myreg->radiushi * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -1947,6 +1984,7 @@ void DumpImage::create_image() } } } + } else if (regstyle == "cylinder") { auto *myreg = dynamic_cast(reg.ptr); // inconsistent style. should not happen. 
@@ -1954,60 +1992,72 @@ void DumpImage::create_image() double lo[3], hi[3]; if (myreg->axis == 'x') { - lo[0] = myreg->lo + dx; - lo[1] = myreg->c1 + dy; - lo[2] = myreg->c2 + dz; - hi[0] = myreg->hi + dx; - hi[1] = myreg->c1 + dy; - hi[2] = myreg->c2 + dz; + lo[0] = myreg->lo; + lo[1] = myreg->c1; + lo[2] = myreg->c2; + hi[0] = myreg->hi; + hi[1] = myreg->c1; + hi[2] = myreg->c2; } else if (myreg->axis == 'y') { - lo[0] = myreg->c1 + dx; - lo[1] = myreg->lo + dy; - lo[2] = myreg->c2 + dz; - hi[0] = myreg->c1 + dx; - hi[1] = myreg->hi + dy; - hi[2] = myreg->c2 + dz; + lo[0] = myreg->c1; + lo[1] = myreg->lo; + lo[2] = myreg->c2; + hi[0] = myreg->c1; + hi[1] = myreg->hi; + hi[2] = myreg->c2; } else { // myreg->axis == 'z' - lo[0] = myreg->c1 + dx; - lo[1] = myreg->c2 + dy; - lo[2] = myreg->lo + dz; - hi[0] = myreg->c1 + dx; - hi[1] = myreg->c2 + dy; - hi[2] = myreg->hi + dz; + lo[0] = myreg->c1; + lo[1] = myreg->c2; + lo[2] = myreg->lo; + hi[0] = myreg->c1; + hi[1] = myreg->c2; + hi[2] = myreg->hi; } double p1[3], p2[3], p3[3], p4[3]; if (reg.style == FRAME) { for (int i = 0; i < RESOLUTION; ++i) { if (myreg->axis == 'x') { - p1[0] = p2[0] = myreg->lo + dx; - p3[0] = p4[0] = myreg->hi + dx; - p1[1] = p3[1] = myreg->radius * sin(RADINC * i) + myreg->c1 + dy; - p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2 + dz; - p2[1] = p4[1] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dy; - p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[0] = p2[0] = myreg->lo; + p3[0] = p4[0] = myreg->hi; + p1[1] = p3[1] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[1] = p4[1] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], 
p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); image->draw_cylinder(p2, p4, reg.color, reg.diameter, 3); } else if (myreg->axis == 'y') { - p1[1] = p2[1] = myreg->lo + dy; - p3[1] = p4[1] = myreg->hi + dy; - p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1 + dx; - p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2 + dz; - p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[1] = p2[1] = myreg->lo; + p3[1] = p4[1] = myreg->hi; + p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); image->draw_cylinder(p2, p4, reg.color, reg.diameter, 3); } else { // if (myreg->axis == 'z') - p1[2] = p2[2] = myreg->lo + dz; - p3[2] = p4[2] = myreg->hi + dz; - p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1 + dx; - p1[1] = p3[1] = myreg->radius * cos(RADINC * i) + myreg->c2 + dy; - p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[1] = p4[1] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dy; + p1[2] = p2[2] = myreg->lo; + p3[2] = p4[2] = myreg->hi; + p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[1] = p3[1] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + 
myreg->c1; + p2[1] = p4[1] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); image->draw_cylinder(p1, p2, reg.color, reg.diameter, 3); image->draw_cylinder(p3, p4, reg.color, reg.diameter, 3); image->draw_cylinder(p1, p3, reg.color, reg.diameter, 3); @@ -2017,12 +2067,16 @@ void DumpImage::create_image() } else if (reg.style == FILLED) { for (int i = 0; i < RESOLUTION; ++i) { if (myreg->axis == 'x') { - p1[0] = p2[0] = myreg->lo + dx; - p3[0] = p4[0] = myreg->hi + dx; - p1[1] = p3[1] = myreg->radius * sin(RADINC * i) + myreg->c1 + dy; - p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2 + dz; - p2[1] = p4[1] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dy; - p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[0] = p2[0] = myreg->lo; + p3[0] = p4[0] = myreg->hi; + p1[1] = p3[1] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[1] = p4[1] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -2030,12 +2084,16 @@ void DumpImage::create_image() image->draw_triangle(p3, p4, p2, reg.color); } } else if (myreg->axis == 'y') { - p1[1] = p2[1] = myreg->lo + dy; - p3[1] = p4[1] = myreg->hi + dy; - p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1 + dx; - p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2 + dz; - p2[0] = p4[0] = 
myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dz; + p1[1] = p2[1] = myreg->lo; + p3[1] = p4[1] = myreg->hi; + p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[2] = p3[2] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1; + p2[2] = p4[2] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -2043,12 +2101,16 @@ void DumpImage::create_image() image->draw_triangle(p3, p4, p2, reg.color); } } else { // if (myreg->axis == 'z') - p1[2] = p2[2] = myreg->lo + dz; - p3[2] = p4[2] = myreg->hi + dz; - p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1 + dx; - p1[1] = p3[1] = myreg->radius * cos(RADINC * i) + myreg->c2 + dy; - p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1 + dx; - p2[1] = p4[1] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2 + dy; + p1[2] = p2[2] = myreg->lo; + p3[2] = p4[2] = myreg->hi; + p1[0] = p3[0] = myreg->radius * sin(RADINC * i) + myreg->c1; + p1[1] = p3[1] = myreg->radius * cos(RADINC * i) + myreg->c2; + p2[0] = p4[0] = myreg->radius * sin(RADINC * (i+1)) + myreg->c1; + p2[1] = p4[1] = myreg->radius * cos(RADINC * (i+1)) + myreg->c2; + myreg->forward_transform(p1[0], p1[1], p1[2]); + myreg->forward_transform(p2[0], p2[1], p2[2]); + myreg->forward_transform(p3[0], p3[1], p3[2]); + myreg->forward_transform(p4[0], p4[1], p4[2]); if (!myreg->open_faces[0]) image->draw_triangle(p1, p2, lo, reg.color); if (!myreg->open_faces[1]) image->draw_triangle(p3, p4, hi, reg.color); if (!myreg->open_faces[2]) { @@ -2058,35 
+2120,39 @@ void DumpImage::create_image() } } } + } else if (regstyle == "ellipsoid") { auto *myreg = dynamic_cast(reg.ptr); // inconsistent style. should not happen. if (!myreg) continue; double center[3]; - center[0] = myreg->xc + dx; - center[1] = myreg->yc + dy; - center[2] = myreg->zc + dz; + center[0] = myreg->xc; + center[1] = myreg->yc; + center[2] = myreg->zc; double radius[3] = {myreg->a, myreg->b, myreg->c}; if (reg.style == FRAME) { - ellipsoid2wireframe(image, 4, reg.color, reg.diameter, center, radius); + ellipsoid2wireframe(image, 4, reg.color, reg.diameter, center, radius, reg.ptr); } else if (reg.style == FILLED) { - ellipsoid2filled(image, 4, reg.color, center, radius); + ellipsoid2filled(image, 4, reg.color, center, radius, reg.ptr); } + } else if (regstyle == "prism") { auto *myreg = dynamic_cast(reg.ptr); // inconsistent style. should not happen. if (!myreg) continue; double block[8][3]; - block[0][0] = myreg->xlo + dx; block[0][1] = myreg->ylo + dy; block[0][2] = myreg->zlo + dz; - block[1][0] = myreg->xlo + myreg->xz + dx; block[1][1] = myreg->ylo + myreg->yz + dy; block[1][2] = myreg->zhi + dz; - block[2][0] = myreg->xlo + myreg->xy + myreg->xz + dx; block[2][1] = myreg->yhi + myreg->yz + dy; block[2][2] = myreg->zhi + dz; - block[3][0] = myreg->xlo + myreg->xy + dx; block[3][1] = myreg->yhi + dy; block[3][2] = myreg->zlo + dz; - block[4][0] = myreg->xhi + dx; block[4][1] = myreg->ylo + dy; block[4][2] = myreg->zlo + dz; - block[5][0] = myreg->xhi + myreg->xz + dx; block[5][1] = myreg->ylo + myreg->yz + dy; block[5][2] = myreg->zhi + dz; - block[6][0] = myreg->xhi + myreg->xy + myreg->xz + dx; block[6][1] = myreg->yhi + myreg->yz + dy; block[6][2] = myreg->zhi + dz; - block[7][0] = myreg->xhi + myreg->xy + dx; block[7][1] = myreg->yhi + dy; block[7][2] = myreg->zlo + dz; + block[0][0] = myreg->xlo; block[0][1] = myreg->ylo; block[0][2] = myreg->zlo; + block[1][0] = myreg->xlo + myreg->xz; block[1][1] = myreg->ylo + myreg->yz; block[1][2] = 
myreg->zhi; + block[2][0] = myreg->xlo + myreg->xy + myreg->xz; block[2][1] = myreg->yhi + myreg->yz; block[2][2] = myreg->zhi; + block[3][0] = myreg->xlo + myreg->xy; block[3][1] = myreg->yhi; block[3][2] = myreg->zlo; + block[4][0] = myreg->xhi; block[4][1] = myreg->ylo; block[4][2] = myreg->zlo; + block[5][0] = myreg->xhi + myreg->xz; block[5][1] = myreg->ylo + myreg->yz; block[5][2] = myreg->zhi; + block[6][0] = myreg->xhi + myreg->xy + myreg->xz; block[6][1] = myreg->yhi + myreg->yz; block[6][2] = myreg->zhi; + block[7][0] = myreg->xhi + myreg->xy; block[7][1] = myreg->yhi; block[7][2] = myreg->zlo; + for (int i = 0; i < 8; ++i) + reg.ptr->forward_transform(block[i][0], block[i][1], block[i][2]); if (reg.style == FRAME) { image->draw_cylinder(block[0],block[1],reg.color,reg.diameter,3); @@ -2133,13 +2199,14 @@ void DumpImage::create_image() if (!myreg) continue; double center[3]; - center[0] = myreg->xc + dx; - center[1] = myreg->yc + dy; - center[2] = myreg->zc + dz; + center[0] = myreg->xc; + center[1] = myreg->yc; + center[2] = myreg->zc; if (reg.style == FRAME) { double radius[3] = {myreg->radius,myreg->radius,myreg->radius}; - ellipsoid2wireframe(image, 4, reg.color, reg.diameter, center, radius); + ellipsoid2wireframe(image, 4, reg.color, reg.diameter, center, radius, reg.ptr); } else if (reg.style == FILLED) { + myreg->forward_transform(center[0], center[1], center[2]); image->draw_sphere(center, reg.color, 2.0 * myreg->radius); } } else { diff --git a/src/region.h b/src/region.h index 8e4ec00b4e5..1bc0f58f750 100644 --- a/src/region.h +++ b/src/region.h @@ -95,7 +95,10 @@ class Region : protected Pointers { virtual void length_restart_string(int &); virtual void reset_vel(); - protected: + // track translation and rotation + void forward_transform(double &, double &, double &); + +protected: // implemented by each region, generally not called by other classes @@ -112,7 +115,6 @@ class Region : protected Pointers { void add_contact(int, double *, 
double, double, double); void options(int, char **); void point_on_line_segment(double *, double *, double *, double *); - void forward_transform(double &, double &, double &); double point[3], runit[3]; private: From e53b891b8af8451e2121e082c7fe08477b7e7080 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 14:07:54 -0500 Subject: [PATCH 200/604] Update thermo_modify.rst --- doc/src/thermo_modify.rst | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/src/thermo_modify.rst b/doc/src/thermo_modify.rst index dcbe3135081..40f54d77b05 100644 --- a/doc/src/thermo_modify.rst +++ b/doc/src/thermo_modify.rst @@ -21,7 +21,7 @@ Syntax *norm* value = *yes* or *no* *flush* value = *yes* or *no* *line* value = *one* or *multi* or *yaml* - *colname* values = ID string, or *default* + *colname* values = ID string, or *auto* or *default* string = new column header name ID = integer from 1 to N, or integer from -1 to -N, where N = # of quantities being output *or* a thermo keyword or reference to compute, fix, property or variable. @@ -158,14 +158,20 @@ block ("yaml"). This modify option overrides the *one*, *multi*, or .. versionadded:: 4May2022 -The *colname* keyword can be used to change the default header keyword -for a column or field of thermodynamic output. The setting for *ID -string* replaces the default text with the provided string. *ID* can be -a positive integer when it represents the column number counting from -the left, a negative integer when it represents the column number from -the right (i.e., :math:`-1` is the last column/keyword), or a thermo keyword -(or compute, fix, property, or variable reference) and then it replaces the -string for that specific thermo keyword. +The *colname* keyword can be used to change the default header keyword for +a column or field of thermodynamic output. 
The column names can either be +manually set by the user, or automatically generated for certain fixes and +computes. The setting for *ID string* replaces the default text with the +provided string. *ID* can be a positive integer when it represents the +column number counting from the left, a negative integer when it represents +the column number from the right (i.e., :math:`-1` is the last +column/keyword), or a thermo keyword (or compute, fix, property, or +variable reference) and then it replaces the string for that specific +thermo keyword. With a setting of *auto*, certain fixes or computes will +generate more descriptive strings as their thermo keywords, which are +described in the 'output' section of their documentation. Current commands +that automatically generate descriptive thermo output strings include 'fix +nvt', 'fix npt', 'fix nph', 'compute reduce', and 'fix bond/react'. The *colname* keyword can be used multiple times. If multiple *colname* settings refer to the same keyword, the last setting has precedence. A From dbec5a513703a0507565edbe41528baaf88d15dc Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 14:33:32 -0500 Subject: [PATCH 201/604] Update fix_bond_react.rst --- doc/src/fix_bond_react.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/src/fix_bond_react.rst b/doc/src/fix_bond_react.rst index 2df98621784..00d83f617ed 100644 --- a/doc/src/fix_bond_react.rst +++ b/doc/src/fix_bond_react.rst @@ -788,6 +788,13 @@ There is one quantity in the global vector for each *react* argument: (1) cumulative number of reactions that occurred +This fix supports automatically generated thermo column names when using +:doc:`thermo_modify colname auto `. The thermo column names +are 'f_', followed by the fix ID, followed by a colon, followed by the +react-ID. E.g., the first example in the Examples section above would +print a thermo column name of 'f_5:myrxn1', compared to the default column +output name of 'f_5[1]'. 
+ No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. From 162d1770eb80178727592e1054d1cf664fc45036 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 14:43:27 -0500 Subject: [PATCH 202/604] Update fix_nh.rst --- doc/src/fix_nh.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/src/fix_nh.rst b/doc/src/fix_nh.rst index 0a4076364cb..3ea95f94aff 100644 --- a/doc/src/fix_nh.rst +++ b/doc/src/fix_nh.rst @@ -642,6 +642,15 @@ by tchain for eta_dot, followed by ndof for omega, etc: * KE_etap_dot[pchain] = kinetic energy of each barostat thermostat velocity (energy units) * PE_strain[1] = scalar strain energy (energy units) +This fix supports automatically generated thermo column names when using +:doc:`thermo_modify colname auto `. The thermo column names +are 'f_', followed by the fix ID, followed by a colon, followed by a +keyword listed above, followed by an index for that keyword. Indices range +from 1 to the number of values for that keyword. E.g., the first example +in the Examples section above would print a thermo column name of +'f_1:eta[1]', compared to the default column output name of 'f_1[1]'. +Similarly, f_1:eta_dot[1] would be printed instead of the default 'f_1[4]'. + These fixes can ramp their external temperature and pressure over multiple runs, using the *start* and *stop* keywords of the :doc:`run ` command. See the :doc:`run ` command for details of From b054c1773f43152476867743618086afff2dea56 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 14:55:02 -0500 Subject: [PATCH 203/604] correct docs example does not run otherwise! 
--- doc/src/compute_reduce.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index d9aa7e18211..9502db470e0 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -189,7 +189,7 @@ with maximum stretch, you can do it as follows: compute 1 all property/local batom1 batom2 compute 2 all bond/local dist - compute 3 all reduce max c_1[1] c_1[2] c_2 replace 1 3 replace 2 3 + compute 3 all reduce max c_1[1] c_1[2] c_2 replace 1 3 replace 2 3 inputs local thermo_style custom step temp c_3[1] c_3[2] c_3[3] The first two input values in the compute reduce command are vectors From 9a2f7117e17804d18fc1b986d7857292d15d1bc8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 15:46:37 -0500 Subject: [PATCH 204/604] update bounding box for variable and moving region block --- src/region.cpp | 1 + src/region_block.cpp | 79 ++++++++++++++++++++++++++++++++++++-------- src/region_block.h | 1 + 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/src/region.cpp b/src/region.cpp index 1615ad59528..1b4aac8251a 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -232,6 +232,7 @@ void Region::pretransform() if (zstr) dz = input->variable->compute_equal(zvar); } if (rotateflag) theta = input->variable->compute_equal(tvar); + bbox_update(); } /* ---------------------------------------------------------------------- diff --git a/src/region_block.cpp b/src/region_block.cpp index 1bd570b7d9d..a12be68662c 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -20,10 +20,11 @@ #include "variable.h" #include +#include using namespace LAMMPS_NS; -static constexpr double BIG = 1.0e20; +static constexpr double BIG = 1.0e200; /* ---------------------------------------------------------------------- */ @@ -154,18 +155,21 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : if (ylo > yhi) error->all(FLERR, "Illegal region block ylo: {} >= yhi: {}", ylo, yhi); if 
(zlo > zhi) error->all(FLERR, "Illegal region block zlo: {} >= zhi: {}", zlo, zhi); - // extent of block - - if (interior && !dynamic && !varshape) { - bboxflag = 1; - extent_xlo = xlo; - extent_xhi = xhi; - extent_ylo = ylo; - extent_yhi = yhi; - extent_zlo = zlo; - extent_zhi = zhi; - } else - bboxflag = 0; + // set extent of block + + if (interior) { + if (dynamic || varshape) { + RegBlock::bbox_update(); + } else { + bboxflag = 1; + extent_xlo = xlo; + extent_xhi = xhi; + extent_ylo = ylo; + extent_yhi = yhi; + extent_zlo = zlo; + extent_zhi = zhi; + } + } else bboxflag = 0; // particle could be close to all 6 planes // particle can only touch 3 planes @@ -437,7 +441,7 @@ int RegBlock::surface_exterior(double *x, double cutoff) change region shape via variable evaluation ------------------------------------------------------------------------- */ -void RegBlock::shape_update() // addition +void RegBlock::shape_update() { if (xlostyle == VARIABLE) xlo = xscale * input->variable->compute_equal(xlovar); if (xhistyle == VARIABLE) xhi = xscale * input->variable->compute_equal(xhivar); @@ -508,6 +512,53 @@ void RegBlock::shape_update() // addition MathExtra::copy3(corners[0][2], corners[5][3]); } +/* update the boundary information based on the corners */ + +void RegBlock::bbox_update() +{ + if (interior) { + if (varshape || dynamic) { + double pos[3]; + double xmin = BIG; + double xmax = -BIG; + double ymin = BIG; + double ymax = -BIG; + double zmin = BIG; + double zmax = -BIG; + + // the corners of face[0] and face[1] cover the full extent of the region + // transform and get min/max in x-, y-, and z-direction for each corner + + for (int i = 0; i < 4; ++i) { + MathExtra::copy3(corners[0][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + 
MathExtra::copy3(corners[1][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + } + + bboxflag = 1; + extent_xlo = xmin; + extent_xhi = xmax; + extent_ylo = ymin; + extent_yhi = ymax; + extent_zlo = zmin; + extent_zhi = zmax; + } + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ diff --git a/src/region_block.h b/src/region_block.h index 330faccd31a..ec33fcfd67d 100644 --- a/src/region_block.h +++ b/src/region_block.h @@ -37,6 +37,7 @@ class RegBlock : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; protected: double xlo, xhi, ylo, yhi, zlo, zhi; From ed7927d9b91f6dc55162adbb9cb9c692787952bf Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 16:39:44 -0500 Subject: [PATCH 205/604] Update compute_reduce.rst --- doc/src/compute_reduce.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 9502db470e0..b37f3ab611c 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -243,6 +243,19 @@ scalar or vector values from a compute as input. See the :doc:`Howto output ` doc page for an overview of LAMMPS output options. +This fix supports automatically generated thermo column names when using +:doc:`thermo_modify colname auto `. The thermo column names +are 'c_', followed by the compute ID, followed by a colon, followed by the +reduction operation (*mode*), followed by the compute being operated on in +parentheses. 
E.g., for the first in-text example above, the first printed +thermo column name would be 'c_2:min(c_myPress[1])', rather than the +default 'c_2[1]'. If the *replace* keyword is used, *vec1* of the *replace* +keyword is listed after the colon, followed by '<-', followed by the +reduction operation, followed by *vec2* of the *replace* keyword in +parentheses. E.g., for the second in-text example above, the first printed +thermo column name would be 'c_3:c_1[1]<-max(c_2)' rather than the default +'c_3[1]'. + All the scalar or vector values calculated by this compute are "intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on per-atom or local vectors, in which case the calculated values are From 5cb86896534a9017e167f5ded9197866b427930f Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 16:42:04 -0500 Subject: [PATCH 206/604] do what docs promise --- src/compute_reduce.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 0342478ea01..42d36c657e2 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -699,8 +699,9 @@ std::string ComputeReduce::get_thermo_colname(int m) { auto &val2 = values[replace[m]]; return fmt::format("c_{}:c_{}[{}]<-{}(c_{})", id, val1.id, val1.argindex, modestr, val2.id); } else { + if (m == -1) m = 0; // scalar auto &val = values[m]; - return fmt::format("c_{}:{}(c_{})", id, modestr, val.id); + return fmt::format("c_{}:{}(c_{}[{}])", id, modestr, val.id, val.argindex); } return "none"; } From 4a365d0b6316e13668427fc3608f69dba3e388d9 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 8 Nov 2025 16:47:03 -0500 Subject: [PATCH 207/604] doc typos --- doc/src/compute_reduce.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index b37f3ab611c..24a476dc7c2 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ 
-243,15 +243,15 @@ scalar or vector values from a compute as input. See the :doc:`Howto output ` doc page for an overview of LAMMPS output options. -This fix supports automatically generated thermo column names when using -:doc:`thermo_modify colname auto `. The thermo column names -are 'c_', followed by the compute ID, followed by a colon, followed by the -reduction operation (*mode*), followed by the compute being operated on in -parentheses. E.g., for the first in-text example above, the first printed -thermo column name would be 'c_2:min(c_myPress[1])', rather than the -default 'c_2[1]'. If the *replace* keyword is used, *vec1* of the *replace* -keyword is listed after the colon, followed by '<-', followed by the -reduction operation, followed by *vec2* of the *replace* keyword in +This compute supports automatically generated thermo column names when +using :doc:`thermo_modify colname auto `. The thermo column +names are 'c_', followed by the compute ID, followed by a colon, followed +by the reduction operation (*mode*), followed by the compute being operated +on in parentheses. E.g., for the first in-text example above, the first +printed thermo column name would be 'c_2:min(c_myPress[1])', rather than +the default 'c_2[1]'. If the *replace* keyword is used, *vec1* of the +*replace* keyword is listed after the colon, followed by '<-', followed by +the reduction operation, followed by *vec2* of the *replace* keyword in parentheses. E.g., for the second in-text example above, the first printed thermo column name would be 'c_3:c_1[1]<-max(c_2)' rather than the default 'c_3[1]'. 
From 1484da936023576134994169fa4ae16099b196a8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 17:09:39 -0500 Subject: [PATCH 208/604] add dynamic bounding box support to cone region --- src/region.cpp | 1 + src/region_block.cpp | 4 +- src/region_cone.cpp | 161 +++++++++++++++++++++++++++++++++++-------- src/region_cone.h | 1 + 4 files changed, 138 insertions(+), 29 deletions(-) diff --git a/src/region.cpp b/src/region.cpp index 1b4aac8251a..8cd3cd3b234 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -35,6 +35,7 @@ Region::Region(LAMMPS *lmp, int /*narg*/, char **arg) : id = utils::strdup(arg[0]); style = utils::strdup(arg[1]); + bboxflag = 0; varshape = 0; xstr = ystr = zstr = tstr = nullptr; dx = dy = dz = 0.0; diff --git a/src/region_block.cpp b/src/region_block.cpp index a12be68662c..0c3ed5dbf5b 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -19,8 +19,8 @@ #include "math_extra.h" #include "variable.h" -#include #include +#include using namespace LAMMPS_NS; @@ -169,7 +169,7 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : extent_zlo = zlo; extent_zhi = zhi; } - } else bboxflag = 0; + } // particle could be close to all 6 planes // particle can only touch 3 planes diff --git a/src/region_cone.cpp b/src/region_cone.cpp index 26dda8ba2e0..1d4fc523a0e 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -20,14 +20,16 @@ #include "domain.h" #include "error.h" #include "input.h" +#include "math_extra.h" #include "variable.h" +#include #include #include using namespace LAMMPS_NS; -static constexpr double BIG = 1.0e20; +static constexpr double BIG = 1.0e200; /* ---------------------------------------------------------------------- */ @@ -264,33 +266,37 @@ RegCone::RegCone(LAMMPS *lmp, int narg, char **arg) : if (hi <= lo) error->all(FLERR, "Illegal cone length in region cone command"); // extent of cone - maxradius = ((radiuslo > radiushi) ? 
radiuslo : radiushi); + maxradius = std::max(radiuslo, radiushi); - if (interior && !dynamic && !varshape) { - bboxflag = 1; - if (axis == 'x') { - extent_xlo = lo; - extent_xhi = hi; - extent_ylo = c1 - maxradius; - extent_yhi = c1 + maxradius; - extent_zlo = c2 - maxradius; - extent_zhi = c2 + maxradius; - } - if (axis == 'y') { - extent_xlo = c1 - maxradius; - extent_xhi = c1 + maxradius; - extent_ylo = lo; - extent_yhi = hi; - extent_zlo = c2 - maxradius; - extent_zhi = c2 + maxradius; - } - if (axis == 'z') { - extent_xlo = c1 - maxradius; - extent_xhi = c1 + maxradius; - extent_ylo = c2 - maxradius; - extent_yhi = c2 + maxradius; - extent_zlo = lo; - extent_zhi = hi; + if (interior) { + if (dynamic || varshape) { + RegCone::bbox_update(); + } else { + bboxflag = 1; + if (axis == 'x') { + extent_xlo = lo; + extent_xhi = hi; + extent_ylo = c1 - maxradius; + extent_yhi = c1 + maxradius; + extent_zlo = c2 - maxradius; + extent_zhi = c2 + maxradius; + } + if (axis == 'y') { + extent_xlo = c1 - maxradius; + extent_xhi = c1 + maxradius; + extent_ylo = lo; + extent_yhi = hi; + extent_zlo = c2 - maxradius; + extent_zhi = c2 + maxradius; + } + if (axis == 'z') { + extent_xlo = c1 - maxradius; + extent_xhi = c1 + maxradius; + extent_ylo = c2 - maxradius; + extent_yhi = c2 + maxradius; + extent_zlo = lo; + extent_zhi = hi; + } } } else bboxflag = 0; @@ -804,6 +810,107 @@ void RegCone::shape_update() } } +/* update the boundary information */ + +void RegCone::bbox_update() +{ + if (varshape || dynamic) { + double corners[2][4][3], pos[3]; + double xmin, xmax, ymin, ymax, zmin, zmax; + + // define bounding box corners in region internal positions + + if (axis == 'x') { + xmin = lo; + xmax = hi; + ymin = c1 - maxradius; + ymax = c1 + maxradius; + zmin = c2 - maxradius; + zmax = c2 + maxradius; + } + if (axis == 'y') { + xmin = c1 - maxradius; + xmax = c1 + maxradius; + ymin = lo; + ymax = hi; + zmin = c2 - maxradius; + zmax = c2 + maxradius; + } + if (axis == 'z') { + xmin 
= c1 - maxradius; + xmax = c1 + maxradius; + ymin = c2 - maxradius; + ymax = c2 + maxradius; + zmin = lo; + zmax = hi; + } + + // face[0] + + corners[0][0][0] = xmin; + corners[0][0][1] = ymin; + corners[0][0][2] = zmin; + corners[0][1][0] = xmin; + corners[0][1][1] = ymin; + corners[0][1][2] = zmax; + corners[0][2][0] = xmin; + corners[0][2][1] = ymax; + corners[0][2][2] = zmax; + corners[0][3][0] = xmin; + corners[0][3][1] = ymax; + corners[0][3][2] = zmin; + + // face[1] + + corners[1][0][0] = xmax; + corners[1][0][1] = ymin; + corners[1][0][2] = zmin; + corners[1][1][0] = xmax; + corners[1][1][1] = ymin; + corners[1][1][2] = zmax; + corners[1][2][0] = xmax; + corners[1][2][1] = ymax; + corners[1][2][2] = zmax; + corners[1][3][0] = xmax; + corners[1][3][1] = ymax; + corners[1][3][2] = zmin; + + // the corners of face[0] and face[1] cover the full extent of the region + // transform and get min/max in x-, y-, and z-direction for each corner + + xmin = ymin = zmin = BIG; + xmax = ymax = zmax = -BIG; + + for (int i = 0; i < 4; ++i) { + MathExtra::copy3(corners[0][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + + MathExtra::copy3(corners[1][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + } + + bboxflag = 1; + extent_xlo = xmin; + extent_xhi = xmax; + extent_ylo = ymin; + extent_yhi = ymax; + extent_zlo = zmin; + extent_zhi = zmax; + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ diff --git a/src/region_cone.h 
b/src/region_cone.h index 31710a9e931..f2b082efe9c 100644 --- a/src/region_cone.h +++ b/src/region_cone.h @@ -36,6 +36,7 @@ class RegCone : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; private: char axis; From 0966d5aff4ca51ae257b0c366f77e2fd72be78c6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 20:34:48 -0500 Subject: [PATCH 209/604] add dynamic bounding box for remaining regions --- src/region_cone.cpp | 3 +- src/region_cylinder.cpp | 175 ++++++++++++++++++++++++++++++++-------- src/region_cylinder.h | 1 + src/region_prism.cpp | 86 +++++++++++++------- src/region_prism.h | 1 + src/region_sphere.cpp | 48 ++++++++--- src/region_sphere.h | 1 + 7 files changed, 241 insertions(+), 74 deletions(-) diff --git a/src/region_cone.cpp b/src/region_cone.cpp index 1d4fc523a0e..20df0d6f504 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -298,8 +298,7 @@ RegCone::RegCone(LAMMPS *lmp, int narg, char **arg) : extent_zhi = hi; } } - } else - bboxflag = 0; + } // particle could be close to cone surface and 2 ends // particle can only touch surface and 1 end diff --git a/src/region_cylinder.cpp b/src/region_cylinder.cpp index 6546366522a..f3e3bb118e9 100644 --- a/src/region_cylinder.cpp +++ b/src/region_cylinder.cpp @@ -17,19 +17,21 @@ #include "error.h" #include "input.h" #include "update.h" +#include "math_extra.h" #include "variable.h" +#include #include #include using namespace LAMMPS_NS; -static constexpr double BIG = 1.0e20; +static constexpr double BIG = 1.0e200; /* ---------------------------------------------------------------------- */ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : - Region(lmp, narg, arg), c1str(nullptr), c2str(nullptr), rstr(nullptr) + Region(lmp, narg, arg), c1str(nullptr), c2str(nullptr), rstr(nullptr) { c1style = c2style = CONSTANT; options(narg - 8, &arg[8]); @@ -54,6 
+56,7 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : c1 = yscale * utils::numeric(FLERR, arg[3], false, lmp); c1style = CONSTANT; } + if (utils::strmatch(arg[4], "^v_")) { c2str = utils::strdup(arg[4] + 2); c2 = 0.0; @@ -63,7 +66,9 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : c2 = zscale * utils::numeric(FLERR, arg[4], false, lmp); c2style = CONSTANT; } + } else if (axis == 'y') { + if (utils::strmatch(arg[3], "^v_")) { c1str = utils::strdup(arg[3] + 2); c1 = 0.0; @@ -73,6 +78,7 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : c1 = xscale * utils::numeric(FLERR, arg[3], false, lmp); c1style = CONSTANT; } + if (utils::strmatch(arg[4], "^v_")) { c2str = utils::strdup(arg[4] + 2); c2 = 0.0; @@ -83,6 +89,7 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : c2style = CONSTANT; } } else if (axis == 'z') { + if (utils::strmatch(arg[3], "^v_")) { c1str = utils::strdup(arg[3] + 2); c1 = 0.0; @@ -92,6 +99,7 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : c1 = xscale * utils::numeric(FLERR, arg[3], false, lmp); c1style = CONSTANT; } + if (utils::strmatch(arg[4], "^v_")) { c2str = utils::strdup(arg[4] + 2); c2 = 0.0; @@ -195,44 +203,44 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : // extent of cylinder // for variable radius, uses initial radius - if (interior && !dynamic && !varshape) { - bboxflag = 1; - if (axis == 'x') { - extent_xlo = lo; - extent_xhi = hi; - extent_ylo = c1 - radius; - extent_yhi = c1 + radius; - extent_zlo = c2 - radius; - extent_zhi = c2 + radius; - } - if (axis == 'y') { - extent_xlo = c1 - radius; - extent_xhi = c1 + radius; - extent_ylo = lo; - extent_yhi = hi; - extent_zlo = c2 - radius; - extent_zhi = c2 + radius; - } - if (axis == 'z') { - extent_xlo = c1 - radius; - extent_xhi = c1 + radius; - extent_ylo = c2 - radius; - extent_yhi = c2 + radius; - extent_zlo = lo; - extent_zhi = hi; + if (interior) { + if (dynamic || varshape) { + 
RegCylinder::bbox_update(); + } else { + bboxflag = 1; + if (axis == 'x') { + extent_xlo = lo; + extent_xhi = hi; + extent_ylo = c1 - radius; + extent_yhi = c1 + radius; + extent_zlo = c2 - radius; + extent_zhi = c2 + radius; + } + if (axis == 'y') { + extent_xlo = c1 - radius; + extent_xhi = c1 + radius; + extent_ylo = lo; + extent_yhi = hi; + extent_zlo = c2 - radius; + extent_zhi = c2 + radius; + } + if (axis == 'z') { + extent_xlo = c1 - radius; + extent_xhi = c1 + radius; + extent_ylo = c2 - radius; + extent_yhi = c2 + radius; + extent_zlo = lo; + extent_zhi = hi; + } } - } else - bboxflag = 0; + } // particle could be close to cylinder surface and 2 ends // particle can only touch surface and 1 end cmax = 3; contact = new Contact[cmax]; - if (interior) - tmax = 2; - else - tmax = 1; + tmax = (interior ? 2 : 1); } /* ---------------------------------------------------------------------- */ @@ -784,6 +792,107 @@ void RegCylinder::shape_update() } } +/* update the boundary information */ + +void RegCylinder::bbox_update() +{ + if (varshape || dynamic) { + double corners[2][4][3], pos[3]; + double xmin, xmax, ymin, ymax, zmin, zmax; + + // define bounding box corners in region internal positions + + if (axis == 'x') { + xmin = lo; + xmax = hi; + ymin = c1 - radius; + ymax = c1 + radius; + zmin = c2 - radius; + zmax = c2 + radius; + } + if (axis == 'y') { + xmin = c1 - radius; + xmax = c1 + radius; + ymin = lo; + ymax = hi; + zmin = c2 - radius; + zmax = c2 + radius; + } + if (axis == 'z') { + xmin = c1 - radius; + xmax = c1 + radius; + ymin = c2 - radius; + ymax = c2 + radius; + zmin = lo; + zmax = hi; + } + + // face[0] + + corners[0][0][0] = xmin; + corners[0][0][1] = ymin; + corners[0][0][2] = zmin; + corners[0][1][0] = xmin; + corners[0][1][1] = ymin; + corners[0][1][2] = zmax; + corners[0][2][0] = xmin; + corners[0][2][1] = ymax; + corners[0][2][2] = zmax; + corners[0][3][0] = xmin; + corners[0][3][1] = ymax; + corners[0][3][2] = zmin; + + // face[1] + + 
corners[1][0][0] = xmax; + corners[1][0][1] = ymin; + corners[1][0][2] = zmin; + corners[1][1][0] = xmax; + corners[1][1][1] = ymin; + corners[1][1][2] = zmax; + corners[1][2][0] = xmax; + corners[1][2][1] = ymax; + corners[1][2][2] = zmax; + corners[1][3][0] = xmax; + corners[1][3][1] = ymax; + corners[1][3][2] = zmin; + + // the corners of face[0] and face[1] cover the full extent of the region + // transform and get min/max in x-, y-, and z-direction for each corner + + xmin = ymin = zmin = BIG; + xmax = ymax = zmax = -BIG; + + for (int i = 0; i < 4; ++i) { + MathExtra::copy3(corners[0][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + + MathExtra::copy3(corners[1][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + } + + bboxflag = 1; + extent_xlo = xmin; + extent_xhi = xmax; + extent_ylo = ymin; + extent_yhi = ymax; + extent_zlo = zmin; + extent_zhi = zmax; + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ diff --git a/src/region_cylinder.h b/src/region_cylinder.h index bed7dd4b375..ecb135de693 100644 --- a/src/region_cylinder.h +++ b/src/region_cylinder.h @@ -37,6 +37,7 @@ class RegCylinder : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; void set_velocity_shape() override; void velocity_contact_shape(double *, double *) override; diff --git a/src/region_prism.cpp b/src/region_prism.cpp 
index 91e1291abdc..ba6485d1e5b 100644 --- a/src/region_prism.cpp +++ b/src/region_prism.cpp @@ -23,11 +23,12 @@ #include "math_extra.h" #include "variable.h" +#include #include using namespace LAMMPS_NS; -static constexpr double BIG = 1.0e20; +static constexpr double BIG = 1.0e200; /* ---------------------------------------------------------------------- */ @@ -193,19 +194,22 @@ RegPrism::RegPrism(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg), // extent of prism - if (interior && !dynamic && !varshape) { - bboxflag = 1; - extent_xlo = MIN(xlo, xlo + xy); - extent_xlo = MIN(extent_xlo, extent_xlo + xz); - extent_ylo = MIN(ylo, ylo + yz); - extent_zlo = zlo; - - extent_xhi = MAX(xhi, xhi + xy); - extent_xhi = MAX(extent_xhi, extent_xhi + xz); - extent_yhi = MAX(yhi, yhi + yz); - extent_zhi = zhi; - } else - bboxflag = 0; + if (interior) { + if (dynamic || varshape) { + RegPrism::bbox_update(); + } else { + bboxflag = 1; + extent_xlo = MIN(xlo, xlo + xy); + extent_xlo = MIN(extent_xlo, extent_xlo + xz); + extent_ylo = MIN(ylo, ylo + yz); + extent_zlo = zlo; + + extent_xhi = MAX(xhi, xhi + xy); + extent_xhi = MAX(extent_xhi, extent_xhi + xz); + extent_yhi = MAX(yhi, yhi + yz); + extent_zhi = zhi; + } + } // particle could be close to all 6 planes // particle can only touch 3 planes @@ -547,20 +551,6 @@ void RegPrism::shape_update() if (yz != 0.0 && zlo == -BIG && zhi == BIG) error->all(FLERR, "Illegal region prism non-zero yz tilt with infinite z size"); - // extent of prism - - if (interior) { - extent_xlo = MIN(xlo, xlo + xy); - extent_xlo = MIN(extent_xlo, extent_xlo + xz); - extent_ylo = MIN(ylo, ylo + yz); - extent_zlo = zlo; - - extent_xhi = MAX(xhi, xhi + xy); - extent_xhi = MAX(extent_xhi, extent_xhi + xz); - extent_yhi = MAX(yhi, yhi + yz); - extent_zhi = zhi; - } - // h = transformation matrix from tilt coords (0-1) to box coords (xyz) h[0][0] = xhi - xlo; @@ -633,6 +623,46 @@ void RegPrism::shape_update() for (int i = 0; i < 6; i++) 
MathExtra::norm3(face[i]); } + +/* update the boundary information based on the corners */ + +void RegPrism::bbox_update() +{ + if (interior) { + if (varshape || dynamic) { + double pos[3]; + double xmin = BIG; + double xmax = -BIG; + double ymin = BIG; + double ymax = -BIG; + double zmin = BIG; + double zmax = -BIG; + + // the min/max of the 8 corners the prism cover the full extent of the region + // transform and get min/max in x-, y-, and z-direction for each corner + + for (int i = 0; i < 8; ++i) { + MathExtra::copy3(corners[i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + } + + bboxflag = 1; + extent_xlo = xmin; + extent_xhi = xmax; + extent_ylo = ymin; + extent_yhi = ymax; + extent_zlo = zmin; + extent_zhi = zmax; + } + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ diff --git a/src/region_prism.h b/src/region_prism.h index 23cb9860040..46233f0ba48 100644 --- a/src/region_prism.h +++ b/src/region_prism.h @@ -37,6 +37,7 @@ class RegPrism : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; private: double xlo, xhi, ylo, yhi, zlo, zhi; diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index 641956d7eff..a1868307d72 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : - Region(lmp, narg, arg), xstr(nullptr), ystr(nullptr), zstr(nullptr), rstr(nullptr) + Region(lmp, narg, arg), xstr(nullptr), 
ystr(nullptr), zstr(nullptr), rstr(nullptr) { options(narg - 6, &arg[6]); @@ -81,16 +81,19 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : // extent of sphere // for variable radius, uses initial radius and origin for variable center - if (interior && !dynamic && !varshape) { - bboxflag = 1; - extent_xlo = xc - radius; - extent_xhi = xc + radius; - extent_ylo = yc - radius; - extent_yhi = yc + radius; - extent_zlo = zc - radius; - extent_zhi = zc + radius; - } else - bboxflag = 0; + if (interior) { + if (dynamic || varshape) { + RegSphere::bbox_update(); + } else { + bboxflag = 1; + extent_xlo = xc - radius; + extent_xhi = xc + radius; + extent_ylo = yc - radius; + extent_yhi = yc + radius; + extent_zlo = zc - radius; + extent_zhi = zc + radius; + } + } cmax = 1; contact = new Contact[cmax]; @@ -209,6 +212,29 @@ void RegSphere::shape_update() } } +/* update the boundary information */ + +void RegSphere::bbox_update() +{ + if (varshape || dynamic) { + bboxflag = 1; + extent_xlo = xc - radius; + extent_xhi = xc + radius; + extent_ylo = yc - radius; + extent_yhi = yc + radius; + extent_zlo = zc - radius; + extent_zhi = zc + radius; + if (moveflag) { + extent_xlo += dx; + extent_xhi += dx; + extent_ylo += dy; + extent_yhi += dy; + extent_zlo += dz; + extent_zhi += dz; + } + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ diff --git a/src/region_sphere.h b/src/region_sphere.h index de71b076640..701d89cd6b0 100644 --- a/src/region_sphere.h +++ b/src/region_sphere.h @@ -35,6 +35,7 @@ class RegSphere : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; void set_velocity_shape() override; void velocity_contact_shape(double *, double *) override; From 973a3f46b340d221a52a0e4f105aa4af49243eee Mon 
Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 20:40:58 -0500 Subject: [PATCH 210/604] reformat --- doc/src/region.rst | 49 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/doc/src/region.rst b/doc/src/region.rst index dbe14360ff8..a6dc781561b 100644 --- a/doc/src/region.rst +++ b/doc/src/region.rst @@ -232,16 +232,16 @@ a time dependent position of the sphere or cylinder region. Whenever a region property, such as a coordinate or an upper/lower bound, is defined via an equal-style variable, the variable should - not cause any of the region boundaries to move - too far within a single timestep. Otherwise, bad dynamics will occur. - "Too far" means a small fraction of the approximate distance of - closest approach between two particles, which for the case of Lennard-Jones - particles is the distance of the energy minimum while for granular - particles it is their diameter. An example is a rapidly varying direction - vector in region plane since a small change in the normal to plane will - shift the region surface far away from the region point by a large displacement. - Similarly, bad dynamics can also occur for fast changing variables employed - in the move/rotate options. + not cause any of the region boundaries to move too far within a + single timestep. Otherwise, bad dynamics will occur. "Too far" means + a small fraction of the approximate distance of closest approach + between two particles, which for the case of Lennard-Jones particles + is the distance of the energy minimum while for granular particles it + is their diameter. An example is a rapidly varying direction vector + in region plane since a small change in the normal to plane will + shift the region surface far away from the region point by a large + displacement. Similarly, bad dynamics can also occur for fast + changing variables employed in the move/rotate options. 
See the :doc:`Howto tricilinc ` page for a geometric description of triclinic boxes, as defined by LAMMPS, and @@ -311,17 +311,18 @@ define the lattice spacings which are used as follows: If the *move* or *rotate* keywords are used, the region is "dynamic", meaning its location or orientation changes with time. These keywords -cannot be used with a *union* or *intersect* style region. Instead, -the keywords should be used to make the individual sub-regions of the -*union* or *intersect* region dynamic. Normally, each sub-region -should be "dynamic" in the same manner (e.g. rotate around the same -point), though this is not a requirement. - -The *move* keyword allows one or more :doc:`equal-style variables ` to be used to specify the x,y,z displacement -of the region, typically as a function of time. A variable is -specified as v_name, where name is the variable name. Any of the -three variables can be specified as NULL, in which case no -displacement is calculated in that dimension. +cannot be used with a *union* or *intersect* style region. Instead, the +keywords should be used to make the individual sub-regions of the +*union* or *intersect* region dynamic. Normally, each sub-region should +be "dynamic" in the same manner (e.g. rotate around the same point), +though this is not a requirement. + +The *move* keyword allows one or more :doc:`equal-style variables +` to be used to specify the x,y,z displacement of the region, +typically as a function of time. A variable is specified as v_name, +where name is the variable name. Any of the three variables can be +specified as NULL, in which case no displacement is calculated in that +dimension. Note that equal-style variables can specify formulas with various mathematical functions, and include :doc:`thermo_style ` @@ -329,7 +330,8 @@ command keywords for the simulation box parameters and timestep and elapsed time. 
Thus it is easy to specify a region displacement that change as a function of time or spans consecutive runs in a continuous fashion. For the latter, see the *start* and *stop* keywords of the -:doc:`run ` command and the *elaplong* keyword of :doc:`thermo_style custom ` for details. +:doc:`run ` command and the *elaplong* keyword of +:doc:`thermo_style custom ` for details. For example, these commands would displace a region from its initial position, in the positive x direction, effectively at a constant @@ -362,7 +364,8 @@ wrap around the axis in the direction of rotation. The *move* and *rotate* keywords can be used together. In this case, the displacement specified by the *move* keyword is applied to the *P* -point of the *rotate* keyword. +point of the *rotate* keyword which is equivalent to applying the +rotation *first* and then the translation. ---------- From 87341203ae2dbe2bf3d58b99d62faaf6c4cc0b9f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 21:01:53 -0500 Subject: [PATCH 211/604] enable use of thermo keywords elapsed and elaplong in between runs --- src/thermo.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/thermo.cpp b/src/thermo.cpp index fb52da2db62..dcd531e4592 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -1375,15 +1375,21 @@ int Thermo::evaluate_keyword(const std::string &word, double *answer) dvalue = bivalue; } else if (word == "elapsed") { - if (update->whichflag == 0) - error->all(FLERR, "The variable thermo keyword elapsed cannot be used between runs"); - compute_elapsed(); + // if this is before the first run return 0, otherwise the result from last step of last run + if ((update->whichflag == 0) && (update->first_update == 0)) { + bivalue = 0; + } else { + compute_elapsed(); + } dvalue = bivalue; } else if (word == "elaplong") { - if (update->whichflag == 0) - error->all(FLERR, "The variable thermo keyword elaplong cannot be used between runs"); - 
compute_elapsed_long(); + // if this is before the first run return 0, otherwise the result from last step of last run + if ((update->whichflag == 0) && (update->first_update == 0)) { + bivalue = 0; + } else { + compute_elapsed_long(); + } dvalue = bivalue; } else if (word == "dt") { From 7b3b3fc0b362d48d9f7db51af8d36199208da17b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 8 Nov 2025 22:58:39 -0500 Subject: [PATCH 212/604] correct pointer variable declaration --- src/RHEO/fix_rheo_thermal.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/RHEO/fix_rheo_thermal.cpp b/src/RHEO/fix_rheo_thermal.cpp index 8ca4321e91a..61f4ade90ab 100644 --- a/src/RHEO/fix_rheo_thermal.cpp +++ b/src/RHEO/fix_rheo_thermal.cpp @@ -360,8 +360,8 @@ void FixRHEOThermal::post_integrate() int *type = atom->type; double imass; - double rmass = atom->rmass; - double mass = atom->mass; + double *rmass = atom->rmass; + double *mass = atom->mass; int n_melt = 0; int n_freeze = 0; @@ -476,8 +476,8 @@ void FixRHEOThermal::pre_force(int /*vflag*/) int *type = atom->type; double imass; - double rmass = atom->rmass; - double mass = atom->mass; + double *rmass = atom->rmass; + double *mass = atom->mass; int nall = atom->nlocal + atom->nghost; From ec1dc46db6dc85d1b7d680dde35f0bef0a75172b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 9 Nov 2025 10:37:16 -0500 Subject: [PATCH 213/604] fixes for some very old documentation bugs from Cecilia --- doc/src/Howto_chunk.rst | 2 +- doc/src/fix_ave_histo.rst | 2 +- doc/src/pair_hybrid.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/Howto_chunk.rst b/doc/src/Howto_chunk.rst index ea000eb22ff..d1ee7621dc3 100644 --- a/doc/src/Howto_chunk.rst +++ b/doc/src/Howto_chunk.rst @@ -96,7 +96,7 @@ category: * :doc:`compute msd/chunk ` * :doc:`compute property/chunk ` * :doc:`compute temp/chunk ` -* :doc:`compute torque/chunk ` +* :doc:`compute torque/chunk ` * :doc:`compute vcm/chunk ` They 
each take the ID of a :doc:`compute chunk/atom diff --git a/doc/src/fix_ave_histo.rst b/doc/src/fix_ave_histo.rst index 0fa4b02e588..23775c1f51b 100644 --- a/doc/src/fix_ave_histo.rst +++ b/doc/src/fix_ave_histo.rst @@ -281,7 +281,7 @@ assigned to the lower of the two bins. If *beyond* is set to *ignore* then values :math:`<` *lo* and values :math:`>` *hi* are ignored (i.e., they are not binned). If *beyond* is set to *end*, then values :math:`<` *lo* are counted in the first bin and values :math:`>` *hi* are counted in the last bin. -If *beyond* is set to *extend*, then two extra bins are created so that there +If *beyond* is set to *extra*, then two extra bins are created so that there are :math:`N_\text{bins}+2` total bins. Values :math:`<` *lo* are counted in the first bin and values :math:`>` *hi* are counted in the last bin :math:`(N_\text{bins}+2)`\ . Values between diff --git a/doc/src/pair_hybrid.rst b/doc/src/pair_hybrid.rst index 8d5d03b7e36..a765ba24f70 100644 --- a/doc/src/pair_hybrid.rst +++ b/doc/src/pair_hybrid.rst @@ -384,7 +384,7 @@ This input achieves the same effect: .. 
code-block:: LAMMPS - special_bonds 0.0 0.0 0.1 + special_bonds lj/coul 0.0 0.0 0.1 pair_style hybrid lj/charmm/coul/long 8.0 10.0 lj/cut/coul/long 10.0 pair_modify pair lj/cut/coul/long special lj 0.0 0.0 0.5 pair_modify pair lj/cut/coul/long special coul 0.0 0.0 0.83333333 From bf5918f92da1174928eda3cc5c88fd49c624a4e0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 9 Nov 2025 10:58:53 -0500 Subject: [PATCH 214/604] must not throw execeptions from within multi-threaded regions --- src/OPENMP/npair_bin_ghost_omp.cpp | 13 +++++++++++-- src/OPENMP/npair_bin_omp.cpp | 13 +++++++++++-- src/OPENMP/npair_halffull_omp.cpp | 14 +++++++++++--- src/OPENMP/npair_multi_omp.cpp | 13 +++++++++++-- src/OPENMP/npair_nsq_ghost_omp.cpp | 14 +++++++++++--- src/OPENMP/npair_nsq_omp.cpp | 13 +++++++++++-- src/OPENMP/npair_respa_bin_omp.cpp | 25 +++++++++++++++++++------ src/OPENMP/npair_respa_nsq_omp.cpp | 25 +++++++++++++++++++------ src/OPENMP/npair_trim_omp.cpp | 13 +++++++++++-- 9 files changed, 115 insertions(+), 28 deletions(-) diff --git a/src/OPENMP/npair_bin_ghost_omp.cpp b/src/OPENMP/npair_bin_ghost_omp.cpp index 7d5ae62352b..05e4e9a1283 100644 --- a/src/OPENMP/npair_bin_ghost_omp.cpp +++ b/src/OPENMP/npair_bin_ghost_omp.cpp @@ -52,10 +52,11 @@ void NPairBinGhostOmp::build(NeighList *list) const int nall = nlocal + atom->nghost; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nall); @@ -185,9 +186,17 @@ void NPairBinGhostOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->gnum = nall - nlocal; } diff --git a/src/OPENMP/npair_bin_omp.cpp b/src/OPENMP/npair_bin_omp.cpp index abc2aafed01..86befa38b0c 100644 --- a/src/OPENMP/npair_bin_omp.cpp +++ b/src/OPENMP/npair_bin_omp.cpp @@ -56,10 +56,11 @@ void NPairBinOmp::build(NeighList *list) const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nlocal); @@ -252,9 +253,17 @@ void NPairBinOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; if (!HALF) list->gnum = 0; } diff --git a/src/OPENMP/npair_halffull_omp.cpp b/src/OPENMP/npair_halffull_omp.cpp index 4a186239327..44c354c0497 100644 --- a/src/OPENMP/npair_halffull_omp.cpp +++ b/src/OPENMP/npair_halffull_omp.cpp @@ -53,11 +53,11 @@ void NPairHalffullOmp::build(NeighList *list) { const int inum_full = list->listfull->inum; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; - #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(inum_full); @@ -149,9 +149,17 @@ void NPairHalffullOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = inum_full; } diff --git a/src/OPENMP/npair_multi_omp.cpp b/src/OPENMP/npair_multi_omp.cpp index a3f36f60e00..04e12ebefb9 100644 --- 
a/src/OPENMP/npair_multi_omp.cpp +++ b/src/OPENMP/npair_multi_omp.cpp @@ -58,10 +58,11 @@ void NPairMultiOmp::build(NeighList *list) const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nlocal); @@ -279,9 +280,17 @@ void NPairMultiOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->gnum = 0; } diff --git a/src/OPENMP/npair_nsq_ghost_omp.cpp b/src/OPENMP/npair_nsq_ghost_omp.cpp index 5cb7d49233e..70b2fdd8c70 100644 --- a/src/OPENMP/npair_nsq_ghost_omp.cpp +++ b/src/OPENMP/npair_nsq_ghost_omp.cpp @@ -12,7 +12,6 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ - #include "npair_nsq_ghost_omp.h" #include "npair_omp.h" #include "omp_compat.h" @@ -52,10 +51,11 @@ void NPairNsqGhostOmp::build(NeighList *list) const int nall = nlocal + atom->nghost; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nall); @@ -172,9 +172,17 @@ void NPairNsqGhostOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->gnum = nall - nlocal; } diff --git a/src/OPENMP/npair_nsq_omp.cpp b/src/OPENMP/npair_nsq_omp.cpp index 3319c2ef2ad..8b13509d756 100644 --- a/src/OPENMP/npair_nsq_omp.cpp +++ b/src/OPENMP/npair_nsq_omp.cpp @@ -65,10 +65,11 @@ void NPairNsqOmp::build(NeighList *list) const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nlocal); @@ -221,9 +222,17 @@ void NPairNsqOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->gnum = 0; } diff --git a/src/OPENMP/npair_respa_bin_omp.cpp b/src/OPENMP/npair_respa_bin_omp.cpp index ca15ebf6b07..cee10493275 100644 --- a/src/OPENMP/npair_respa_bin_omp.cpp +++ b/src/OPENMP/npair_respa_bin_omp.cpp @@ -55,13 +55,14 @@ void NPairRespaBinOmp::build(NeighList *list) const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nlocal); @@ -244,23 +245,35 @@ void NPairRespaBinOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); - + if (ipage.status()) { + overflow = 1; + break; + } ilist_inner[i] = i; firstneigh_inner[i] = neighptr_inner; numneigh_inner[i] = n_inner; ipage.vgot(n_inner); - if (ipage_inner.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); - + if (ipage_inner.status()) { + overflow = 1; + break; + } if (respamiddle) { ilist_middle[i] = i; firstneigh_middle[i] = neighptr_middle; numneigh_middle[i] = n_middle; ipage_middle->vgot(n_middle); - if (ipage_middle->status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage_middle->status()) { + overflow = 1; + break; + } } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->inum_inner = nlocal; if (respamiddle) list->inum_middle = nlocal; diff --git a/src/OPENMP/npair_respa_nsq_omp.cpp b/src/OPENMP/npair_respa_nsq_omp.cpp index 862f70f2b3c..07983cdcd89 100644 --- a/src/OPENMP/npair_respa_nsq_omp.cpp +++ b/src/OPENMP/npair_respa_nsq_omp.cpp @@ -64,13 +64,14 @@ void NPairRespaNsqOmp::build(NeighList *list) const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const double delta = 0.01 * force->angstrom; + int overflow = 0; NPAIR_OMP_INIT; const int respamiddle = list->respamiddle; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) +#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(nlocal); @@ -224,23 +225,35 @@ void NPairRespaNsqOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); - + if (ipage.status()) { + overflow = 1; + break; + } ilist_inner[i] = i; firstneigh_inner[i] = neighptr_inner; numneigh_inner[i] = n_inner; ipage.vgot(n_inner); - if (ipage_inner.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); - + if (ipage_inner.status()) { + overflow = 1; + break; + } if (respamiddle) { ilist_middle[i] = i; firstneigh_middle[i] = neighptr_middle; numneigh_middle[i] = n_middle; ipage_middle->vgot(n_middle); - if (ipage_middle->status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage_middle->status()) { + overflow = 1; + break; + } } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = nlocal; list->inum_inner = nlocal; if (respamiddle) list->inum_middle = nlocal; diff --git a/src/OPENMP/npair_trim_omp.cpp b/src/OPENMP/npair_trim_omp.cpp index a69eb827c6f..f004a508ad6 100644 --- a/src/OPENMP/npair_trim_omp.cpp +++ b/src/OPENMP/npair_trim_omp.cpp @@ -34,11 +34,12 @@ NPairTrimOmp::NPairTrimOmp(LAMMPS *lmp) : NPair(lmp) {} void NPairTrimOmp::build(NeighList *list) { const int inum_copy = list->listcopy->inum; + int overflow = 0; NPAIR_OMP_INIT; #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) 
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list) reduction(+:overflow) #endif NPAIR_OMP_SETUP(inum_copy); @@ -98,8 +99,16 @@ void NPairTrimOmp::build(NeighList *list) firstneigh[i] = neighptr; numneigh[i] = n; ipage.vgot(n); - if (ipage.status()) error->one(FLERR, Error::NOLASTLINE, "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipage.status()) { + overflow = 1; + break; + } } NPAIR_OMP_CLOSE; + + if (overflow > 0) + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list overflow, boost neigh_modify one" + utils::errorurl(36)); + list->inum = inum_copy; } From 6d2dedf60190dafc49fad57e3dd1cd4659f45f7a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 9 Nov 2025 15:19:20 -0500 Subject: [PATCH 215/604] improve error messages for some OPENMP package styles --- src/OPENMP/angle_dipole_omp.cpp | 2 +- src/OPENMP/angle_lepton_omp.cpp | 2 +- src/OPENMP/bond_lepton_omp.cpp | 2 +- src/OPENMP/dihedral_lepton_omp.cpp | 2 +- src/OPENMP/domain_omp.cpp | 3 ++- src/OPENMP/fix_nh_asphere_omp.cpp | 5 +++-- src/OPENMP/fix_nh_omp.cpp | 4 ++-- src/OPENMP/fix_nh_sphere_omp.cpp | 6 ++++-- src/OPENMP/fix_nph_asphere_omp.cpp | 6 ++++-- src/OPENMP/fix_omp.cpp | 13 +++++++------ src/OPENMP/fix_qeq_comb_omp.cpp | 2 +- src/OPENMP/fix_qeq_reaxff_omp.cpp | 3 ++- src/OPENMP/pair_lj_spica_coul_msm_omp.cpp | 4 ++-- src/OPENMP/pair_reaxff_omp.cpp | 19 +++++++++++-------- src/OPENMP/pair_rebo_omp.cpp | 3 ++- src/OPENMP/pair_rebomos_omp.cpp | 13 ++++++++++--- src/OPENMP/pair_sw_mod_omp.cpp | 7 ++++--- src/OPENMP/pair_tersoff_zbl_omp.cpp | 2 +- src/OPENMP/pppm_disp_omp.cpp | 6 ++++-- src/OPENMP/pppm_disp_tip4p_omp.cpp | 3 ++- src/OPENMP/pppm_tip4p_omp.cpp | 6 ++++-- src/OPENMP/respa_omp.cpp | 2 +- src/OPENMP/thr_omp.cpp | 3 ++- 23 files changed, 72 insertions(+), 46 deletions(-) diff --git a/src/OPENMP/angle_dipole_omp.cpp b/src/OPENMP/angle_dipole_omp.cpp index 3c771df69f7..a3f1971d722 100644 --- a/src/OPENMP/angle_dipole_omp.cpp +++ 
b/src/OPENMP/angle_dipole_omp.cpp @@ -43,7 +43,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag) ev_init(eflag,vflag); if (!force->newton_bond) - error->all(FLERR,"'newton' flag for bonded interactions must be 'on'"); + error->all(FLERR, Error::NOLASTLINE, "'newton' flag for bonded interactions must be 'on'"); const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp index 918fb578714..6680de239c3 100644 --- a/src/OPENMP/angle_lepton_omp.cpp +++ b/src/OPENMP/angle_lepton_omp.cpp @@ -108,7 +108,7 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { - error->all(FLERR, e.what()); + error->all(FLERR, Error::NOLASTLINE, e.what()); } const auto *_noalias const x = (dbl3_t *) atom->x[0]; diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp index 995e2fac094..c72a27290e9 100644 --- a/src/OPENMP/bond_lepton_omp.cpp +++ b/src/OPENMP/bond_lepton_omp.cpp @@ -105,7 +105,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { - error->all(FLERR, e.what()); + error->all(FLERR, Error::NOLASTLINE, e.what()); } const auto *_noalias const x = (dbl3_t *) atom->x[0]; diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp index 206749fcfa3..40d0dabcd5d 100644 --- a/src/OPENMP/dihedral_lepton_omp.cpp +++ b/src/OPENMP/dihedral_lepton_omp.cpp @@ -110,7 +110,7 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr) if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression()); } } catch (std::exception &e) { - error->all(FLERR, e.what()); + error->all(FLERR, Error::NOLASTLINE, e.what()); } const double *const *const x = atom->x; diff --git a/src/OPENMP/domain_omp.cpp 
b/src/OPENMP/domain_omp.cpp index 7f692e1dc8b..4c6ee8f17df 100644 --- a/src/OPENMP/domain_omp.cpp +++ b/src/OPENMP/domain_omp.cpp @@ -55,7 +55,8 @@ void DomainOMP::pbc() #endif // clang-format on for (int i = 0; i < n3; i++) if (!std::isfinite(coord[i])) flag = 1; - if (flag) error->one(FLERR, "Non-numeric atom coords - simulation unstable" + utils::errorurl(6)); + if (flag) error->one(FLERR, Error::NOLASTLINE, + "Non-numeric atom coords - simulation unstable" + utils::errorurl(6)); auto *_noalias const x = (dbl3_t *) atom->x[0]; auto *_noalias const v = (dbl3_t *) atom->v[0]; diff --git a/src/OPENMP/fix_nh_asphere_omp.cpp b/src/OPENMP/fix_nh_asphere_omp.cpp index 3fa339cff43..35a42d2119d 100644 --- a/src/OPENMP/fix_nh_asphere_omp.cpp +++ b/src/OPENMP/fix_nh_asphere_omp.cpp @@ -45,7 +45,7 @@ void FixNHAsphereOMP::init() { avec = dynamic_cast(atom->style_match("ellipsoid")); if (!avec) - error->all(FLERR,"Compute nvt/nph/npt asphere requires atom style ellipsoid"); + error->all(FLERR, Error::NOLASTLINE, "Fix {} requires atom style ellipsoid", style); // check that all particles are finite-size // no point particles allowed, spherical is OK @@ -57,7 +57,8 @@ void FixNHAsphereOMP::init() for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) if (ellipsoid[i] < 0) - error->one(FLERR,"Fix nvt/nph/npt asphere requires extended particles"); + error->one(FLERR, Error::NOLASTLINE, + "Fix {} requires to use *only* extended particles", style); FixNHOMP::init(); } diff --git a/src/OPENMP/fix_nh_omp.cpp b/src/OPENMP/fix_nh_omp.cpp index 544c1919e3b..64b70f131a8 100644 --- a/src/OPENMP/fix_nh_omp.cpp +++ b/src/OPENMP/fix_nh_omp.cpp @@ -197,8 +197,8 @@ void FixNHOMP::remap() domain->xz > TILTMAX*domain->xprd || domain->xy < -TILTMAX*domain->xprd || domain->xy > TILTMAX*domain->xprd) - error->all(FLERR,"Fix npt/nph has tilted box too far in one step - " - "periodic cell is too far from equilibrium state"); + error->all(FLERR, Error::NOLASTLINE, "Fix {} has tilted box too far in 
one step - " + "periodic cell is too far from equilibrium state", style); domain->set_global_box(); domain->set_local_box(); diff --git a/src/OPENMP/fix_nh_sphere_omp.cpp b/src/OPENMP/fix_nh_sphere_omp.cpp index 5b1a2d8bbe0..321c169befb 100644 --- a/src/OPENMP/fix_nh_sphere_omp.cpp +++ b/src/OPENMP/fix_nh_sphere_omp.cpp @@ -37,8 +37,10 @@ using dbl3_t = struct { double x,y,z; }; FixNHSphereOMP::FixNHSphereOMP(LAMMPS *lmp, int narg, char **arg) : FixNHOMP(lmp, narg, arg) { - if (!atom->omega_flag) error->all(FLERR,"Fix {} requires atom attribute omega", style); - if (!atom->radius_flag) error->all(FLERR,"Fix {} requires atom attribute radius", style); + if (!atom->omega_flag) + error->all(FLERR, Error::NOLASTLINE, "Fix {} requires atom attribute omega", style); + if (!atom->radius_flag) + error->all(FLERR, Error::NOLASTLINE, "Fix {} requires atom attribute radius", style); } /* ---------------------------------------------------------------------- */ diff --git a/src/OPENMP/fix_nph_asphere_omp.cpp b/src/OPENMP/fix_nph_asphere_omp.cpp index f66083ddb84..396d0c8dbed 100644 --- a/src/OPENMP/fix_nph_asphere_omp.cpp +++ b/src/OPENMP/fix_nph_asphere_omp.cpp @@ -24,8 +24,10 @@ using namespace FixConst; FixNPHAsphereOMP::FixNPHAsphereOMP(LAMMPS *lmp, int narg, char **arg) : FixNHAsphereOMP(lmp, narg, arg) { - if (tstat_flag) error->all(FLERR, "Temperature control can not be used with fix nph/asphere/tmp"); - if (!pstat_flag) error->all(FLERR, "Pressure control must be used with fix nph/asphere/omp"); + if (tstat_flag) + error->all(FLERR, Error::NOLASTLINE, "Temperature control cannot be used with fix {}", style); + if (!pstat_flag) + error->all(FLERR, Error::NOLASTLINE, "Pressure control must be used with fix {}", style); // create a new compute temp style // id = fix-ID + temp diff --git a/src/OPENMP/fix_omp.cpp b/src/OPENMP/fix_omp.cpp index 88f8acca510..8327689759d 100644 --- a/src/OPENMP/fix_omp.cpp +++ b/src/OPENMP/fix_omp.cpp @@ -65,7 +65,7 @@ FixOMP::FixOMP(LAMMPS 
*lmp, int narg, char **arg) _nthr(-1), _neighbor(true), _mixed(false), _reduced(true), _pair_compute_flag(false), _kspace_compute_flag(false) { - if (narg < 4) error->all(FLERR,"Illegal package omp command"); + if (narg < 4) utils::missing_cmd_args(FLERR, "package omp", error); int nthreads = 1; if (narg > 3) { @@ -80,7 +80,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) #if defined(_OPENMP) if (nthreads < 1) - error->all(FLERR,"Illegal number of OpenMP threads requested"); + error->all(FLERR, 3 - 2, "Illegal number of OpenMP threads requested"); int reset_thr = 0; #endif @@ -97,10 +97,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package omp command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "package omp neigh", error); _neighbor = utils::logical(FLERR,arg[iarg+1],false,lmp) != 0; iarg += 2; - } else error->all(FLERR,"Illegal package omp command"); + } else error->all(FLERR, iarg - 2, "Unknown package omp keyword {}", arg[iarg]); } // print summary of settings @@ -161,7 +161,8 @@ void FixOMP::init() { // OPENMP package cannot be used with atom_style template if (atom->molecular == Atom::TEMPLATE) - error->all(FLERR,"OPENMP package does not (yet) work with atom_style template"); + error->all(FLERR, Error::NOLASTLINE, + "OPENMP package does not (yet) work with atom_style template"); // adjust number of data objects when the number of OpenMP // threads has been changed somehow @@ -198,7 +199,7 @@ void FixOMP::init() if (utils::strmatch(update->integrate_style,"^respa") && !utils::strmatch(update->integrate_style,"^respa/omp")) - error->all(FLERR,"Must use respa/omp for r-RESPA with /omp styles"); + error->all(FLERR, Error::NOLASTLINE, "Must use respa/omp for r-RESPA with /omp styles"); _pair_compute_flag = force->pair && force->pair->compute_flag; _kspace_compute_flag = force->kspace && force->kspace->compute_flag; diff 
--git a/src/OPENMP/fix_qeq_comb_omp.cpp b/src/OPENMP/fix_qeq_comb_omp.cpp index 8d5eb284cc2..76eb07a3340 100644 --- a/src/OPENMP/fix_qeq_comb_omp.cpp +++ b/src/OPENMP/fix_qeq_comb_omp.cpp @@ -37,7 +37,7 @@ using namespace FixConst; FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) : FixQEQComb(lmp, narg, arg) { - if (narg < 5) error->all(FLERR, "Illegal fix qeq/comb/omp command"); + if (narg < 5) utils::missing_cmd_args(FLERR, "fix qeq/comb/omp", error); } /* ---------------------------------------------------------------------- */ diff --git a/src/OPENMP/fix_qeq_reaxff_omp.cpp b/src/OPENMP/fix_qeq_reaxff_omp.cpp index c6d92dea6dc..92659d99b7b 100644 --- a/src/OPENMP/fix_qeq_reaxff_omp.cpp +++ b/src/OPENMP/fix_qeq_reaxff_omp.cpp @@ -224,7 +224,8 @@ void FixQEqReaxFFOMP::compute_H() } // omp if (m_fill >= H.m) - error->all(FLERR,"Fix qeq/reaxff: H matrix size has been exceeded: m_fill={} H.m={}\n", + error->all(FLERR, Error::NOLASTLINE, + "Fix qeq/reaxff: H matrix size has been exceeded: m_fill={} H.m={}\n", m_fill, H.m); } diff --git a/src/OPENMP/pair_lj_spica_coul_msm_omp.cpp b/src/OPENMP/pair_lj_spica_coul_msm_omp.cpp index 098e3bcbac3..6f1fc88d88e 100644 --- a/src/OPENMP/pair_lj_spica_coul_msm_omp.cpp +++ b/src/OPENMP/pair_lj_spica_coul_msm_omp.cpp @@ -44,8 +44,8 @@ PairLJSPICACoulMSMOMP::PairLJSPICACoulMSMOMP(LAMMPS *lmp) : void PairLJSPICACoulMSMOMP::compute(int eflag, int vflag) { if (force->kspace->scalar_pressure_flag) - error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' " - "with OMP MSM Pair styles"); + error->all(FLERR, Error::NOLASTLINE, + "Must use 'kspace_modify pressure/scalar no' with OMP MSM Pair styles"); ev_init(eflag,vflag); diff --git a/src/OPENMP/pair_reaxff_omp.cpp b/src/OPENMP/pair_reaxff_omp.cpp index 3a84719105f..537ba412bc3 100644 --- a/src/OPENMP/pair_reaxff_omp.cpp +++ b/src/OPENMP/pair_reaxff_omp.cpp @@ -102,7 +102,9 @@ PairReaxFFOMP::~PairReaxFFOMP() void PairReaxFFOMP::init_style() { - if (!atom->q_flag) 
error->all(FLERR,"Pair style reaxff/omp requires atom attribute q"); + if (!atom->q_flag) + error->all(FLERR, Error::NOLASTLINE, + "Pair style reaxff/omp requires atom attribute q"); auto acks2_fixes = modify->get_fix_by_style("^acks2/reax"); int have_qeq = modify->get_fix_by_style("^qeq/reax").size() @@ -112,22 +114,23 @@ void PairReaxFFOMP::init_style() if (qeqflag && (have_qeq != 1)) - error->all(FLERR,"Pair style reaxff requires use of exactly one of the " + error->all(FLERR, Error::NOLASTLINE, + "Pair style reaxff requires use of exactly one of the " "fix qeq/reaxff or fix qeq/shielded or fix acks2/reaxff or " "fix qtpie/reaxff or fix qeq/rel/reaxff commands"); api->system->acks2_flag = acks2_fixes.size(); if (api->system->acks2_flag) - error->all(FLERR,"Cannot (yet) use ACKS2 with OPENMP ReaxFF"); + error->all(FLERR, Error::NOLASTLINE, "Cannot (yet) use ACKS2 with OPENMP ReaxFF"); api->system->n = atom->nlocal; // my atoms api->system->N = atom->nlocal + atom->nghost; // mine + ghosts api->system->wsize = comm->nprocs; if (atom->tag_enable == 0) - error->all(FLERR,"Pair style reaxff/omp requires atom IDs"); + error->all(FLERR, Error::NOLASTLINE, "Pair style reaxff/omp requires atom IDs"); if (force->newton_pair == 0) - error->all(FLERR,"Pair style reaxff/omp requires newton pair on"); + error->all(FLERR, Error::NOLASTLINE, "Pair style reaxff/omp requires newton pair on"); // need a half neighbor list w/ Newton off and ghost neighbors // built whenever re-neighboring occurs @@ -136,7 +139,7 @@ void PairReaxFFOMP::init_style() cutmax = MAX3(api->control->nonb_cut, api->control->hbond_cut, api->control->bond_cut); if ((cutmax < 2.0*api->control->bond_cut) && (comm->me == 0)) - error->warning(FLERR,"Total cutoff < 2*bond cutoff. May need to use an " + error->warning(FLERR, "Total cutoff < 2*bond cutoff. 
May need to use an " "increased neighbor list skin."); if (fix_reaxff == nullptr) @@ -184,7 +187,7 @@ void PairReaxFFOMP::setup() int num_nbrs = estimate_reax_lists(); if (num_nbrs < 0) - error->all(FLERR,"Too many neighbors for pair style reaxff"); + error->all(FLERR, Error::NOLASTLINE, "Too many neighbors for pair style reaxff/omp"); Make_List(api->system->total_cap,num_nbrs,TYP_FAR_NEIGHBOR,api->lists+FAR_NBRS); (api->lists+FAR_NBRS)->error_ptr=error; @@ -355,7 +358,7 @@ void PairReaxFFOMP::write_reax_atoms() int *num_hbonds = fix_reaxff->num_hbonds; if (api->system->N > api->system->total_cap) - error->all(FLERR,"Too many ghost atoms"); + error->all(FLERR, Error::NOLASTLINE, "Too many ghost atoms in pair style reaxff/omp"); #if defined(_OPENMP) #pragma omp parallel for schedule(static) default(shared) diff --git a/src/OPENMP/pair_rebo_omp.cpp b/src/OPENMP/pair_rebo_omp.cpp index 6767ea166fc..f4c4eb053a3 100644 --- a/src/OPENMP/pair_rebo_omp.cpp +++ b/src/OPENMP/pair_rebo_omp.cpp @@ -30,7 +30,8 @@ PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) void PairREBOOMP::settings(int narg, char ** /* arg */) { - if (narg != 0) error->all(FLERR, "Illegal pair_style command"); + if (narg != 0) error->all(FLERR, Error::NOPOINTER, + "Pair style rebo/omp command does not use any keywords"); cutlj = 0.0; ljflag = torflag = 0; diff --git a/src/OPENMP/pair_rebomos_omp.cpp b/src/OPENMP/pair_rebomos_omp.cpp index 00dadde13e7..989b7dbd2f4 100644 --- a/src/OPENMP/pair_rebomos_omp.cpp +++ b/src/OPENMP/pair_rebomos_omp.cpp @@ -116,8 +116,10 @@ void PairREBOMoSOMP::REBO_neigh_thr() memory->create(nS,maxlocal,"REBOMoS:nS"); } + int overflow = 0; + #if defined(_OPENMP) -#pragma omp parallel LMP_DEFAULT_NONE +#pragma omp parallel LMP_DEFAULT_NONE reduction(+:overflow) #endif { int i,j,ii,jj,n,jnum,itype,jtype; @@ -185,10 +187,15 @@ void PairREBOMoSOMP::REBO_neigh_thr() REBO_firstneigh[i] = neighptr; REBO_numneigh[i] = n; ipg.vgot(n); - if (ipg.status()) - error->one(FLERR, 
Error::NOLASTLINE, "REBO list overflow, boost neigh_modify one" + utils::errorurl(36)); + if (ipg.status()) { + overflow = 1; + break; + } } } + if (overflow) + error->one(FLERR, Error::NOLASTLINE, + "REBO list overflow, boost neigh_modify one" + utils::errorurl(36)); } /* ---------------------------------------------------------------------- diff --git a/src/OPENMP/pair_sw_mod_omp.cpp b/src/OPENMP/pair_sw_mod_omp.cpp index 3a620a16ae5..3f547f3ca5b 100644 --- a/src/OPENMP/pair_sw_mod_omp.cpp +++ b/src/OPENMP/pair_sw_mod_omp.cpp @@ -43,13 +43,14 @@ void PairSWMODOMP::settings(int narg, char **arg) while (iarg < narg) { if (strcmp(arg[iarg],"maxdelcs") == 0) { - if (iarg+3 > narg) error->all(FLERR,"Illegal pair_style command"); + if (iarg+3 > narg) utils::missing_cmd_args(FLERR,"pair_style sw/mod/omp", error); delta1 = utils::numeric(FLERR,arg[iarg+1],false,Pointers::lmp); delta2 = utils::numeric(FLERR,arg[iarg+2],false,Pointers::lmp); iarg += 3; if ((delta1 < 0.0) || (delta1 > 1.0) || (delta2 < 0.0) || (delta2 > 1.0) || (delta1 > delta2)) - error->all(FLERR,"Illegal values for maxdelcs keyword"); - } else error->all(FLERR,"Illegal pair_style command"); + error->all(FLERR, Error::NOPOINTER, "Illegal values for maxdelcs keyword"); + } else error->all(FLERR, Error::NOPOINTER, + "Unkown pair_style sw/mod/omp keyword {}", arg[iarg]); } PairSWOMP::settings(narg-iarg,arg+iarg); } diff --git a/src/OPENMP/pair_tersoff_zbl_omp.cpp b/src/OPENMP/pair_tersoff_zbl_omp.cpp index 524d7fe509f..8103c15528a 100644 --- a/src/OPENMP/pair_tersoff_zbl_omp.cpp +++ b/src/OPENMP/pair_tersoff_zbl_omp.cpp @@ -72,7 +72,7 @@ PairTersoffZBLOMP::PairTersoffZBLOMP(LAMMPS *lmp) : PairTersoffOMP(lmp) global_a_0 = 0.529; global_epsilon_0 = 0.00552635 * 0.043365121; global_e = 1.0; - } else error->all(FLERR,"Pair tersoff/zbl requires metal or real units"); + } else error->all(FLERR, Error::NOLASTLINE, "Pair tersoff/zbl/omp requires metal or real units"); } /* 
---------------------------------------------------------------------- */ diff --git a/src/OPENMP/pppm_disp_omp.cpp b/src/OPENMP/pppm_disp_omp.cpp index e7f8d1c360f..25d4233653b 100644 --- a/src/OPENMP/pppm_disp_omp.cpp +++ b/src/OPENMP/pppm_disp_omp.cpp @@ -358,7 +358,8 @@ void PPPMDispOMP::particle_map(double dxinv, double dyinv, const int nzhi_out = nzhi_o; if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2])) - error->one(FLERR,"Non-numeric box dimensions. Simulation unstable."+utils::errorurl(6)); + error->one(FLERR, Error::NOLASTLINE, + "Non-numeric box dimensions. Simulation unstable."+utils::errorurl(6)); int flag = 0; #if defined(_OPENMP) @@ -388,7 +389,8 @@ void PPPMDispOMP::particle_map(double dxinv, double dyinv, int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR, Error::NOLASTLINE, "Out of range atoms - cannot compute PPPM" + utils::errorurl(4)); + if (flag_all) error->all(FLERR, Error::NOLASTLINE, + "Out of range atoms - cannot compute PPPM" + utils::errorurl(4)); } /* ---------------------------------------------------------------------- diff --git a/src/OPENMP/pppm_disp_tip4p_omp.cpp b/src/OPENMP/pppm_disp_tip4p_omp.cpp index 7cadbbb2acb..8a22625655c 100644 --- a/src/OPENMP/pppm_disp_tip4p_omp.cpp +++ b/src/OPENMP/pppm_disp_tip4p_omp.cpp @@ -354,7 +354,8 @@ void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv, const int nzhi_out = nzhi_o; if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2])) - error->one(FLERR,"Non-numeric box dimensions - simulation unstable" + utils::errorurl(6)); + error->one(FLERR, Error::NOLASTLINE, + "Non-numeric box dimensions - simulation unstable" + utils::errorurl(6)); int flag = 0; #if defined(_OPENMP) diff --git a/src/OPENMP/pppm_tip4p_omp.cpp b/src/OPENMP/pppm_tip4p_omp.cpp index e2cf98b0d2a..57da057118b 100644 --- a/src/OPENMP/pppm_tip4p_omp.cpp +++ b/src/OPENMP/pppm_tip4p_omp.cpp @@ -350,7 
+350,8 @@ void PPPMTIP4POMP::particle_map() const int nlocal = atom->nlocal; if (!std::isfinite(boxlox) || !std::isfinite(boxloy) || !std::isfinite(boxloz)) - error->one(FLERR,"Non-numeric box dimensions - simulation unstable" + utils::errorurl(6)); + error->one(FLERR, Error::NOLASTLINE, + "Non-numeric box dimensions - simulation unstable" + utils::errorurl(6)); int flag = 0; #if defined(_OPENMP) @@ -388,7 +389,8 @@ void PPPMTIP4POMP::particle_map() int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - if (flag_all) error->all(FLERR, Error::NOLASTLINE, "Out of range atoms - cannot compute PPPM" + utils::errorurl(4)); + if (flag_all) error->all(FLERR, Error::NOLASTLINE, + "Out of range atoms - cannot compute PPPM" + utils::errorurl(4)); } /* ---------------------------------------------------------------------- diff --git a/src/OPENMP/respa_omp.cpp b/src/OPENMP/respa_omp.cpp index 254b6671e72..b9c77704ab1 100644 --- a/src/OPENMP/respa_omp.cpp +++ b/src/OPENMP/respa_omp.cpp @@ -57,7 +57,7 @@ void RespaOMP::init() Respa::init(); if (atom->torque) - error->all(FLERR,"Extended particles are not supported by respa/omp\n"); + error->all(FLERR, Error::NOLASTLINE, "Extended particles are not supported by respa/omp"); } /* ---------------------------------------------------------------------- diff --git a/src/OPENMP/thr_omp.cpp b/src/OPENMP/thr_omp.cpp index 482ad501752..b42be1732f6 100644 --- a/src/OPENMP/thr_omp.cpp +++ b/src/OPENMP/thr_omp.cpp @@ -47,7 +47,8 @@ ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(nullptr), thr_style(style { // register fix omp with this class fix = static_cast(lmp->modify->get_fix_by_id("package_omp")); - if (!fix) lmp->error->all(FLERR, "The 'package omp' command is required for /omp styles"); + if (!fix) lmp->error->all(FLERR, Error::NOLASTLINE, + "The 'package omp' command is required for /omp styles"); #if defined(_OPENMP) omp_set_num_threads(lmp->comm->nthreads); #endif From 
f9dfc056d1e1c2569154d013bec7e61a3e24f79e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 2025 11:05:49 -0700 Subject: [PATCH 216/604] Fix more issues --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_atomic_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_bond_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_charge_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_dipole_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_dpd_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_full_kokkos.cpp | 1 + src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_kokkos.cpp | 22 ++++++++++++++++++++-- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_sphere_kokkos.cpp | 2 ++ src/KOKKOS/atom_vec_spin_kokkos.cpp | 2 ++ src/KOKKOS/comm_kokkos.cpp | 4 ++-- 13 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 7115046adc5..2b6ed3a3d1e 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -39,6 +39,8 @@ AtomVecKokkos(lmp), AtomVecAngle(lmp) void AtomVecAngleKokkos::init() { + AtomVecAngle::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 36ed9cd693b..15176a88d5e 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -37,6 +37,8 @@ AtomVecKokkos(lmp), AtomVecAtomic(lmp) void AtomVecAtomicKokkos::init() { + AtomVecAtomic::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 5119951c1d8..5e8a924c9e9 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -37,6 +37,8 @@ AtomVecKokkos(lmp), AtomVecBond(lmp) void AtomVecBondKokkos::init() { + AtomVecBond::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 228842c2f19..6930cc15a99 100644 --- 
a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -37,6 +37,8 @@ AtomVecKokkos(lmp), AtomVecCharge(lmp), q(nullptr) void AtomVecChargeKokkos::init() { + AtomVecCharge::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index 1753941ba23..5e1d29e6258 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -35,6 +35,8 @@ AtomVecKokkos(lmp), AtomVecDipole(lmp), q(nullptr) {} void AtomVecDipoleKokkos::init() { + AtomVecDipole::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 262624f7f00..be7a5d4f697 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -39,6 +39,8 @@ AtomVecKokkos(lmp), AtomVecDPD(lmp) void AtomVecDPDKokkos::init() { + AtomVecDPD::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 6a3d57a7535..e1e5de88f0d 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -37,6 +37,7 @@ AtomVecKokkos(lmp), AtomVecFull(lmp) void AtomVecFullKokkos::init() { + AtomVecFull::init(); set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index ee2c12aeb2c..6cfe5660862 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -35,6 +35,8 @@ AtomVecKokkos(lmp), AtomVecHybrid(lmp) void AtomVecHybridKokkos::init() { + AtomVecHybrid::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 81c08607f8d..afcd7c84eac 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -1657,6 +1657,8 @@ struct AtomVecKokkos_PackBorderVel { const typename AT::t_tagint_1d _tag; const typename AT::t_int_1d _type; const typename AT::t_int_1d _mask; + const 
typename AT::t_tagint_1d _molecule; + const typename AT::t_kkfloat_1d _q; const typename AT::t_kkfloat_1d_4 _mu; const typename AT::t_kkfloat_1d_4 _sp; typename AT::t_kkfloat_1d _radius,_rmass; @@ -1679,6 +1681,8 @@ struct AtomVecKokkos_PackBorderVel { _tag(atomKK->k_tag.view()), _type(atomKK->k_type.view()), _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), _sp(atomKK->k_sp.view()), @@ -1728,6 +1732,12 @@ struct AtomVecKokkos_PackBorderVel { _buf(i,m++) = _v(j,2); } + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; + + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); + if (_datamask & MU_MASK) { _buf(i,m++) = _mu(j,0); _buf(i,m++) = _mu(j,1); @@ -1865,6 +1875,8 @@ struct AtomVecKokkos_UnpackBorderVel { typename AT::t_tagint_1d _tag; typename AT::t_int_1d _type; typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + typename AT::t_kkfloat_1d _q; typename AT::t_kkfloat_1d_3 _v; typename AT::t_kkfloat_1d_4 _mu; typename AT::t_kkfloat_1d_4 _sp; @@ -1884,6 +1896,8 @@ struct AtomVecKokkos_UnpackBorderVel { _tag(atomKK->k_tag.view()), _type(atomKK->k_type.view()), _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), _v(atomKK->k_v.view()), _mu(atomKK->k_mu.view()), _sp(atomKK->k_sp.view()), @@ -1912,12 +1926,16 @@ struct AtomVecKokkos_UnpackBorderVel { _tag(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); _type(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); _mask(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); - _radius(i+_first) = _buf(i,m++); - _rmass(i+_first) = _buf(i,m++); _v(i+_first,0) = _buf(i,m++); _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); + if (_datamask & MU_MASK) { _mu(i+_first,0) = _buf(i,m++); _mu(i+_first,1) = _buf(i,m++); 
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 5dc14c87733..ae72ea7b164 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -38,6 +38,8 @@ AtomVecKokkos(lmp), AtomVecMolecular(lmp) void AtomVecMolecularKokkos::init() { + AtomVecMolecular::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index d16742b1251..83a8ff37ff4 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -41,6 +41,8 @@ AtomVecKokkos(lmp), AtomVecSphere(lmp) void AtomVecSphereKokkos::init() { + AtomVecSphere::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index cf4cf4b2c70..9c004fec140 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -49,6 +49,8 @@ AtomVecKokkos(lmp), AtomVecSpin(lmp) void AtomVecSpinKokkos::init() { + AtomVecSpin::init(); + set_atom_masks(); } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 271da5dfaff..4f034e5a1cf 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1116,8 +1116,8 @@ struct BuildBorderListFunctor { template void CommKokkos::borders_device() { - int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax; - int nsend,nrecv,sendflag,nfirst,nlast,ngroup; + int n,iswap,dim,ineed,twoneed,smax,rmax; + int nsend,nrecv,sendflag,nfirst,nlast; double lo,hi; double *mlo,*mhi; MPI_Request request; From 135fde593f71dd7fc50b3ec2df7dd9a52af6c5b4 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 2025 12:20:58 -0700 Subject: [PATCH 217/604] Reduce if-tests for DPD-REACT --- src/KOKKOS/atom_vec_kokkos.cpp | 232 +++++++++++++++------------------ 1 file changed, 103 insertions(+), 129 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index afcd7c84eac..22f08c5a1a1 
100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -114,12 +114,17 @@ struct AtomVecKokkos_PackComm { } if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & MU_MASK) { _buf(i,m++) = _mu(j,0); _buf(i,m++) = _mu(j,1); _buf(i,m++) = _mu(j,2); } + // SPIN package + if (_datamask & SP_MASK) { _buf(i,m++) = _sp(j,0); _buf(i,m++) = _sp(j,1); @@ -127,17 +132,14 @@ struct AtomVecKokkos_PackComm { _buf(i,m++) = _sp(j,3); } - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) _buf(i,m++) = _uChem(j); + } } } }; @@ -326,12 +328,17 @@ struct AtomVecKokkos_PackCommSelf { } if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & MU_MASK) { _mu(i+_nfirst,0) = _mu(j,0); _mu(i+_nfirst,1) = _mu(j,1); _mu(i+_nfirst,2) = _mu(j,2); } + // SPIN package + if (_datamask & SP_MASK) { _sp(i+_nfirst,0) = _sp(j,0); _sp(i+_nfirst,1) = _sp(j,1); @@ -339,17 +346,14 @@ struct AtomVecKokkos_PackCommSelf { _sp(i+_nfirst,3) = _sp(j,3); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_nfirst) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_nfirst) = _dpdTheta(j); _uCond(i+_nfirst) = _uCond(j); - - if (_datamask & UMECH_MASK) _uMech(i+_nfirst) = _uMech(j); - - if (_datamask & UCHEM_MASK) _uChem(i+_nfirst) = _uChem(j); + } } } }; @@ -558,12 +562,17 @@ struct AtomVecKokkos_PackCommSelfFused { } if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & MU_MASK) { _mu(i+_nfirst,0) = _mu(j,0); _mu(i+_nfirst,1) = _mu(j,1); _mu(i+_nfirst,2) = _mu(j,2); } + // SPIN package + if (_datamask & SP_MASK) { _sp(i+_nfirst,0) = _sp(j,0); _sp(i+_nfirst,1) = _sp(j,1); @@ -571,17 +580,14 @@ struct AtomVecKokkos_PackCommSelfFused { _sp(i+_nfirst,3) = 
_sp(j,3); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_nfirst) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_nfirst) = _dpdTheta(j); _uCond(i+_nfirst) = _uCond(j); - - if (_datamask & UMECH_MASK) _uMech(i+_nfirst) = _uMech(j); - - if (_datamask & UCHEM_MASK) _uChem(i+_nfirst) = _uChem(j); + } } } }; @@ -693,12 +699,17 @@ struct AtomVecKokkos_UnpackComm { _x(i+_first,2) = _buf(i,m++); if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & MU_MASK) { _mu(i+_first,0) = _buf(i,m++); _mu(i+_first,1) = _buf(i,m++); _mu(i+_first,2) = _buf(i,m++); } + // SPIN package + if (_datamask & SP_MASK) { _sp(i+_first,0) = _buf(i,m++); _sp(i+_first,1) = _buf(i,m++); @@ -706,17 +717,14 @@ struct AtomVecKokkos_UnpackComm { _sp(i+_first,3) = _buf(i,m++); } + // DPD-REACT package + if (_datamask & DPDTHETA_MASK) _dpdTheta(i+_first) = _buf(i,m++); - - if (_datamask & UCOND_MASK) _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) _uChem(i+_first) = _buf(i,m++); + } } } }; @@ -841,12 +849,16 @@ struct AtomVecKokkos_PackCommVel { } } + // DIPOLE package + if (_datamask & MU_MASK) { _buf(i,m++) = _mu(j,0); _buf(i,m++) = _mu(j,1); _buf(i,m++) = _mu(j,2); } + // SPIN package + if (_datamask & SP_MASK) { _buf(i,m++) = _sp(j,0); _buf(i,m++) = _sp(j,1); @@ -854,23 +866,22 @@ struct AtomVecKokkos_PackCommVel { _buf(i,m++) = _sp(j,3); } + // SPHERE package + if (_datamask & OMEGA_MASK) { _buf(i,m++) = _omega(j,0); _buf(i,m++) = _omega(j,1); _buf(i,m++) = _omega(j,2); } - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) _buf(i,m++) = _uChem(j); + } } }; @@ -1053,12 +1064,16 @@ struct 
AtomVecKokkos_UnpackCommVel { _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); + // DIPOLE package + if (_datamask & MU_MASK) { _mu(i+_first,0) = _buf(i,m++); _mu(i+_first,1) = _buf(i,m++); _mu(i+_first,2) = _buf(i,m++); } + // SPIN package + if (_datamask & SP_MASK) { _sp(i+_first,0) = _buf(i,m++); _sp(i+_first,1) = _buf(i,m++); @@ -1066,23 +1081,22 @@ struct AtomVecKokkos_UnpackCommVel { _sp(i+_first,3) = _buf(i,m++); } + // SPHERE package + if (_datamask & OMEGA_MASK) { _omega(i+_first,0) = _buf(i,m++); _omega(i+_first,1) = _buf(i,m++); _omega(i+_first,2) = _buf(i,m++); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) _uChem(i+_first) = _buf(i,m++); + } } }; @@ -1138,19 +1152,22 @@ struct AtomVecKokkos_PackReverse { _buf(i,m++) = _f(i+_first,2); if constexpr (!DEFAULT) { + + // DIPLE package + if (_datamask & TORQUE_MASK) { _buf(i,m++) = _torque(i+_first,0); _buf(i,m++) = _torque(i+_first,1); _buf(i,m++) = _torque(i+_first,2); } + // SPIN package + if (_datamask & FM_MASK) { _buf(i,m++) = _fm(i+_first,0); _buf(i,m++) = _fm(i+_first,1); _buf(i,m++) = _fm(i+_first,2); - } - if (_datamask & FML_MASK) { _buf(i,m++) = _fm_long(i+_first,0); _buf(i,m++) = _fm_long(i+_first,1); _buf(i,m++) = _fm_long(i+_first,2); @@ -1219,19 +1236,22 @@ struct AtomVecKokkos_UnPackReverseSelf { _f(j,2) += _f(i+_nfirst,2); if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & TORQUE_MASK) { _torque(j,0) += _torque(i+_nfirst,0); _torque(j,1) += _torque(i+_nfirst,1); _torque(j,2) += _torque(i+_nfirst,2); } + // SPIN package + if (_datamask & FM_MASK) { _fm(j,0) += _fm(i+_nfirst,0); _fm(j,1) += _fm(i+_nfirst,1); _fm(j,2) += _fm(i+_nfirst,2); - } - if (_datamask & FML_MASK) { _fm_long(j,0) += 
_fm_long(i+_nfirst,0); _fm_long(j,1) += _fm_long(i+_nfirst,1); _fm_long(j,2) += _fm_long(i+_nfirst,2); @@ -1307,19 +1327,22 @@ struct AtomVecKokkos_UnPackReverse { _f(j,2) += _buf(i,m++); if constexpr (!DEFAULT) { + + // DIPOLE package + if (_datamask & TORQUE_MASK) { _torque(j,0) += _buf(i,m++); _torque(j,1) += _buf(i,m++); _torque(j,2) += _buf(i,m++); } + // SPIN package + if (_datamask & FM_MASK) { _fm(j,0) += _buf(i,m++); _fm(j,1) += _buf(i,m++); _fm(j,2) += _buf(i,m++); - } - if (_datamask & FML_MASK) { _fm_long(j,0) += _buf(i,m++); _fm_long(j,1) += _buf(i,m++); _fm_long(j,2) += _buf(i,m++); @@ -1451,23 +1474,16 @@ struct AtomVecKokkos_PackBorder { if (_datamask & RMASS_MASK) _buf(i,m++) = _rmass(j); - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) _buf(i,m++) = _uChem(j); - - if (_datamask & UCG_MASK) _buf(i,m++) = _uCG(j); - - if (_datamask & UCGNEW_MASK) _buf(i,m++) = _uCGnew(j); + } } }; @@ -1601,23 +1617,16 @@ struct AtomVecKokkos_UnpackBorder { if (_datamask & RMASS_MASK) _rmass(i+_first) = _buf(i,m++); - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) _uChem(i+_first) = _buf(i,m++); - - if (_datamask & UCG_MASK) _uCG(i+_first) = _buf(i,m++); - - if (_datamask & UCGNEW_MASK) _uCGnew(i+_first) = _buf(i,m++); + } } }; @@ -1764,23 +1773,16 @@ struct AtomVecKokkos_PackBorderVel { _buf(i,m++) = _omega(j,2); } - if (_datamask & DPDTHETA_MASK) - _buf(i,m++) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & 
DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); _buf(i,m++) = _uCond(j); - - if (_datamask & UMECH_MASK) _buf(i,m++) = _uMech(j); - - if (_datamask & UCHEM_MASK) _buf(i,m++) = _uChem(j); - - if (_datamask & UCG_MASK) _buf(i,m++) = _uCG(j); - - if (_datamask & UCGNEW_MASK) _buf(i,m++) = _uCGnew(j); + } } }; @@ -1962,23 +1964,16 @@ struct AtomVecKokkos_UnpackBorderVel { _omega(i+_first,2) = _buf(i,m++); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i+_first) = _buf(i,m++); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); _uCond(i+_first) = _buf(i,m++); - - if (_datamask & UMECH_MASK) _uMech(i+_first) = _buf(i,m++); - - if (_datamask & UCHEM_MASK) _uChem(i+_first) = _buf(i,m++); - - if (_datamask & UCG_MASK) _uCG(i+_first) = _buf(i,m++); - - if (_datamask & UCGNEW_MASK) _uCGnew(i+_first) = _buf(i,m++); + } } }; @@ -2206,23 +2201,16 @@ struct AtomVecKokkos_PackExchangeFunctor { _buf(mysend,m++) = _omega(i,2); } - if (_datamask & DPDTHETA_MASK) - _buf(mysend,m++) = _dpdTheta(i); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _buf(mysend,m++) = _dpdTheta(i); _buf(mysend,m++) = _uCond(i); - - if (_datamask & UMECH_MASK) _buf(mysend,m++) = _uMech(i); - - if (_datamask & UCHEM_MASK) _buf(mysend,m++) = _uChem(i); - - if (_datamask & UCG_MASK) _buf(mysend,m++) = _uCG(i); - - if (_datamask & UCGNEW_MASK) _buf(mysend,m++) = _uCGnew(i); + } const int j = _copylist(mysend); @@ -2318,23 +2306,16 @@ struct AtomVecKokkos_PackExchangeFunctor { _omega(i,2) = _omega(j,2); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i) = _dpdTheta(j); + // DPD-REACT package - if (_datamask & UCOND_MASK) - _uCond(i) = _uCond(j); - - if (_datamask & UMECH_MASK) - _uMech(i) = _uMech(j); - - if (_datamask & UCHEM_MASK) - _uChem(i) = _uChem(j); - - if (_datamask & UCG_MASK) - _uCG(i) = _uCG(j); - - if (_datamask & UCGNEW_MASK) - _uCGnew(i) = _uCGnew(j); + if (_datamask & DPDTHETA_MASK) { 
+ _dpdTheta(i) = _dpdTheta(j); + _uCond(i) = _uCond(j); + _uMech(i) = _uMech(j); + _uChem(i) = _uChem(j); + _uCG(i) = _uCG(j); + _uCGnew(i) = _uCGnew(j); + } } } }; @@ -2570,23 +2551,16 @@ struct AtomVecKokkos_UnpackExchangeFunctor { _omega(i,2) = _buf(myrecv,m++); } - if (_datamask & DPDTHETA_MASK) - _dpdTheta(i) = _buf(myrecv,m++); + // DPD-REACT package - if (_datamask & UCOND_MASK) + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _buf(myrecv,m++); _uCond(i) = _buf(myrecv,m++); - - if (_datamask & UMECH_MASK) _uMech(i) = _buf(myrecv,m++); - - if (_datamask & UCHEM_MASK) _uChem(i) = _buf(myrecv,m++); - - if (_datamask & UCG_MASK) _uCG(i) = _buf(myrecv,m++); - - if (_datamask & UCGNEW_MASK) _uCGnew(i) = _buf(myrecv,m++); + } } if (OUTPUT_INDICES) From b0b033c27cd8b38835c460a36daa13d90c86d495 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 10 Nov 2025 13:56:41 -0600 Subject: [PATCH 218/604] fixed typos and updated other coreshell kernels --- lib/gpu/lal_born_coul_long_cs.cu | 4 ++-- lib/gpu/lal_born_coul_wolf_cs.cu | 4 ++++ lib/gpu/lal_coul_long_cs.cu | 24 +++++++++++++----------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/lib/gpu/lal_born_coul_long_cs.cu b/lib/gpu/lal_born_coul_long_cs.cu index f8ab43cae32..e75bcc0be50 100644 --- a/lib/gpu/lal_born_coul_long_cs.cu +++ b/lib/gpu/lal_born_coul_long_cs.cu @@ -40,14 +40,14 @@ _texture( q_tex,int2); #define B4 (acctyp)-5.80844129e-3 #define B5 (acctyp)1.14652755e-1 -#if defined _DOULBE_DOUBLE +#if defined _DOUBLE_DOUBLE #define EPSILON (acctyp)(1.0e-20) #define EPS_EWALD (acctyp)(1.0e-6) #define EPS_EWALD_SQR (acctyp)(1.0e-12) #else #define EPSILON (numtyp)(1.0e-7) #define EPS_EWALD (numtyp)(1.0e-6) -#define EPS_EWALD_SQR (numtyp)(1.0e-7) +#define EPS_EWALD_SQR (numtyp)(1.0e-8) #endif __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_, diff --git a/lib/gpu/lal_born_coul_wolf_cs.cu b/lib/gpu/lal_born_coul_wolf_cs.cu index 785bbed49b5..511cb688c47 100644 --- 
a/lib/gpu/lal_born_coul_wolf_cs.cu +++ b/lib/gpu/lal_born_coul_wolf_cs.cu @@ -29,7 +29,11 @@ _texture( q_tex,int2); #define q_tex q_ #endif +#if defined _DOUBLE_DOUBLE #define EPSILON (acctyp)(1.0e-20) +#else +#define EPSILON (numtyp)(1.0e-7) +#endif #define MY_PIS (acctyp)1.77245385090551602729 __kernel void k_born_coul_wolf_cs(const __global numtyp4 *restrict x_, diff --git a/lib/gpu/lal_coul_long_cs.cu b/lib/gpu/lal_coul_long_cs.cu index 5a1e59e407a..fbc85ee79ea 100644 --- a/lib/gpu/lal_coul_long_cs.cu +++ b/lib/gpu/lal_coul_long_cs.cu @@ -39,9 +39,15 @@ _texture( q_tex,int2); #define B4 (acctyp)-5.80844129e-3 #define B5 (acctyp)1.14652755e-1 +#if defined _DOUBLE_DOUBLE #define EPSILON (acctyp)(1.0e-20) #define EPS_EWALD (acctyp)(1.0e-6) #define EPS_EWALD_SQR (acctyp)(1.0e-12) +#else +#define EPSILON (numtyp)(1.0e-7) +#define EPS_EWALD (numtyp)(1.0e-6) +#define EPS_EWALD_SQR (numtyp)(1.0e-8) +#endif __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, const __global numtyp *restrict scale, @@ -91,7 +97,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, int j=dev_packed[nbor]; numtyp factor_coul; - factor_coul = sp_cl[sbmask(j)]; + factor_coul = (numtyp)1.0-sp_cl[sbmask(j)]; j &= NEIGHMASK; numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; @@ -112,14 +118,14 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, numtyp r = ucl_rsqrt(r2inv); fetch(prefactor,j,q_tex); prefactor *= qqrd2e * scale[mtype] * qtmp; - if (factor_coul<(numtyp)1.0) { + if (factor_coul > (acctyp)0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); numtyp u = (numtyp)1.0 - t; _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2; prefactor /= (r+EPS_EWALD); - force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul)); + force = prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); // Additionally r2inv needs to be accordingly 
modified since the later // scaling of the overall force shall be consistent r2inv = ucl_recip(rsq + EPS_EWALD_SQR); @@ -140,9 +146,7 @@ __kernel void k_coul_long_cs(const __global numtyp4 *restrict x_, f.z+=delz*force; if (EVFLAG && eflag) { - numtyp e = prefactor*_erfc; - if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor; - e_coul += e; + e_coul += prefactor*(_erfc-factor_coul); } if (EVFLAG && vflag) { virial[0] += delx*delx*force; @@ -213,7 +217,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_, int j=dev_packed[nbor]; numtyp factor_coul; - factor_coul = sp_cl[sbmask(j)]; + factor_coul = (numtyp)1.0-sp_cl[sbmask(j)]; j &= NEIGHMASK; numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; @@ -233,7 +237,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_, numtyp r = ucl_rsqrt(r2inv); fetch(prefactor,j,q_tex); prefactor *= qqrd2e * scale[mtype] * qtmp; - if (factor_coul<(numtyp)1.0) { + if (factor_coul > (acctyp)0) { numtyp grij = g_ewald * (r+EPS_EWALD); numtyp expm2 = ucl_exp(-grij*grij); acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij); @@ -261,9 +265,7 @@ __kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_, f.z+=delz*force; if (EVFLAG && eflag) { - numtyp e = prefactor*_erfc; - if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor; - e_coul += e; + e_coul += prefactor*(_erfc-factor_coul); } if (EVFLAG && vflag) { virial[0] += delx*delx*force; From bc94591c55df6735bed00d4d6d7cc9b8079da808 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 2025 13:24:52 -0700 Subject: [PATCH 219/604] Add more templates --- src/KOKKOS/atom_vec_kokkos.cpp | 953 ++++++++++++++++++--------------- 1 file changed, 536 insertions(+), 417 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 22f08c5a1a1..b9118cd0561 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -35,7 +35,7 @@ 
AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) buffer_size = 0; size_exchange = 0; - datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = + datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = datamask_border = datamask_border_vel = datamask_exchange = EMPTY_MASK; k_count = DAT::tdual_int_1d("atom:k_count",1); @@ -97,7 +97,7 @@ struct AtomVecKokkos_PackComm { void operator() (const int& i) const { const int j = _list(i); int m = 0; - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -311,7 +311,7 @@ struct AtomVecKokkos_PackCommSelf { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _x(i+_nfirst,0) = _x(j,0); _x(i+_nfirst,1) = _x(j,1); _x(i+_nfirst,2) = _x(j,2); @@ -719,7 +719,7 @@ struct AtomVecKokkos_UnpackComm { // DPD-REACT package - if (_datamask & DPDTHETA_MASK) + if (_datamask & DPDTHETA_MASK) { _dpdTheta(i+_first) = _buf(i,m++); _uCond(i+_first) = _buf(i,m++); _uMech(i+_first) = _buf(i,m++); @@ -814,7 +814,7 @@ struct AtomVecKokkos_PackCommVel { void operator() (const int& i) const { int m = 0; const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -832,7 +832,7 @@ struct AtomVecKokkos_PackCommVel { _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } - if (DEFORM_VREMAP == 0) { + if constexpr (DEFORM_VREMAP == 0) { _buf(i,m++) = _v(j,0); _buf(i,m++) = _v(j,1); _buf(i,m++) = _v(j,2); @@ -1019,7 +1019,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackCommVel { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1064,38 +1064,41 @@ struct AtomVecKokkos_UnpackCommVel { _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); - // DIPOLE 
package + if constexpr (!DEFAULT) { - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - } + // DIPOLE package - // SPIN package + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + // SPIN package - // SPHERE package + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } + // SPHERE package - // DPD-REACT package + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + } } } }; @@ -1104,15 +1107,26 @@ struct AtomVecKokkos_UnpackCommVel { void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->forward_comm_on_host) { atomKK->sync(HostKK,datamask_comm_vel); - struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + 
Kokkos::parallel_for(n,f); + } atomKK->modified(HostKK,datamask_comm_vel); } else { atomKK->sync(Device,datamask_comm_vel); - struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } atomKK->modified(Device,datamask_comm_vel); } } @@ -1282,7 +1296,7 @@ int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list } else { struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - } + } atomKK->modified(Device,datamask_reverse); } @@ -1385,7 +1399,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackBorder { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1434,7 +1448,7 @@ struct AtomVecKokkos_PackBorder { void operator() (const int& i) const { const int j = _list(i); int m = 0; - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -1448,41 +1462,44 @@ struct AtomVecKokkos_PackBorder { _buf(i,m++) = d_ubuf(_type(j)).d; _buf(i,m++) = d_ubuf(_mask(j)).d; - if (_datamask & MOLECULE_MASK) - _buf(i,m++) = d_ubuf(_molecule(j)).d; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _buf(i,m++) = _q(j); + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - _buf(i,m++) = _mu(j,3); - } + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } + if (_datamask 
& MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } - if (_datamask & RADIUS_MASK) - _buf(i,m++) = _radius(j); + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } - if (_datamask & RMASS_MASK) - _buf(i,m++) = _rmass(j); + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); - // DPD-REACT package + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); - if (_datamask & DPDTHETA_MASK) { - _buf(i,m++) = _dpdTheta(j); - _buf(i,m++) = _uCond(j); - _buf(i,m++) = _uMech(j); - _buf(i,m++) = _uChem(j); - _buf(i,m++) = _uCG(j); - _buf(i,m++) = _uCGnew(j); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + _buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); + _buf(i,m++) = _uCG(j); + _buf(i,m++) = _uCGnew(j); + } } } }; @@ -1508,29 +1525,56 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, dz = pbc[2]; } if (space == HostKK) { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } 
} - } else { dx = dy = dz = 0; if (space == HostKK) { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } } return n*size_border; @@ -1538,7 +1582,7 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackBorder { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1591,41 +1635,44 @@ struct AtomVecKokkos_UnpackBorder { _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,m++); + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = 
_buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); - // DPD-REACT package + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); - _uCG(i+_first) = _buf(i,m++); - _uCGnew(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } } } }; @@ -1640,13 +1687,25 @@ void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, atomKK->sync(space,datamask_border); if (space == HostKK) { - struct AtomVecKokkos_UnpackBorder - f(atomKK,buf.view_host(),first,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_UnpackBorder - f(atomKK,buf.view_device(),first,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + 
Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + Kokkos::parallel_for(n,f); + } } atomKK->modified(space,datamask_border); @@ -1716,7 +1775,7 @@ struct AtomVecKokkos_PackBorderVel { void operator() (const int& i) const { int m = 0; const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -1729,7 +1788,7 @@ struct AtomVecKokkos_PackBorderVel { _buf(i,m++) = d_ubuf(_type(j)).d; _buf(i,m++) = d_ubuf(_mask(j)).d; - if (DEFORM_VREMAP) { + if constexpr (DEFORM_VREMAP) { if (_mask(i) & _deform_groupbit) { _buf(i,m++) = _v(j,0) + _dvx; _buf(i,m++) = _v(j,1) + _dvy; @@ -1867,7 +1926,7 @@ int AtomVecKokkos::pack_border_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackBorderVel { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1932,47 +1991,50 @@ struct AtomVecKokkos_UnpackBorderVel { _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,m++); + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); + 
if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); - // DPD-REACT package + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); - _uCG(i+_first) = _buf(i,m++); - _uCGnew(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } } } }; @@ -1988,17 +2050,33 @@ void AtomVecKokkos::unpack_border_vel_kokkos( atomKK->sync(space,datamask_border_vel); if (space == HostKK) { - struct AtomVecKokkos_UnpackBorderVel f( - atomKK, - buf.view_host(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_UnpackBorderVel f( - atomKK, - buf.view_device(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + 
first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } } atomKK->modified(space,datamask_border_vel); @@ -2006,7 +2084,7 @@ void AtomVecKokkos::unpack_border_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -2121,95 +2199,98 @@ struct AtomVecKokkos_PackExchangeFunctor { _buf(mysend,m++) = d_ubuf(_mask(i)).d; _buf(mysend,m++) = d_ubuf(_image(i)).d; - if (_datamask & Q_MASK) - _buf(mysend,m++) = _q(i); + if constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + if (_datamask & Q_MASK) + _buf(mysend,m++) = _q(i); + + if (_datamask & MOLECULE_MASK) + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - if (_datamask & BOND_MASK) { - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (int k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + if (_datamask & BOND_MASK) { + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (int k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } } - } - if (_datamask & ANGLE_MASK) { - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (int k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + if (_datamask & ANGLE_MASK) { + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (int k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + 
_buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } } - } - if (_datamask & DIHEDRAL_MASK) { - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (int k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + if (_datamask & DIHEDRAL_MASK) { + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (int k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } } - } - if (_datamask & IMPROPER_MASK) { - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (int k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + if (_datamask & IMPROPER_MASK) { + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (int k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } } - } - if (_datamask & SPECIAL_MASK) { - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (int k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - } + if (_datamask & SPECIAL_MASK) { + _buf(mysend,m++) = 
d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (int k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + } - if (_datamask & MU_MASK) { - _buf(mysend,m++) = _mu(i,0); - _buf(mysend,m++) = _mu(i,1); - _buf(mysend,m++) = _mu(i,2); - _buf(mysend,m++) = _mu(i,3); - } + if (_datamask & MU_MASK) { + _buf(mysend,m++) = _mu(i,0); + _buf(mysend,m++) = _mu(i,1); + _buf(mysend,m++) = _mu(i,2); + _buf(mysend,m++) = _mu(i,3); + } - if (_datamask & SP_MASK) { - _buf(mysend,m++) = _sp(i,0); - _buf(mysend,m++) = _sp(i,1); - _buf(mysend,m++) = _sp(i,2); - _buf(mysend,m++) = _sp(i,3); - } + if (_datamask & SP_MASK) { + _buf(mysend,m++) = _sp(i,0); + _buf(mysend,m++) = _sp(i,1); + _buf(mysend,m++) = _sp(i,2); + _buf(mysend,m++) = _sp(i,3); + } - if (_datamask & RADIUS_MASK) - _buf(mysend,m++) = _radius(i); + if (_datamask & RADIUS_MASK) + _buf(mysend,m++) = _radius(i); - if (_datamask & RMASS_MASK) - _buf(mysend,m++) = _rmass(i); + if (_datamask & RMASS_MASK) + _buf(mysend,m++) = _rmass(i); - if (_datamask & OMEGA_MASK) { - _buf(mysend,m++) = _omega(i,0); - _buf(mysend,m++) = _omega(i,1); - _buf(mysend,m++) = _omega(i,2); - } + if (_datamask & OMEGA_MASK) { + _buf(mysend,m++) = _omega(i,0); + _buf(mysend,m++) = _omega(i,1); + _buf(mysend,m++) = _omega(i,2); + } - // DPD-REACT package + // DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _buf(mysend,m++) = _dpdTheta(i); - _buf(mysend,m++) = _uCond(i); - _buf(mysend,m++) = _uMech(i); - _buf(mysend,m++) = _uChem(i); - _buf(mysend,m++) = _uCG(i); - _buf(mysend,m++) = _uCGnew(i); + if (_datamask & DPDTHETA_MASK) { + _buf(mysend,m++) = _dpdTheta(i); + _buf(mysend,m++) = _uCond(i); + _buf(mysend,m++) = _uMech(i); + _buf(mysend,m++) = _uChem(i); + _buf(mysend,m++) = _uCG(i); + _buf(mysend,m++) = _uCGnew(i); + } } const int j = _copylist(mysend); @@ -2226,95 +2307,98 @@ struct AtomVecKokkos_PackExchangeFunctor { _mask(i) = _mask(j); 
_image(i) = _image(j); - if (_datamask & Q_MASK) - _q(i) = _q(j); + if constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _molecule(i) = _molecule(j); + if (_datamask & Q_MASK) + _q(i) = _q(j); - if (_datamask & BOND_MASK) { - _num_bond(i) = _num_bond(j); - for (int k = 0; k < _num_bond(j); k++) { - _bond_type(i,k) = _bond_type(j,k); - _bond_atom(i,k) = _bond_atom(j,k); + if (_datamask & MOLECULE_MASK) + _molecule(i) = _molecule(j); + + if (_datamask & BOND_MASK) { + _num_bond(i) = _num_bond(j); + for (int k = 0; k < _num_bond(j); k++) { + _bond_type(i,k) = _bond_type(j,k); + _bond_atom(i,k) = _bond_atom(j,k); + } } - } - if (_datamask & ANGLE_MASK) { - _num_angle(i) = _num_angle(j); - for (int k = 0; k < _num_angle(j); k++) { - _angle_type(i,k) = _angle_type(j,k); - _angle_atom1(i,k) = _angle_atom1(j,k); - _angle_atom2(i,k) = _angle_atom2(j,k); - _angle_atom3(i,k) = _angle_atom3(j,k); + if (_datamask & ANGLE_MASK) { + _num_angle(i) = _num_angle(j); + for (int k = 0; k < _num_angle(j); k++) { + _angle_type(i,k) = _angle_type(j,k); + _angle_atom1(i,k) = _angle_atom1(j,k); + _angle_atom2(i,k) = _angle_atom2(j,k); + _angle_atom3(i,k) = _angle_atom3(j,k); + } } - } - if (_datamask & DIHEDRAL_MASK) { - _num_dihedral(i) = _num_dihedral(j); - for (int k = 0; k < _num_dihedral(j); k++) { - _dihedral_type(i,k) = _dihedral_type(j,k); - _dihedral_atom1(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4(i,k) = _dihedral_atom4(j,k); + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = _num_dihedral(j); + for (int k = 0; k < _num_dihedral(j); k++) { + _dihedral_type(i,k) = _dihedral_type(j,k); + _dihedral_atom1(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4(i,k) = _dihedral_atom4(j,k); + } } - } - if (_datamask & IMPROPER_MASK) { - _num_improper(i) = _num_improper(j); - for (int k = 0; k 
< _num_improper(j); k++) { - _improper_type(i,k) = _improper_type(j,k); - _improper_atom1(i,k) = _improper_atom1(j,k); - _improper_atom2(i,k) = _improper_atom2(j,k); - _improper_atom3(i,k) = _improper_atom3(j,k); - _improper_atom4(i,k) = _improper_atom4(j,k); + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = _num_improper(j); + for (int k = 0; k < _num_improper(j); k++) { + _improper_type(i,k) = _improper_type(j,k); + _improper_atom1(i,k) = _improper_atom1(j,k); + _improper_atom2(i,k) = _improper_atom2(j,k); + _improper_atom3(i,k) = _improper_atom3(j,k); + _improper_atom4(i,k) = _improper_atom4(j,k); + } } - } - if (_datamask & SPECIAL_MASK) { - _nspecial(i,0) = _nspecial(j,0); - _nspecial(i,1) = _nspecial(j,1); - _nspecial(i,2) = _nspecial(j,2); - for (int k = 0; k < _nspecial(j,2); k++) - _special(i,k) = _special(j,k); - } + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = _nspecial(j,0); + _nspecial(i,1) = _nspecial(j,1); + _nspecial(i,2) = _nspecial(j,2); + for (int k = 0; k < _nspecial(j,2); k++) + _special(i,k) = _special(j,k); + } - if (_datamask & MU_MASK) { - _mu(i,0) = _mu(j,0); - _mu(i,1) = _mu(j,1); - _mu(i,2) = _mu(j,2); - _mu(i,3) = _mu(j,3); - } + if (_datamask & MU_MASK) { + _mu(i,0) = _mu(j,0); + _mu(i,1) = _mu(j,1); + _mu(i,2) = _mu(j,2); + _mu(i,3) = _mu(j,3); + } - if (_datamask & SP_MASK) { - _sp(i,0) = _sp(j,0); - _sp(i,1) = _sp(j,1); - _sp(i,2) = _sp(j,2); - _sp(i,3) = _sp(j,3); - } + if (_datamask & SP_MASK) { + _sp(i,0) = _sp(j,0); + _sp(i,1) = _sp(j,1); + _sp(i,2) = _sp(j,2); + _sp(i,3) = _sp(j,3); + } - if (_datamask & RADIUS_MASK) - _radius(i) = _radius(j); + if (_datamask & RADIUS_MASK) + _radius(i) = _radius(j); - if (_datamask & RMASS_MASK) - _rmass(i) = _rmass(j); + if (_datamask & RMASS_MASK) + _rmass(i) = _rmass(j); - if (_datamask & OMEGA_MASK) { - _omega(i,0) = _omega(j,0); - _omega(i,1) = _omega(j,1); - _omega(i,2) = _omega(j,2); - } + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _omega(j,0); + _omega(i,1) = 
_omega(j,1); + _omega(i,2) = _omega(j,2); + } - // DPD-REACT package + // DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i) = _dpdTheta(j); - _uCond(i) = _uCond(j); - _uMech(i) = _uMech(j); - _uChem(i) = _uChem(j); - _uCG(i) = _uCG(j); - _uCGnew(i) = _uCGnew(j); + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _dpdTheta(j); + _uCond(i) = _uCond(j); + _uMech(i) = _uMech(j); + _uChem(i) = _uChem(j); + _uCG(i) = _uCG(j); + _uCGnew(i) = _uCGnew(j); + } } } } @@ -2337,22 +2421,33 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr } if (space == HostKK) { - AtomVecKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } } else { - AtomVecKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - - return nsend*size_exchange; + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } } + + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -2471,99 +2566,102 @@ struct AtomVecKokkos_UnpackExchangeFunctor { _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - if (_datamask & Q_MASK) - _q(i) = _buf(myrecv,m++); + if 
constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & Q_MASK) + _q(i) = _buf(myrecv,m++); - if (_datamask & BOND_MASK) { - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & MOLECULE_MASK) + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & BOND_MASK) { + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & ANGLE_MASK) { - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & ANGLE_MASK) { + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & DIHEDRAL_MASK) { - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + 
for (int k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & IMPROPER_MASK) { - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & SPECIAL_MASK) { - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } - if (_datamask & MU_MASK) { - _mu(i,0) = _buf(myrecv,m++); - _mu(i,1) = _buf(myrecv,m++); - _mu(i,2) = _buf(myrecv,m++); - _mu(i,3) = _buf(myrecv,m++); 
- } + if (_datamask & MU_MASK) { + _mu(i,0) = _buf(myrecv,m++); + _mu(i,1) = _buf(myrecv,m++); + _mu(i,2) = _buf(myrecv,m++); + _mu(i,3) = _buf(myrecv,m++); + } - if (_datamask & SP_MASK) { - _sp(i,0) = _buf(myrecv,m++); - _sp(i,1) = _buf(myrecv,m++); - _sp(i,2) = _buf(myrecv,m++); - _sp(i,3) = _buf(myrecv,m++); - } + if (_datamask & SP_MASK) { + _sp(i,0) = _buf(myrecv,m++); + _sp(i,1) = _buf(myrecv,m++); + _sp(i,2) = _buf(myrecv,m++); + _sp(i,3) = _buf(myrecv,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i) = _buf(myrecv,m++); + if (_datamask & RADIUS_MASK) + _radius(i) = _buf(myrecv,m++); - if (_datamask & RMASS_MASK) - _rmass(i) = _buf(myrecv,m++); + if (_datamask & RMASS_MASK) + _rmass(i) = _buf(myrecv,m++); - if (_datamask & OMEGA_MASK) { - _omega(i,0) = _buf(myrecv,m++); - _omega(i,1) = _buf(myrecv,m++); - _omega(i,2) = _buf(myrecv,m++); - } + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _buf(myrecv,m++); + _omega(i,1) = _buf(myrecv,m++); + _omega(i,2) = _buf(myrecv,m++); + } - // DPD-REACT package + // DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i) = _buf(myrecv,m++); - _uCond(i) = _buf(myrecv,m++); - _uMech(i) = _buf(myrecv,m++); - _uChem(i) = _buf(myrecv,m++); - _uCG(i) = _buf(myrecv,m++); - _uCGnew(i) = _buf(myrecv,m++); + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _buf(myrecv,m++); + _uCond(i) = _buf(myrecv,m++); + _uMech(i) = _buf(myrecv,m++); + _uChem(i) = _buf(myrecv,m++); + _uCG(i) = _buf(myrecv,m++); + _uCGnew(i) = _buf(myrecv,m++); + } } } - if (OUTPUT_INDICES) + if constexpr (OUTPUT_INDICES) _indices(myrecv) = i; } }; @@ -2578,37 +2676,58 @@ int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nr atomKK->sync(space,datamask_exchange); if (space == HostKK) { + k_count.view_host()(0) = nlocal; + if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - 
Kokkos::parallel_for(nrecv/size_exchange,f); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.view_host()(0) = nlocal; - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } } else { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); + if 
(size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } + + k_count.modify_device(); + k_count.sync_host(); } atomKK->modified(space,datamask_exchange); From 64dfd7fe164318e2bea41ddd77b7bf1c0db780f2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 2025 13:24:52 -0700 Subject: [PATCH 220/604] Add more templates --- src/KOKKOS/atom_vec_kokkos.cpp | 953 ++++++++++++++++++--------------- 1 file changed, 536 insertions(+), 417 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 22f08c5a1a1..b9118cd0561 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -35,7 +35,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) buffer_size = 0; size_exchange = 0; - datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = + datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = datamask_border = datamask_border_vel = datamask_exchange = EMPTY_MASK; k_count = DAT::tdual_int_1d("atom:k_count",1); @@ -97,7 +97,7 @@ struct AtomVecKokkos_PackComm { void operator() (const int& i) const { const int j = _list(i); int m = 0; - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -311,7 +311,7 @@ struct AtomVecKokkos_PackCommSelf { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _x(i+_nfirst,0) = _x(j,0); _x(i+_nfirst,1) = _x(j,1); _x(i+_nfirst,2) = _x(j,2); @@ -719,7 +719,7 @@ struct AtomVecKokkos_UnpackComm { // DPD-REACT package - if (_datamask & DPDTHETA_MASK) + if (_datamask & DPDTHETA_MASK) { _dpdTheta(i+_first) = 
_buf(i,m++); _uCond(i+_first) = _buf(i,m++); _uMech(i+_first) = _buf(i,m++); @@ -814,7 +814,7 @@ struct AtomVecKokkos_PackCommVel { void operator() (const int& i) const { int m = 0; const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -832,7 +832,7 @@ struct AtomVecKokkos_PackCommVel { _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } - if (DEFORM_VREMAP == 0) { + if constexpr (DEFORM_VREMAP == 0) { _buf(i,m++) = _v(j,0); _buf(i,m++) = _v(j,1); _buf(i,m++) = _v(j,2); @@ -1019,7 +1019,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackCommVel { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1064,38 +1064,41 @@ struct AtomVecKokkos_UnpackCommVel { _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); - // DIPOLE package + if constexpr (!DEFAULT) { - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - } + // DIPOLE package - // SPIN package + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + // SPIN package - // SPHERE package + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } + // SPHERE package - // DPD-REACT package + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } - if (_datamask 
& DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + } } } }; @@ -1104,15 +1107,26 @@ struct AtomVecKokkos_UnpackCommVel { void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->forward_comm_on_host) { atomKK->sync(HostKK,datamask_comm_vel); - struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } atomKK->modified(HostKK,datamask_comm_vel); } else { atomKK->sync(Device,datamask_comm_vel); - struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } atomKK->modified(Device,datamask_comm_vel); } } @@ -1282,7 +1296,7 @@ int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list } else { struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); Kokkos::parallel_for(n,f); - } + } atomKK->modified(Device,datamask_reverse); } @@ -1385,7 +1399,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackBorder { typedef DeviceType device_type; typedef ArrayTypes 
AT; @@ -1434,7 +1448,7 @@ struct AtomVecKokkos_PackBorder { void operator() (const int& i) const { const int j = _list(i); int m = 0; - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -1448,41 +1462,44 @@ struct AtomVecKokkos_PackBorder { _buf(i,m++) = d_ubuf(_type(j)).d; _buf(i,m++) = d_ubuf(_mask(j)).d; - if (_datamask & MOLECULE_MASK) - _buf(i,m++) = d_ubuf(_molecule(j)).d; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _buf(i,m++) = _q(j); + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; - if (_datamask & MU_MASK) { - _buf(i,m++) = _mu(j,0); - _buf(i,m++) = _mu(j,1); - _buf(i,m++) = _mu(j,2); - _buf(i,m++) = _mu(j,3); - } + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); - if (_datamask & SP_MASK) { - _buf(i,m++) = _sp(j,0); - _buf(i,m++) = _sp(j,1); - _buf(i,m++) = _sp(j,2); - _buf(i,m++) = _sp(j,3); - } + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } - if (_datamask & RADIUS_MASK) - _buf(i,m++) = _radius(j); + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } - if (_datamask & RMASS_MASK) - _buf(i,m++) = _rmass(j); + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); - // DPD-REACT package + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); - if (_datamask & DPDTHETA_MASK) { - _buf(i,m++) = _dpdTheta(j); - _buf(i,m++) = _uCond(j); - _buf(i,m++) = _uMech(j); - _buf(i,m++) = _uChem(j); - _buf(i,m++) = _uCG(j); - _buf(i,m++) = _uCGnew(j); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + _buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); + _buf(i,m++) = _uCG(j); + _buf(i,m++) = _uCGnew(j); + } } } }; @@ -1508,29 +1525,56 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, dz = 
pbc[2]; } if (space == HostKK) { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } - } else { dx = dy = dz = 0; if (space == HostKK) { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_host(), k_sendlist.view_host(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - AtomVecKokkos_PackBorder f( - atomKK,buf.view_device(), k_sendlist.view_device(), - dx,dy,dz,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } } } return n*size_border; @@ -1538,7 +1582,7 @@ int 
AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackBorder { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1591,41 +1635,44 @@ struct AtomVecKokkos_UnpackBorder { _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,m++); + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); - // DPD-REACT package + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); - _uCG(i+_first) = _buf(i,m++); - _uCGnew(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = 
_buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } } } }; @@ -1640,13 +1687,25 @@ void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, atomKK->sync(space,datamask_border); if (space == HostKK) { - struct AtomVecKokkos_UnpackBorder - f(atomKK,buf.view_host(),first,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_UnpackBorder - f(atomKK,buf.view_device(),first,datamask_border); - Kokkos::parallel_for(n,f); + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + Kokkos::parallel_for(n,f); + } } atomKK->modified(space,datamask_border); @@ -1716,7 +1775,7 @@ struct AtomVecKokkos_PackBorderVel { void operator() (const int& i) const { int m = 0; const int j = _list(i); - if (PBC_FLAG == 0) { + if constexpr (PBC_FLAG == 0) { _buf(i,m++) = _x(j,0); _buf(i,m++) = _x(j,1); _buf(i,m++) = _x(j,2); @@ -1729,7 +1788,7 @@ struct AtomVecKokkos_PackBorderVel { _buf(i,m++) = d_ubuf(_type(j)).d; _buf(i,m++) = d_ubuf(_mask(j)).d; - if (DEFORM_VREMAP) { + if constexpr (DEFORM_VREMAP) { if (_mask(i) & _deform_groupbit) { _buf(i,m++) = _v(j,0) + _dvx; _buf(i,m++) = _v(j,1) + _dvy; @@ -1867,7 +1926,7 @@ int AtomVecKokkos::pack_border_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackBorderVel { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -1932,47 +1991,50 @@ struct 
AtomVecKokkos_UnpackBorderVel { _v(i+_first,1) = _buf(i,m++); _v(i+_first,2) = _buf(i,m++); - if (_datamask & MOLECULE_MASK) - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + if constexpr (!DEFAULT) { - if (_datamask & Q_MASK) - _q(i+_first) = _buf(i,m++); + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; - if (_datamask & MU_MASK) { - _mu(i+_first,0) = _buf(i,m++); - _mu(i+_first,1) = _buf(i,m++); - _mu(i+_first,2) = _buf(i,m++); - _mu(i+_first,3) = _buf(i,m++); - } + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); - if (_datamask & SP_MASK) { - _sp(i+_first,0) = _buf(i,m++); - _sp(i+_first,1) = _buf(i,m++); - _sp(i+_first,2) = _buf(i,m++); - _sp(i+_first,3) = _buf(i,m++); - } + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i+_first) = _buf(i,m++); + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } - if (_datamask & RMASS_MASK) - _rmass(i+_first) = _buf(i,m++); + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); - if (_datamask & OMEGA_MASK) { - _omega(i+_first,0) = _buf(i,m++); - _omega(i+_first,1) = _buf(i,m++); - _omega(i+_first,2) = _buf(i,m++); - } + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); - // DPD-REACT package + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i+_first) = _buf(i,m++); - _uCond(i+_first) = _buf(i,m++); - _uMech(i+_first) = _buf(i,m++); - _uChem(i+_first) = _buf(i,m++); - _uCG(i+_first) = _buf(i,m++); - _uCGnew(i+_first) = _buf(i,m++); + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = 
_buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } } } }; @@ -1988,17 +2050,33 @@ void AtomVecKokkos::unpack_border_vel_kokkos( atomKK->sync(space,datamask_border_vel); if (space == HostKK) { - struct AtomVecKokkos_UnpackBorderVel f( - atomKK, - buf.view_host(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_UnpackBorderVel f( - atomKK, - buf.view_device(), - first,datamask_border_vel); - Kokkos::parallel_for(n,f); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } } atomKK->modified(space,datamask_border_vel); @@ -2006,7 +2084,7 @@ void AtomVecKokkos::unpack_border_vel_kokkos( /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -2121,95 +2199,98 @@ struct AtomVecKokkos_PackExchangeFunctor { _buf(mysend,m++) = d_ubuf(_mask(i)).d; _buf(mysend,m++) = d_ubuf(_image(i)).d; - if (_datamask & Q_MASK) - _buf(mysend,m++) = _q(i); + if constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + if (_datamask & Q_MASK) + _buf(mysend,m++) = _q(i); + + if (_datamask & MOLECULE_MASK) + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - if (_datamask & BOND_MASK) { - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (int k = 
0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + if (_datamask & BOND_MASK) { + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (int k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } } - } - if (_datamask & ANGLE_MASK) { - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (int k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + if (_datamask & ANGLE_MASK) { + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (int k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } } - } - if (_datamask & DIHEDRAL_MASK) { - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (int k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + if (_datamask & DIHEDRAL_MASK) { + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (int k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } } - } - if (_datamask & IMPROPER_MASK) { - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (int k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - 
_buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + if (_datamask & IMPROPER_MASK) { + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (int k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } } - } - if (_datamask & SPECIAL_MASK) { - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (int k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - } + if (_datamask & SPECIAL_MASK) { + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (int k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = d_ubuf(_special(i,k)).d; + } - if (_datamask & MU_MASK) { - _buf(mysend,m++) = _mu(i,0); - _buf(mysend,m++) = _mu(i,1); - _buf(mysend,m++) = _mu(i,2); - _buf(mysend,m++) = _mu(i,3); - } + if (_datamask & MU_MASK) { + _buf(mysend,m++) = _mu(i,0); + _buf(mysend,m++) = _mu(i,1); + _buf(mysend,m++) = _mu(i,2); + _buf(mysend,m++) = _mu(i,3); + } - if (_datamask & SP_MASK) { - _buf(mysend,m++) = _sp(i,0); - _buf(mysend,m++) = _sp(i,1); - _buf(mysend,m++) = _sp(i,2); - _buf(mysend,m++) = _sp(i,3); - } + if (_datamask & SP_MASK) { + _buf(mysend,m++) = _sp(i,0); + _buf(mysend,m++) = _sp(i,1); + _buf(mysend,m++) = _sp(i,2); + _buf(mysend,m++) = _sp(i,3); + } - if (_datamask & RADIUS_MASK) - _buf(mysend,m++) = _radius(i); + if (_datamask & RADIUS_MASK) + _buf(mysend,m++) = _radius(i); - if (_datamask & RMASS_MASK) - _buf(mysend,m++) = _rmass(i); + if (_datamask & RMASS_MASK) 
+ _buf(mysend,m++) = _rmass(i); - if (_datamask & OMEGA_MASK) { - _buf(mysend,m++) = _omega(i,0); - _buf(mysend,m++) = _omega(i,1); - _buf(mysend,m++) = _omega(i,2); - } + if (_datamask & OMEGA_MASK) { + _buf(mysend,m++) = _omega(i,0); + _buf(mysend,m++) = _omega(i,1); + _buf(mysend,m++) = _omega(i,2); + } - // DPD-REACT package + // DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _buf(mysend,m++) = _dpdTheta(i); - _buf(mysend,m++) = _uCond(i); - _buf(mysend,m++) = _uMech(i); - _buf(mysend,m++) = _uChem(i); - _buf(mysend,m++) = _uCG(i); - _buf(mysend,m++) = _uCGnew(i); + if (_datamask & DPDTHETA_MASK) { + _buf(mysend,m++) = _dpdTheta(i); + _buf(mysend,m++) = _uCond(i); + _buf(mysend,m++) = _uMech(i); + _buf(mysend,m++) = _uChem(i); + _buf(mysend,m++) = _uCG(i); + _buf(mysend,m++) = _uCGnew(i); + } } const int j = _copylist(mysend); @@ -2226,95 +2307,98 @@ struct AtomVecKokkos_PackExchangeFunctor { _mask(i) = _mask(j); _image(i) = _image(j); - if (_datamask & Q_MASK) - _q(i) = _q(j); + if constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _molecule(i) = _molecule(j); + if (_datamask & Q_MASK) + _q(i) = _q(j); - if (_datamask & BOND_MASK) { - _num_bond(i) = _num_bond(j); - for (int k = 0; k < _num_bond(j); k++) { - _bond_type(i,k) = _bond_type(j,k); - _bond_atom(i,k) = _bond_atom(j,k); + if (_datamask & MOLECULE_MASK) + _molecule(i) = _molecule(j); + + if (_datamask & BOND_MASK) { + _num_bond(i) = _num_bond(j); + for (int k = 0; k < _num_bond(j); k++) { + _bond_type(i,k) = _bond_type(j,k); + _bond_atom(i,k) = _bond_atom(j,k); + } } - } - if (_datamask & ANGLE_MASK) { - _num_angle(i) = _num_angle(j); - for (int k = 0; k < _num_angle(j); k++) { - _angle_type(i,k) = _angle_type(j,k); - _angle_atom1(i,k) = _angle_atom1(j,k); - _angle_atom2(i,k) = _angle_atom2(j,k); - _angle_atom3(i,k) = _angle_atom3(j,k); + if (_datamask & ANGLE_MASK) { + _num_angle(i) = _num_angle(j); + for (int k = 0; k < _num_angle(j); k++) { + _angle_type(i,k) = _angle_type(j,k); + 
_angle_atom1(i,k) = _angle_atom1(j,k); + _angle_atom2(i,k) = _angle_atom2(j,k); + _angle_atom3(i,k) = _angle_atom3(j,k); + } } - } - if (_datamask & DIHEDRAL_MASK) { - _num_dihedral(i) = _num_dihedral(j); - for (int k = 0; k < _num_dihedral(j); k++) { - _dihedral_type(i,k) = _dihedral_type(j,k); - _dihedral_atom1(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4(i,k) = _dihedral_atom4(j,k); + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = _num_dihedral(j); + for (int k = 0; k < _num_dihedral(j); k++) { + _dihedral_type(i,k) = _dihedral_type(j,k); + _dihedral_atom1(i,k) = _dihedral_atom1(j,k); + _dihedral_atom2(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4(i,k) = _dihedral_atom4(j,k); + } } - } - if (_datamask & IMPROPER_MASK) { - _num_improper(i) = _num_improper(j); - for (int k = 0; k < _num_improper(j); k++) { - _improper_type(i,k) = _improper_type(j,k); - _improper_atom1(i,k) = _improper_atom1(j,k); - _improper_atom2(i,k) = _improper_atom2(j,k); - _improper_atom3(i,k) = _improper_atom3(j,k); - _improper_atom4(i,k) = _improper_atom4(j,k); + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = _num_improper(j); + for (int k = 0; k < _num_improper(j); k++) { + _improper_type(i,k) = _improper_type(j,k); + _improper_atom1(i,k) = _improper_atom1(j,k); + _improper_atom2(i,k) = _improper_atom2(j,k); + _improper_atom3(i,k) = _improper_atom3(j,k); + _improper_atom4(i,k) = _improper_atom4(j,k); + } } - } - if (_datamask & SPECIAL_MASK) { - _nspecial(i,0) = _nspecial(j,0); - _nspecial(i,1) = _nspecial(j,1); - _nspecial(i,2) = _nspecial(j,2); - for (int k = 0; k < _nspecial(j,2); k++) - _special(i,k) = _special(j,k); - } + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = _nspecial(j,0); + _nspecial(i,1) = _nspecial(j,1); + _nspecial(i,2) = _nspecial(j,2); + for (int k = 0; k < _nspecial(j,2); k++) + _special(i,k) = 
_special(j,k); + } - if (_datamask & MU_MASK) { - _mu(i,0) = _mu(j,0); - _mu(i,1) = _mu(j,1); - _mu(i,2) = _mu(j,2); - _mu(i,3) = _mu(j,3); - } + if (_datamask & MU_MASK) { + _mu(i,0) = _mu(j,0); + _mu(i,1) = _mu(j,1); + _mu(i,2) = _mu(j,2); + _mu(i,3) = _mu(j,3); + } - if (_datamask & SP_MASK) { - _sp(i,0) = _sp(j,0); - _sp(i,1) = _sp(j,1); - _sp(i,2) = _sp(j,2); - _sp(i,3) = _sp(j,3); - } + if (_datamask & SP_MASK) { + _sp(i,0) = _sp(j,0); + _sp(i,1) = _sp(j,1); + _sp(i,2) = _sp(j,2); + _sp(i,3) = _sp(j,3); + } - if (_datamask & RADIUS_MASK) - _radius(i) = _radius(j); + if (_datamask & RADIUS_MASK) + _radius(i) = _radius(j); - if (_datamask & RMASS_MASK) - _rmass(i) = _rmass(j); + if (_datamask & RMASS_MASK) + _rmass(i) = _rmass(j); - if (_datamask & OMEGA_MASK) { - _omega(i,0) = _omega(j,0); - _omega(i,1) = _omega(j,1); - _omega(i,2) = _omega(j,2); - } + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _omega(j,0); + _omega(i,1) = _omega(j,1); + _omega(i,2) = _omega(j,2); + } - // DPD-REACT package + // DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i) = _dpdTheta(j); - _uCond(i) = _uCond(j); - _uMech(i) = _uMech(j); - _uChem(i) = _uChem(j); - _uCG(i) = _uCG(j); - _uCGnew(i) = _uCGnew(j); + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _dpdTheta(j); + _uCond(i) = _uCond(j); + _uMech(i) = _uMech(j); + _uChem(i) = _uChem(j); + _uCG(i) = _uCG(j); + _uCGnew(i) = _uCGnew(j); + } } } } @@ -2337,22 +2421,33 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr } if (space == HostKK) { - AtomVecKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + 
Kokkos::parallel_for(nsend,f); + } } else { - AtomVecKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); - Kokkos::parallel_for(nsend,f); - - return nsend*size_exchange; + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } } + + return nsend*size_exchange; } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackExchangeFunctor { typedef DeviceType device_type; typedef ArrayTypes AT; @@ -2471,99 +2566,102 @@ struct AtomVecKokkos_UnpackExchangeFunctor { _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - if (_datamask & Q_MASK) - _q(i) = _buf(myrecv,m++); + if constexpr (!DEFAULT) { - if (_datamask & MOLECULE_MASK) - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & Q_MASK) + _q(i) = _buf(myrecv,m++); - if (_datamask & BOND_MASK) { - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & MOLECULE_MASK) + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & BOND_MASK) { + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & ANGLE_MASK) { - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - 
_angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & ANGLE_MASK) { + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & DIHEDRAL_MASK) { - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & IMPROPER_MASK) { - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) 
d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } } - } - if (_datamask & SPECIAL_MASK) { - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (int k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } - if (_datamask & MU_MASK) { - _mu(i,0) = _buf(myrecv,m++); - _mu(i,1) = _buf(myrecv,m++); - _mu(i,2) = _buf(myrecv,m++); - _mu(i,3) = _buf(myrecv,m++); - } + if (_datamask & MU_MASK) { + _mu(i,0) = _buf(myrecv,m++); + _mu(i,1) = _buf(myrecv,m++); + _mu(i,2) = _buf(myrecv,m++); + _mu(i,3) = _buf(myrecv,m++); + } - if (_datamask & SP_MASK) { - _sp(i,0) = _buf(myrecv,m++); - _sp(i,1) = _buf(myrecv,m++); - _sp(i,2) = _buf(myrecv,m++); - _sp(i,3) = _buf(myrecv,m++); - } + if (_datamask & SP_MASK) { + _sp(i,0) = _buf(myrecv,m++); + _sp(i,1) = _buf(myrecv,m++); + _sp(i,2) = _buf(myrecv,m++); + _sp(i,3) = _buf(myrecv,m++); + } - if (_datamask & RADIUS_MASK) - _radius(i) = _buf(myrecv,m++); + if (_datamask & RADIUS_MASK) + _radius(i) = _buf(myrecv,m++); - if (_datamask & RMASS_MASK) - _rmass(i) = _buf(myrecv,m++); + if (_datamask & RMASS_MASK) + _rmass(i) = _buf(myrecv,m++); - if (_datamask & OMEGA_MASK) { - _omega(i,0) = _buf(myrecv,m++); - _omega(i,1) = _buf(myrecv,m++); - _omega(i,2) = _buf(myrecv,m++); - } + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _buf(myrecv,m++); + _omega(i,1) = _buf(myrecv,m++); + _omega(i,2) = _buf(myrecv,m++); + } - // DPD-REACT package + // 
DPD-REACT package - if (_datamask & DPDTHETA_MASK) { - _dpdTheta(i) = _buf(myrecv,m++); - _uCond(i) = _buf(myrecv,m++); - _uMech(i) = _buf(myrecv,m++); - _uChem(i) = _buf(myrecv,m++); - _uCG(i) = _buf(myrecv,m++); - _uCGnew(i) = _buf(myrecv,m++); + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _buf(myrecv,m++); + _uCond(i) = _buf(myrecv,m++); + _uMech(i) = _buf(myrecv,m++); + _uChem(i) = _buf(myrecv,m++); + _uCG(i) = _buf(myrecv,m++); + _uCGnew(i) = _buf(myrecv,m++); + } } } - if (OUTPUT_INDICES) + if constexpr (OUTPUT_INDICES) _indices(myrecv) = i; } }; @@ -2578,37 +2676,58 @@ int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nr atomKK->sync(space,datamask_exchange); if (space == HostKK) { + k_count.view_host()(0) = nlocal; + if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.view_host()(0) = nlocal; - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } } else { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + if 
(k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } } + + k_count.modify_device(); + k_count.sync_host(); } atomKK->modified(space,datamask_exchange); From 9e77bf3f37527c0d3ed1c44b7a80a6e3c27383ec Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Nov 2025 15:37:13 -0500 Subject: [PATCH 221/604] consistent capitalization --- doc/src/Build_extras.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index ed7b00b0043..546d4b0323c 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -243,7 +243,7 @@ necessary for ``hipcc`` and the linker to work correctly. 
When compiling for HIP ROCm, GPU sorting with ``-D HIP_USE_DEVICE_SORT=on`` requires installing the ``hipcub`` library (https://github.com/ROCmSoftwarePlatform/hipCUB). The HIP CUDA-backend -additionally requires cub (https://nvidia.github.io/cccl/cub/). Setting +additionally requires CUB (https://nvidia.github.io/cccl/cub/). Setting ``-DDOWNLOAD_CUB=yes`` will download and compile CUB. The GPU library has some multi-thread support using OpenMP. If LAMMPS From 0735409f9a38e063053b17ebc56438048f0e015b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 2025 15:24:47 -0700 Subject: [PATCH 222/604] Add randomread to read-only views --- src/KOKKOS/atom_vec_kokkos.cpp | 74 ++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index b9118cd0561..21721979cb8 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -58,10 +58,10 @@ struct AtomVecKokkos_PackComm { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -763,13 +763,13 @@ struct AtomVecKokkos_PackCommVel { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename 
AT::t_int_1d_randomread _mask; + typename AT::t_kkfloat_1d_3_randomread _v; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_3_randomread _omega; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; @@ -1138,8 +1138,8 @@ struct AtomVecKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; - typename AT::t_kkacc_1d_3 _torque; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; uint64_t _datamask; @@ -1406,16 +1406,16 @@ struct AtomVecKokkos_PackBorder { typename AT::t_double_2d_lr _buf; const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_kkfloat_1d_4 _mu; - const typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_tagint_1d_randomread _tag; + const typename AT::t_int_1d_randomread _type; + const typename AT::t_int_1d_randomread _mask; + const typename AT::t_tagint_1d_randomread _molecule; + const typename AT::t_kkfloat_1d_randomread _q; + const typename AT::t_kkfloat_1d_4_randomread _mu; + const typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _radius,_rmass; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz; uint64_t _datamask; @@ -1512,6 +1512,8 @@ int AtomVecKokkos::pack_border_kokkos(int n, 
DAT::tdual_int_1d k_sendlist, { atomKK->sync(space,datamask_border); + printf("NBORDER %i\n",nborder); + double dx,dy,dz; if (pbc_flag != 0) { @@ -1720,18 +1722,18 @@ struct AtomVecKokkos_PackBorderVel { typename AT::t_double_2d_lr_um _buf; const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_kkfloat_1d_4 _mu; - const typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_3_randomread _v; + const typename AT::t_tagint_1d_randomread _tag; + const typename AT::t_int_1d_randomread _type; + const typename AT::t_int_1d_randomread _mask; + const typename AT::t_tagint_1d_randomread _molecule; + const typename AT::t_kkfloat_1d_randomread _q; + const typename AT::t_kkfloat_1d_4_randomread _mu; + const typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _radius,_rmass; + typename AT::t_kkfloat_1d_3_randomread _omega; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; double _dx,_dy,_dz, _dvx, _dvy, _dvz; const int _deform_groupbit; const uint64_t _datamask; @@ -2414,6 +2416,8 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr atomKK->sync(space,datamask_exchange); set_size_exchange(); + printf("SIZE_EXCHANGE %i\n",size_exchange); + if (nsend > (int) (k_buf.view_host().extent(0)* k_buf.view_host().extent(1))/size_exchange) { int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; From bd7c304454bc760e2693312cefae9047e53b03e0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Nov 
2025 15:45:19 -0700 Subject: [PATCH 223/604] Remove printf --- src/KOKKOS/atom_vec_kokkos.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 21721979cb8..5610c4cc65b 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -1512,8 +1512,6 @@ int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, { atomKK->sync(space,datamask_border); - printf("NBORDER %i\n",nborder); - double dx,dy,dz; if (pbc_flag != 0) { @@ -2416,8 +2414,6 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr atomKK->sync(space,datamask_exchange); set_size_exchange(); - printf("SIZE_EXCHANGE %i\n",size_exchange); - if (nsend > (int) (k_buf.view_host().extent(0)* k_buf.view_host().extent(1))/size_exchange) { int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; From a4310ccb7254380f0e7329a06b3357f58274ce9a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Nov 2025 20:20:38 -0500 Subject: [PATCH 224/604] make FFT testers compatible with single precision FFTs --- unittest/utils/fft_test_helpers.h | 8 +++---- unittest/utils/test_fft3d.cpp | 31 +++++++++++++++++----------- unittest/utils/test_fft3d_kokkos.cpp | 10 +++++++-- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/unittest/utils/fft_test_helpers.h b/unittest/utils/fft_test_helpers.h index 204c117b54b..b6ff54c403d 100644 --- a/unittest/utils/fft_test_helpers.h +++ b/unittest/utils/fft_test_helpers.h @@ -537,14 +537,14 @@ class RoundTripValidator : public Validator { error_stats_.reset(); int n_points = nfast_ * nmid_ * nslow_; - double max_original = 0.0; + FFT_SCALAR max_original = 0.0; for (int i = 0; i < n_points; i++) { auto orig_val = get_complex_linear(original_data_, i); max_original = std::max(max_original, std::abs(orig_val)); } // Check if data is essentially zero (avoid division by zero) - bool is_zero_data = (max_original < 1e-14); + bool 
is_zero_data = (max_original < 1e-12); for (int i = 0; i < n_points; i++) { auto orig = get_complex_linear(original_data_, i); @@ -602,13 +602,13 @@ class KnownAnswerValidator : public Validator { int n_points = nfast_ * nmid_ * nslow_; // Find maximum expected value for relative error calculation - double max_expected = 0.0; + FFT_SCALAR max_expected = 0.0; for (int i = 0; i < n_points; i++) { auto exp_val = get_complex_linear(expected_fft_, i); max_expected = std::max(max_expected, std::abs(exp_val)); } - bool is_zero_expected = (max_expected < 1e-14); + bool is_zero_expected = (max_expected < 1e-12); for (int i = 0; i < n_points; i++) { auto computed = get_complex_linear(computed_fft_, i); diff --git a/unittest/utils/test_fft3d.cpp b/unittest/utils/test_fft3d.cpp index 3b3fb40a6f6..0485c1277c5 100644 --- a/unittest/utils/test_fft3d.cpp +++ b/unittest/utils/test_fft3d.cpp @@ -54,6 +54,12 @@ using namespace FFTValidation; // whether to print verbose output (i.e. not capturing LAMMPS screen output). bool verbose = false; +#ifdef FFT_SINGLE +static constexpr double TOLERANCE = 1.0e-7; +#else +static constexpr double TOLERANCE = 1.0e-10; +#endif + class FFT3DTest : public LAMMPSTest { protected: void SetUp() override @@ -351,7 +357,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) // Validate against expected result FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, 1e-10, verbose); + nslow, TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -360,7 +366,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: 1e-10" << std::endl; + std::cout << " Tolerance: " << TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; // Show sample values @@ -372,7 +378,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) } EXPECT_TRUE(passed) << "Delta function known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), 1e-10); + EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); } // ============================================================================ @@ -410,7 +416,7 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) // Validate against expected result FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, 1e-10, verbose); + nslow, TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -420,7 +426,7 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: 1e-10" << std::endl; + std::cout << " Tolerance: " << TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; // Show DC component @@ -439,7 +445,7 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) } EXPECT_TRUE(passed) << "Constant field known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), 1e-10); + EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); } // ============================================================================ @@ -487,8 +493,9 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) std::complex(0.0, spike_amplitude)); // Validate against expected result + FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, 1e-10, verbose); + nslow, TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -499,7 +506,7 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: 1e-10" << std::endl; + std::cout << " Tolerance: " << TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; // Show the spike values @@ -521,7 +528,7 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) } EXPECT_TRUE(passed) << "Sine wave known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), 1e-10); + EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); } @@ -1002,13 +1009,13 @@ TEST_F(FFT3DTest, MKL_Optimized) } FFTValidation::KnownAnswerValidator validator(delta_output.data(), expected_data.data(), - nfast, nmid, nslow, 1e-10, verbose); + nfast, nmid, nslow, TOLERANCE, verbose); bool passed = validator.validate(); EXPECT_TRUE(passed) << "MKL delta function validation failed" << "\n Max error: " << validator.get_error_stats().max() - << "\n Tolerance: " << 1e-10; - EXPECT_LT(validator.get_error_stats().max(), 1e-10); + << "\n Tolerance: " << TOLERANCE; + EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); } // ============================================================================ diff --git a/unittest/utils/test_fft3d_kokkos.cpp b/unittest/utils/test_fft3d_kokkos.cpp index aaf9b34bf30..54676aec960 100644 --- a/unittest/utils/test_fft3d_kokkos.cpp +++ b/unittest/utils/test_fft3d_kokkos.cpp @@ -59,6 +59,12 @@ using namespace FFTValidation; // Verbose output control bool verbose = false; +#ifdef FFT_SINGLE +static constexpr double TOLERANCE = 1.0e-7; +#else +static constexpr double TOLERANCE = 1.0e-10; +#endif + // Helper function to check if KOKKOS is using a GPU backend static bool is_kokkos_gpu_backend() { @@ -268,7 +274,7 @@ class FFT3DKokkosTest : public LAMMPSTest { } KnownAnswerValidator validator(output_vec.data(), expected_vec.data(), nfast, nmid, nslow, - 1e-10, verbose); + TOLERANCE, verbose); bool valid = validator.validate(); if (verbose || !valid) { @@ -508,7 +514,7 @@ TEST_F(FFT3DKokkosTest, KnownAnswer_Kokkos_Sine) std::complex(0.0, spike_amplitude)); KnownAnswerValidator validator(output_vec.data(), expected_data.data(), nfast, nmid, nslow, - 1e-10, verbose); + TOLERANCE, verbose); bool 
valid = validator.validate(); if (verbose || !valid) { From 043bb759d795db9392957c073ec468e842bef012 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Nov 2025 22:47:06 -0500 Subject: [PATCH 225/604] change logic since we may have both atom->mass and atom->rmass --- src/QTB/fix_qtb.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QTB/fix_qtb.cpp b/src/QTB/fix_qtb.cpp index 14544e72cb0..cf8dc5b0587 100644 --- a/src/QTB/fix_qtb.cpp +++ b/src/QTB/fix_qtb.cpp @@ -174,7 +174,7 @@ void FixQTB::init() "with alpha value as {}!\n", 0.5/h_timestep, alpha); // set force prefactors - if (!atom->rmass) { + if (atom->mass) { for (int i = 1; i <= atom->ntypes; i++) { //gfactor1 is the friction force \gamma{}m_{i}\frac{dv}{dt} gfactor1[i] = (atom->mass[i]*fric_coef) / force->ftm2v; From 1cd362e4c5afd14582cd2a1d3867d17c92a2c850 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Nov 2025 22:54:03 -0500 Subject: [PATCH 226/604] fix per-atom mass indexing bug --- src/PHONON/dynamical_matrix.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/PHONON/dynamical_matrix.cpp b/src/PHONON/dynamical_matrix.cpp index 203f1de77a6..f22ca0d8b7a 100644 --- a/src/PHONON/dynamical_matrix.cpp +++ b/src/PHONON/dynamical_matrix.cpp @@ -265,8 +265,8 @@ void DynamicalMatrix::calculateMatrix() bigint natoms = atom->natoms; int *type = atom->type; bigint *gm = groupmap; - double imass; // dynamical matrix element double *m = atom->mass; + if (atom->rmass) m = atom->rmass; double **f = atom->f; auto *dynmat = new double*[3]; @@ -320,10 +320,8 @@ void DynamicalMatrix::calculateMatrix() local_jdx = atom->map(j); if (local_idx >= 0 && local_jdx >= 0 && local_jdx < nlocal && (gm[j-1] >= 0 || folded)){ - if (atom->rmass_flag == 1) - imass = sqrt(m[local_idx] * m[local_jdx]); - else - imass = sqrt(m[type[local_idx]] * m[type[local_jdx]]); + const double imass = atom->rmass ? 
sqrt(m[local_idx] * m[local_jdx]) : + sqrt(m[type[local_idx]] * m[type[local_jdx]]); if (folded){ for (int beta=0; beta<3; beta++){ dynmat[alpha][(j-1)*3+beta] -= -f[local_jdx][beta]; From 031de4eeb2373f86903ae25573caf662a3f54f77 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Nov 2025 22:54:17 -0500 Subject: [PATCH 227/604] simplify --- src/RHEO/fix_rheo_pressure.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/RHEO/fix_rheo_pressure.cpp b/src/RHEO/fix_rheo_pressure.cpp index 6ea38b49d34..f91f2a43924 100644 --- a/src/RHEO/fix_rheo_pressure.cpp +++ b/src/RHEO/fix_rheo_pressure.cpp @@ -246,11 +246,7 @@ double FixRHEOPressure::calc_pressure(double rho, int i) rho_ratio = rho * rho0inv[type]; p = csq[type] * rho0[type] * (pow(rho_ratio, tpower[type]) - 1.0) / tpower[type]; } else if (pressure_style[type] == IDEAL) { - double imass; - if (atom->rmass) - imass = atom->rmass[i]; - else - imass = atom->mass[type]; + const double imass = atom->rmass ? atom->rmass[i] : atom->mass[type]; p = (gamma[type] - 1.0) * rho * atom->esph[i] / imass; } @@ -286,11 +282,7 @@ double FixRHEOPressure::calc_rho(double p, int i) rho *= pow(rho0[type], 1.0 - 1.0 / tpower[type]); rho *= pow(csq[type], -1.0 / tpower[type]); } else if (pressure_style[type] == IDEAL) { - double imass; - if (atom->rmass) - imass = atom->rmass[i]; - else - imass = atom->mass[type]; + const double imass = atom->rmass ? 
atom->rmass[i] : atom->mass[type]; rho = p * imass / ((gamma[type] - 1.0) * atom->esph[i]); } return rho; From 90861455867044fb05933bf39049b7ae59221c15 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 11 Nov 2025 00:28:59 -0500 Subject: [PATCH 228/604] make fix/compute ids more helpful --- doc/src/compute_reduce.rst | 10 +++++----- doc/src/fix_bond_react.rst | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 24a476dc7c2..6ee54ae3d15 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -187,9 +187,9 @@ with maximum stretch, you can do it as follows: .. code-block:: LAMMPS - compute 1 all property/local batom1 batom2 - compute 2 all bond/local dist - compute 3 all reduce max c_1[1] c_1[2] c_2 replace 1 3 replace 2 3 inputs local + compute batoms all property/local batom1 batom2 + compute blength all bond/local dist + compute 3 all reduce max c_batoms[1] c_batoms[2] c_blength replace 1 3 replace 2 3 inputs local thermo_style custom step temp c_3[1] c_3[2] c_3[3] The first two input values in the compute reduce command are vectors @@ -253,8 +253,8 @@ the default 'c_2[1]'. If the *replace* keyword is used, *vec1* of the *replace* keyword is listed after the colon, followed by '<-', followed by the reduction operation, followed by *vec2* of the *replace* keyword in parentheses. E.g., for the second in-text example above, the first printed -thermo column name would be 'c_3:c_1[1]<-max(c_2)' rather than the default -'c_3[1]'. +thermo column name would be 'c_3:c_batoms[1]<-max(c_blength)' rather than +the default 'c_3[1]'. 
All the scalar or vector values calculated by this compute are "intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on diff --git a/doc/src/fix_bond_react.rst b/doc/src/fix_bond_react.rst index 00d83f617ed..0e78c927fc4 100644 --- a/doc/src/fix_bond_react.rst +++ b/doc/src/fix_bond_react.rst @@ -87,7 +87,7 @@ For unabridged example scripts and files, see examples/PACKAGES/reaction. molecule mol1 pre_reacted_topology.txt molecule mol2 post_reacted_topology.txt - fix 5 all bond/react react myrxn1 all 1 0 3.25 mol1 mol2 map_file.txt + fix rxns all bond/react react diels_alder all 1 0 3.25 mol1 mol2 map_file.txt molecule mol1 pre_reacted_rxn1.txt molecule mol2 post_reacted_rxn1.txt @@ -792,8 +792,8 @@ This fix supports automatically generated thermo column names when using :doc:`thermo_modify colname auto `. The thermo column names are 'f_', followed by the fix ID, followed by a colon, followed by the react-ID. E.g., the first example in the Examples section above would -print a thermo column name of 'f_5:myrxn1', compared to the default column -output name of 'f_5[1]'. +print a thermo column name of 'f_rxns:diels_alder', compared to the default column +output name of 'f_rxns[1]'. No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. 
From 389d07719055600e4014f3d17c7b7c1c15b1c672 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 10:50:04 -0500 Subject: [PATCH 229/604] bboxflag only needs to be set once in the constructor --- src/region_block.cpp | 3 +-- src/region_cone.cpp | 3 +-- src/region_cylinder.cpp | 3 +-- src/region_prism.cpp | 3 +-- src/region_sphere.cpp | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/region_block.cpp b/src/region_block.cpp index 0c3ed5dbf5b..56651bd8d44 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -158,10 +158,10 @@ RegBlock::RegBlock(LAMMPS *lmp, int narg, char **arg) : // set extent of block if (interior) { + bboxflag = 1; if (dynamic || varshape) { RegBlock::bbox_update(); } else { - bboxflag = 1; extent_xlo = xlo; extent_xhi = xhi; extent_ylo = ylo; @@ -548,7 +548,6 @@ void RegBlock::bbox_update() zmax = std::max(zmax, pos[2]); } - bboxflag = 1; extent_xlo = xmin; extent_xhi = xmax; extent_ylo = ymin; diff --git a/src/region_cone.cpp b/src/region_cone.cpp index 20df0d6f504..56d7330cef9 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -269,10 +269,10 @@ RegCone::RegCone(LAMMPS *lmp, int narg, char **arg) : maxradius = std::max(radiuslo, radiushi); if (interior) { + bboxflag = 1; if (dynamic || varshape) { RegCone::bbox_update(); } else { - bboxflag = 1; if (axis == 'x') { extent_xlo = lo; extent_xhi = hi; @@ -900,7 +900,6 @@ void RegCone::bbox_update() zmax = std::max(zmax, pos[2]); } - bboxflag = 1; extent_xlo = xmin; extent_xhi = xmax; extent_ylo = ymin; diff --git a/src/region_cylinder.cpp b/src/region_cylinder.cpp index f3e3bb118e9..37b6a30444d 100644 --- a/src/region_cylinder.cpp +++ b/src/region_cylinder.cpp @@ -204,10 +204,10 @@ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : // for variable radius, uses initial radius if (interior) { + bboxflag = 1; if (dynamic || varshape) { RegCylinder::bbox_update(); } else { - bboxflag = 1; if (axis == 'x') { extent_xlo = lo; 
extent_xhi = hi; @@ -883,7 +883,6 @@ void RegCylinder::bbox_update() zmax = std::max(zmax, pos[2]); } - bboxflag = 1; extent_xlo = xmin; extent_xhi = xmax; extent_ylo = ymin; diff --git a/src/region_prism.cpp b/src/region_prism.cpp index ba6485d1e5b..17287635cba 100644 --- a/src/region_prism.cpp +++ b/src/region_prism.cpp @@ -195,10 +195,10 @@ RegPrism::RegPrism(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg), // extent of prism if (interior) { + bboxflag = 1; if (dynamic || varshape) { RegPrism::bbox_update(); } else { - bboxflag = 1; extent_xlo = MIN(xlo, xlo + xy); extent_xlo = MIN(extent_xlo, extent_xlo + xz); extent_ylo = MIN(ylo, ylo + yz); @@ -652,7 +652,6 @@ void RegPrism::bbox_update() zmax = std::max(zmax, pos[2]); } - bboxflag = 1; extent_xlo = xmin; extent_xhi = xmax; extent_ylo = ymin; diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index a1868307d72..9486ea31b4a 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -82,10 +82,10 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : // for variable radius, uses initial radius and origin for variable center if (interior) { + bboxflag = 1; if (dynamic || varshape) { RegSphere::bbox_update(); } else { - bboxflag = 1; extent_xlo = xc - radius; extent_xhi = xc + radius; extent_ylo = yc - radius; @@ -217,7 +217,6 @@ void RegSphere::shape_update() void RegSphere::bbox_update() { if (varshape || dynamic) { - bboxflag = 1; extent_xlo = xc - radius; extent_xhi = xc + radius; extent_ylo = yc - radius; From 8604770eb9dcca2008362ce6ef6ecad034a18b29 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 10:50:53 -0500 Subject: [PATCH 230/604] add bounding box update for ellipsoid region --- src/region_ellipsoid.cpp | 130 +++++++++++++++++++++++++++++++++------ src/region_ellipsoid.h | 1 + 2 files changed, 111 insertions(+), 20 deletions(-) diff --git a/src/region_ellipsoid.cpp b/src/region_ellipsoid.cpp index cfed7968546..c910ddee0d2 100644 --- 
a/src/region_ellipsoid.cpp +++ b/src/region_ellipsoid.cpp @@ -16,30 +16,33 @@ #include "domain.h" #include "error.h" #include "input.h" +#include "math_extra.h" #include "variable.h" +#include #include #include using namespace LAMMPS_NS; -static double GetRoot2D(double r0, double z0, double z1, double g); -static double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g); - -static double DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0, - double &x1); -static double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1, - double y2, double &x0, double &x1, double &x2); +namespace { +double GetRoot2D(double r0, double z0, double z1, double g); +double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g); +double DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0, double &x1); +double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1, double y2, + double &x0, double &x1, double &x2); -static constexpr int maxIterations = +constexpr int maxIterations = std::numeric_limits::digits - std::numeric_limits::min_exponent; -static constexpr double EPSILON = std::numeric_limits::epsilon() * 2.0; +constexpr double EPSILON = std::numeric_limits::epsilon() * 2.0; +constexpr double BIG = 1.0e200; +} // namespace /* ---------------------------------------------------------------------- */ RegEllipsoid::RegEllipsoid(LAMMPS *lmp, int narg, char **arg) : - Region(lmp, narg, arg), xvar(-1), yvar(-1), zvar(-1), avar(-1), bvar(-1), cvar(-1), - xstr(nullptr), ystr(nullptr), zstr(nullptr), astr(nullptr), bstr(nullptr), cstr(nullptr) + Region(lmp, narg, arg), xvar(-1), yvar(-1), zvar(-1), avar(-1), bvar(-1), cvar(-1), + xstr(nullptr), ystr(nullptr), zstr(nullptr), astr(nullptr), bstr(nullptr), cstr(nullptr) { options(narg - 8, &arg[8]); @@ -117,16 +120,19 @@ RegEllipsoid::RegEllipsoid(LAMMPS *lmp, int narg, char **arg) : // extent of ellipsoid 
// for variable axes, uses initial axes and origin for variable center - if (interior && !dynamic && !varshape) { + if (interior) { bboxflag = 1; - extent_xlo = xc - a; - extent_xhi = xc + a; - extent_ylo = yc - b; - extent_yhi = yc + b; - extent_zlo = zc - c; - extent_zhi = zc + c; - } else - bboxflag = 0; + if (dynamic || varshape) { + RegEllipsoid::bbox_update(); + } else { + extent_xlo = xc - a; + extent_xhi = xc + a; + extent_ylo = yc - b; + extent_yhi = yc + b; + extent_zlo = zc - c; + extent_zhi = zc + c; + } + } cmax = 1; contact = new Contact[cmax]; @@ -386,6 +392,88 @@ void RegEllipsoid::shape_update() } } +/* update the boundary information */ + +void RegEllipsoid::bbox_update() +{ + if (varshape || dynamic) { + double corners[2][4][3], pos[3]; + double xmin, xmax, ymin, ymax, zmin, zmax; + + // define bounding box corners in region internal positions + + xmin = xc - a; + xmax = xc + a; + ymin = yc - b; + ymax = yc + b; + zmin = zc - c; + zmax = zc + c; + + // face[0] + + corners[0][0][0] = xmin; + corners[0][0][1] = ymin; + corners[0][0][2] = zmin; + corners[0][1][0] = xmin; + corners[0][1][1] = ymin; + corners[0][1][2] = zmax; + corners[0][2][0] = xmin; + corners[0][2][1] = ymax; + corners[0][2][2] = zmax; + corners[0][3][0] = xmin; + corners[0][3][1] = ymax; + corners[0][3][2] = zmin; + + // face[1] + + corners[1][0][0] = xmax; + corners[1][0][1] = ymin; + corners[1][0][2] = zmin; + corners[1][1][0] = xmax; + corners[1][1][1] = ymin; + corners[1][1][2] = zmax; + corners[1][2][0] = xmax; + corners[1][2][1] = ymax; + corners[1][2][2] = zmax; + corners[1][3][0] = xmax; + corners[1][3][1] = ymax; + corners[1][3][2] = zmin; + + // the corners of face[0] and face[1] cover the full extent of the region + // transform and get min/max in x-, y-, and z-direction for each corner + + xmin = ymin = zmin = BIG; + xmax = ymax = zmax = -BIG; + + for (int i = 0; i < 4; ++i) { + MathExtra::copy3(corners[0][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = 
std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + + MathExtra::copy3(corners[1][i], pos); + forward_transform(pos[0], pos[1], pos[2]); + xmin = std::min(xmin, pos[0]); + xmax = std::max(xmax, pos[0]); + ymin = std::min(ymin, pos[1]); + ymax = std::max(ymax, pos[1]); + zmin = std::min(zmin, pos[2]); + zmax = std::max(zmax, pos[2]); + } + + extent_xlo = xmin; + extent_xhi = xmax; + extent_ylo = ymin; + extent_yhi = ymax; + extent_zlo = zmin; + extent_zhi = zmax; + } +} + /* ---------------------------------------------------------------------- error check on existence of variable ------------------------------------------------------------------------- */ @@ -435,6 +523,7 @@ void RegEllipsoid::variable_check() } } +namespace { // ------------------------------------------------------------------ // David Eberly, Geometric Tools, Redmond WA 98052 // Copyright (c) 1998-2021 @@ -609,3 +698,4 @@ double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double } return distance; } +} // namespace diff --git a/src/region_ellipsoid.h b/src/region_ellipsoid.h index 16bebe4c27d..0ef410e6390 100644 --- a/src/region_ellipsoid.h +++ b/src/region_ellipsoid.h @@ -36,6 +36,7 @@ class RegEllipsoid : public Region { int surface_interior(double *, double) override; int surface_exterior(double *, double) override; void shape_update() override; + void bbox_update() override; private: double xc, yc, zc; From 3d930432b50b6354e3da9f8e0d97e6e99a874091 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 10:12:22 -0500 Subject: [PATCH 231/604] more error message improvements --- src/MC/fix_gcmc.cpp | 2 +- src/MC/fix_mol_swap.cpp | 3 +-- src/MC/fix_widom.cpp | 38 ++++++++++++++++++--------------- src/PHONON/dynamical_matrix.cpp | 15 +++++++------ src/REACTION/fix_bond_react.cpp | 11 ++++++---- src/region_deprecated.cpp | 2 
+- 6 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index 835d3486a19..276b8d4310a 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -462,7 +462,7 @@ int FixGCMC::setmask() void FixGCMC::init() { - if (!atom->mass) error->all(FLERR, "Fix gcmc requires per atom type masses"); + if (!atom->mass) error->all(FLERR, Error::NOLASTLINE, "Fix gcmc requires per atom type masses"); if (atom->rmass_flag && (comm->me == 0)) error->warning(FLERR, "Fix gcmc will use per atom type masses for velocity initialization"); diff --git a/src/MC/fix_mol_swap.cpp b/src/MC/fix_mol_swap.cpp index b0c3b37c28d..e4b05a81c5a 100644 --- a/src/MC/fix_mol_swap.cpp +++ b/src/MC/fix_mol_swap.cpp @@ -87,8 +87,7 @@ FixMolSwap::FixMolSwap(LAMMPS *lmp, int narg, char **arg) : if (seed <= 0) error->all(FLERR,"Illegal fix mol/swap command"); if (temperature <= 0.0) error->all(FLERR,"Illegal fix mol/swap command"); if (ke_flag && atom->rmass) - error->all(FLERR,"Cannot conserve kinetic energy with fix mol/swap " - "unless per-type masses"); + error->all(FLERR,"Cannot conserve kinetic energy with fix mol/swap unless per-type masses"); beta = 1.0/(force->boltz*temperature); diff --git a/src/MC/fix_widom.cpp b/src/MC/fix_widom.cpp index 099e6437a77..1c22524e7bb 100644 --- a/src/MC/fix_widom.cpp +++ b/src/MC/fix_widom.cpp @@ -281,7 +281,7 @@ int FixWidom::setmask() void FixWidom::init() { - if (!atom->mass) error->all(FLERR, "Fix widom requires per atom type masses"); + if (!atom->mass) error->all(FLERR, Error::NOLASTLINE, "Fix widom requires per atom type masses"); if (atom->rmass_flag && (comm->me == 0)) error->warning(FLERR, "Fix widom will use per atom type masses for velocity initialization"); @@ -291,14 +291,15 @@ void FixWidom::init() if (idregion) { region = domain->get_region_by_id(idregion); - if (!region) error->all(FLERR, "Region {} for fix widom does not exist", idregion); + if (!region) + error->all(FLERR, Error::NOLASTLINE, 
"Region {} for fix widom does not exist", idregion); } if (region) { if (region->bboxflag == 0) - error->all(FLERR,"Fix gcmc region does not support a bounding box"); + error->all(FLERR, Error::NOLASTLINE, "Fix widom region does not support a bounding box"); if (region->dynamic_check()) - error->all(FLERR,"Fix gcmc region cannot be dynamic"); + error->all(FLERR, Error::NOLASTLINE, "Fix widom region cannot be dynamic"); region_xlo = region->extent_xlo; region_xhi = region->extent_xhi; @@ -311,12 +312,13 @@ void FixWidom::init() if ((region_xlo < domain->boxlo_bound[0]) || (region_xhi > domain->boxhi_bound[0]) || (region_ylo < domain->boxlo_bound[1]) || (region_yhi > domain->boxhi_bound[1]) || (region_zlo < domain->boxlo_bound[2]) || (region_zhi > domain->boxhi_bound[2])) - error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id); + error->all(FLERR, Error::NOLASTLINE, + "Fix widom region {} extends outside simulation box", region->id); } else { if ((region_xlo < domain->boxlo[0]) || (region_xhi > domain->boxhi[0]) || (region_ylo < domain->boxlo[1]) || (region_yhi > domain->boxhi[1]) || (region_zlo < domain->boxlo[2]) || (region_zhi > domain->boxhi[2])) - error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id); + error->all(FLERR, Error::NOLASTLINE, "Fix widom region {} extends outside simulation box", region->id); } } @@ -335,15 +337,16 @@ void FixWidom::init() (force->pair->tail_flag)) { full_flag = true; if (comm->me == 0) - error->warning(FLERR,"Fix widom using full_energy option"); + error->warning(FLERR, "Fix widom using full_energy option"); } } if (full_flag) c_pe = modify->get_compute_by_id("thermo_pe"); if (exchmode == EXCHATOM) { - if (nwidom_type <= 0 || nwidom_type > atom->ntypes) - error->all(FLERR,"Invalid atom type in fix widom command"); + if ((nwidom_type <= 0) || (nwidom_type > atom->ntypes)) + error->all(FLERR, Error::NOLASTLINE, + "Invalid atom type {} in fix widom command", nwidom_type); } // 
if molecules are exchanged or moved, check for unset mol IDs @@ -357,16 +360,17 @@ void FixWidom::init() int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) - error->all(FLERR, "All mol IDs should be set for fix widom group atoms"); + error->all(FLERR, Error::NOLASTLINE, "All mol IDs should be set for fix widom group atoms"); } if (exchmode == EXCHMOL) if (atom->molecule_flag == 0 || !atom->tag_enable || (atom->map_style == Atom::MAP_NONE)) - error->all(FLERR, "Fix widom molecule command requires that atoms have molecule attributes"); + error->all(FLERR, Error::NOLASTLINE, + "Fix widom molecule command requires that atoms have molecule attributes"); if (domain->dimension == 2) - error->all(FLERR,"Cannot use fix widom in a 2d simulation"); + error->all(FLERR, Error::NOLASTLINE, "Cannot use fix widom in a 2d simulation"); // create a new group for interaction exclusions // used for attempted atom or molecule deletions @@ -380,7 +384,7 @@ void FixWidom::init() group->assign(group_id + " subtract all all"); exclusion_group = group->find(group_id); if (exclusion_group == -1) - error->all(FLERR,"Could not find fix widom exclusion group ID"); + error->all(FLERR, Error::NOLASTLINE, "Could not find fix widom exclusion group ID {}", group_id); exclusion_group_bit = group->bitmask[exclusion_group]; // neighbor list exclusion setup @@ -421,7 +425,7 @@ void FixWidom::init() } else gas_mass = atom->mass[nwidom_type]; - if (gas_mass <= 0.0) error->all(FLERR,"Illegal fix widom gas mass <= 0"); + if (gas_mass <= 0.0) error->all(FLERR, Error::NOLASTLINE, "Illegal fix widom gas mass <= 0"); // check that no deletable atoms are in atom->firstgroup // deleting such an atom would not leave firstgroup atoms first @@ -438,7 +442,7 @@ void FixWidom::init() MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) - error->all(FLERR,"Cannot use fix widom on atoms in atom_modify first group"); + error->all(FLERR, Error::NOLASTLINE, "Cannot use fix widom 
on atoms in atom_modify first group"); } // compute beta @@ -451,8 +455,8 @@ void FixWidom::init() // full_flag on molecules on more than one processor. // Print error if this is the current mode if (full_flag && (exchmode == EXCHMOL) && comm->nprocs > 1) - error->all(FLERR,"fix widom does currently not support full_energy option with " - "molecules on more than 1 MPI process."); + error->all(FLERR, Error::NOLASTLINE, "fix widom does currently not support full_energy option " + "with molecules on more than 1 MPI process."); } diff --git a/src/PHONON/dynamical_matrix.cpp b/src/PHONON/dynamical_matrix.cpp index f22ca0d8b7a..076791c1421 100644 --- a/src/PHONON/dynamical_matrix.cpp +++ b/src/PHONON/dynamical_matrix.cpp @@ -241,13 +241,14 @@ void DynamicalMatrix::openfile(const char *filename) if (me == 0) { if (compressed) { fp = platform::compressed_write(std::string(filename)+".gz"); - if (!fp) error->one(FLERR,"Cannot open compressed file"); + if (!fp) error->one(FLERR, Error::NOLASTLINE, "Cannot open gzip compressed file"); } else if (binaryflag) { fp = fopen(filename,"wb"); } else { fp = fopen(filename,"w"); } - if (!fp) error->one(FLERR,"Cannot open dynmat file: {}", utils::getsyserror()); + if (!fp) + error->one(FLERR, Error::NOLASTLINE, "Cannot open dynmat file: {}", utils::getsyserror()); } file_opened = 1; @@ -283,9 +284,9 @@ void DynamicalMatrix::calculateMatrix() if (me == 0 && screen) { fputs("Calculating Dynamical Matrix ...\n", screen); utils::print(screen," Total # of atoms = {}\n" - " Atoms in group = {}\n" - " Total dynamical matrix elements = {}\n", - natoms, gcount, dynlen*dynlen); + " Atoms in group = {}\n" + " Total dynamical matrix elements = {}\n", + natoms, gcount, dynlen*dynlen); } // emit dynlen rows of dimalpha*dynlen*dimbeta elements @@ -380,7 +381,7 @@ void DynamicalMatrix::writeMatrix(double **dynmat) for (int i=0; i<3; i++) fwrite(dynmat[i], sizeof(double), dynlenb, fp); if (ferror(fp)) - error->one(FLERR, "Error writing to binary 
file"); + error->one(FLERR, Error::NOLASTLINE, "Error writing to binary file {}", utils::getsyserror()); } else { for (int i = 0; i < 3; i++) { for (bigint j = 0; j < dynlenb; j++) { @@ -389,7 +390,7 @@ void DynamicalMatrix::writeMatrix(double **dynmat) } } if (ferror(fp)) - error->one(FLERR,"Error writing to file"); + error->one(FLERR, Error::NOLASTLINE, "Error writing to text file {}", utils::getsyserror()); } } diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index b3c2d40f60d..2d8f10cba16 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -4083,15 +4083,18 @@ void FixBondReact::CreateAtoms(char *line, Reaction &rxn, int ncreate) for (int i = 0; i < ncreate; i++) { readline(line); rv = sscanf(line,"%d",&tmp); - if (rv != 1) error->one(FLERR, "CreateIDs section is incorrectly formatted"); + if (rv != 1) error->one(FLERR, Error::NOLASTLINE, "CreateIDs section is incorrectly formatted"); if (tmp > rxn.product->natoms) - error->one(FLERR,"Fix bond/react: Invalid atom ID in CreateIDs section of map file"); + error->one(FLERR, Error::NOLASTLINE, "Fix bond/react: Invalid atom ID in CreateIDs section of map file"); rxn.atoms[tmp-1].created = 1; } if (rxn.product->xflag == 0) - error->one(FLERR,"Fix bond/react: 'Coords' section required in post-reaction template when creating new atoms"); + error->one(FLERR, Error::NOLASTLINE, + "Fix bond/react: 'Coords' section required in post-reaction template when creating new atoms"); if (atom->rmass_flag && !rxn.product->rmassflag) - error->one(FLERR, "Fix bond/react: 'Masses' section required in post-reaction template when creating new atoms if per-atom masses are defined."); + error->one(FLERR, Error::NOLASTLINE, + "Fix bond/react: 'Masses' section required in post-reaction template when creating new atoms " + "and per-atom masses are defined."); } void FixBondReact::CustomCharges(int ifragment, Reaction &rxn) diff --git a/src/region_deprecated.cpp 
b/src/region_deprecated.cpp index 00142b13ed6..05e978d8f25 100644 --- a/src/region_deprecated.cpp +++ b/src/region_deprecated.cpp @@ -28,5 +28,5 @@ RegionDeprecated::RegionDeprecated(LAMMPS *lmp, int narg, char **arg) : Region(l if (lmp->comm->me == 0) utils::logmesg(lmp, "\nRegion style 'DEPRECATED' is a dummy style\n\n"); return; } - error->all(FLERR, "This region style is no longer available"); + error->all(FLERR, 1, "Region style {} is no longer available", my_style); } From 2eb966324c64499d5a5752969895f5c404a8366d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 11:08:59 -0500 Subject: [PATCH 232/604] check that single precision FFTs can be compiled and tested correctly --- .github/workflows/unittest-single.yml | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 .github/workflows/unittest-single.yml diff --git a/.github/workflows/unittest-single.yml b/.github/workflows/unittest-single.yml new file mode 100644 index 00000000000..7a5d9b70fb4 --- /dev/null +++ b/.github/workflows/unittest-single.yml @@ -0,0 +1,80 @@ +# GitHub action to build LAMMPS on Linux and run standard unit tests +name: "Unittest for Linux /w -DFFT_SINGLE=ON" + +on: + push: + branches: + - develop + - maintenance + pull_request: + branches: + - develop + + workflow_dispatch: + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + build: + name: Linux Unit Test + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + fetch-depth: 2 + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache \ + libeigen3-dev \ + libcurl4-openssl-dev \ + mold \ + ninja-build \ + python3-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up 
ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-unit-ccache-${{ github.sha }} + restore-keys: linux-unit-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install numpy + python3 -m pip install pyyaml + cmake -S cmake -B build \ + -C cmake/presets/gcc.cmake \ + -C cmake/presets/most.cmake \ + -C cmake/presets/nolib.cmake \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D BUILD_SHARED_LIBS=on \ + -D FFT=kiss \ + -D FFT_SINGLE=on \ + -D DOWNLOAD_POTENTIALS=off \ + -D ENABLE_TESTING=on \ + -D MLIAP_ENABLE_PYTHON=off \ + -G Ninja + cmake --build build + ccache -s + + - name: Run Tests + working-directory: build + shell: bash + run: ctest -V From 9b3af8a83fb0ec5b1e7d284b1d52017f88ea988a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 11:20:20 -0500 Subject: [PATCH 233/604] implement bugfixes suggested by GitHub copilot --- src/dump_image.cpp | 8 ++++++++ src/region_sphere.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/dump_image.cpp b/src/dump_image.cpp index bb958b46646..ad07a2e5d04 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -1829,6 +1829,7 @@ void DumpImage::create_image() // inconsistent style. should not happen. if (!myreg) continue; + // construct coordinates for lo/hi tip/center of cone double lo[3], hi[3]; if (myreg->axis == 'x') { lo[0] = myreg->lo; @@ -1852,6 +1853,9 @@ void DumpImage::create_image() hi[1] = myreg->c2; hi[2] = myreg->hi; } + // Apply forward_transform for cone lo/hi tip/center for dynamic regions + myreg->forward_transform(lo[0], lo[1], lo[2]); + myreg->forward_transform(hi[0], hi[1], hi[2]); double p1[3], p2[3], p3[3], p4[3]; if (reg.style == FRAME) { @@ -1990,6 +1994,7 @@ void DumpImage::create_image() // inconsistent style. should not happen. 
if (!myreg) continue; + // construct coordinates for lo/hi center of cylinder double lo[3], hi[3]; if (myreg->axis == 'x') { lo[0] = myreg->lo; @@ -2013,6 +2018,9 @@ void DumpImage::create_image() hi[1] = myreg->c2; hi[2] = myreg->hi; } + // Apply forward_transform for cylinder lo/hi center for dynamic regions + myreg->forward_transform(lo[0], lo[1], lo[2]); + myreg->forward_transform(hi[0], hi[1], hi[2]); double p1[3], p2[3], p3[3], p4[3]; if (reg.style == FRAME) { diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index 9486ea31b4a..6061fe43978 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -82,7 +82,7 @@ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : // for variable radius, uses initial radius and origin for variable center if (interior) { - bboxflag = 1; + bboxflag = 1; if (dynamic || varshape) { RegSphere::bbox_update(); } else { From 25a68ffd51ce12ffe4276cbf74bf96782c2374e9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Nov 2025 10:46:55 -0700 Subject: [PATCH 234/604] Fix race condition --- src/KOKKOS/comm_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 4f034e5a1cf..75c2bab1c8a 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -313,6 +313,7 @@ void CommKokkos::reverse_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_reverse_self(sendnum[iswap],k_sendlist_iswap, firstrecv[iswap]); + DeviceType().fence(); } } } From 5af84ac37f563163b66f03ee316ded998c6444bb Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Nov 2025 10:47:13 -0700 Subject: [PATCH 235/604] Restore no-init --- src/KOKKOS/memory_kokkos.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index b84fe0e33cf..fad2aba98cd 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -426,7 +426,7 @@ 
template static std::enable_if_t realloc_kokkos(TYPE &data, const char *name, Indices... ns) { data = TYPE(); - data = TYPE(std::string(name), ns...); + data = TYPE(Kokkos::NoInit(std::string(name)), ns...); } template @@ -434,7 +434,7 @@ static std::enable_if_t Date: Tue, 11 Nov 2025 12:54:44 -0700 Subject: [PATCH 236/604] Move fences around --- src/KOKKOS/comm_kokkos.cpp | 116 +++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 75c2bab1c8a..82a346b4457 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -178,14 +178,16 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(), n,MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) + if (size_forward_recv[iswap]) { MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else if (ghost_velocity) { if (size_forward_recv[iswap]) { @@ -196,14 +198,16 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType().fence(); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view().data(), @@ -212,13 +216,16 @@ void 
CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); - if (n) + if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } atomKK->avecKK->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType().fence(); } } else { if (!ghost_velocity) { @@ -226,15 +233,12 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_self(sendnum[iswap],k_sendlist_iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); } } else { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType().fence(); } } } @@ -286,11 +290,14 @@ void CommKokkos::reverse_comm_device() buf = (double*)atomKK->k_f.view().data() + firstrecv[iswap]*atomKK->k_f.view().extent(1); + DeviceType().fence(); MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, recvproc[iswap],0,world); } - if (size_reverse_recv[iswap]) + if (size_reverse_recv[iswap]) { MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else { if (size_reverse_recv[iswap]) @@ -298,22 +305,24 @@ void CommKokkos::reverse_comm_device() size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); n = atomKK->avecKK->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType().fence(); - if (n) + if (n) { + DeviceType().fence(); 
MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,recvproc[iswap],0,world); - if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (size_reverse_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_recv); - DeviceType().fence(); } else { if (sendnum[iswap]) { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_reverse_self(sendnum[iswap],k_sendlist_iswap, firstrecv[iswap]); - DeviceType().fence(); } } } @@ -366,7 +375,6 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = fixKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send_fix,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -388,9 +396,15 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) MPI_Irecv(buf_recv_fix,nsize*recvnum[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } - if (sendnum[iswap]) + if (sendnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_fix,n,MPI_DOUBLE,sendproc[iswap],0,world); - if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (recvnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_fix.modify_host(); @@ -402,7 +416,6 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) // unpack buffer fixKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); - DeviceType().fence(); } } @@ -541,7 +554,6 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap, 
k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -563,9 +575,15 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) MPI_Irecv(buf_recv_pair,nsize*recvnum[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } - if (sendnum[iswap]) + if (sendnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); - if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (recvnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_pair.modify_host(); @@ -577,7 +595,6 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); - DeviceType().fence(); } } @@ -637,7 +654,6 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) // pack buffer n = pairKKBase->pack_reverse_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send_pair); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -657,9 +673,14 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) if (sendproc[iswap] != me) { if (sendnum[iswap]) MPI_Irecv(buf_recv_pair,nsize*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world,&request); - if (recvnum[iswap]) + if (recvnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_pair,n,MPI_DOUBLE,recvproc[iswap],0,world); - if (sendnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (sendnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_pair.modify_host(); @@ -673,7 +694,6 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_tmp); - DeviceType().fence(); } } 
@@ -886,7 +906,6 @@ void CommKokkos::exchange_device() atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send, k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); - DeviceType().fence(); atom->nlocal = nlocal; // send/recv atoms in both directions @@ -913,21 +932,26 @@ void CommKokkos::exchange_device() MPI_Irecv(k_buf_recv.view().data(),nrecv1, MPI_DOUBLE,procneigh[dim][1],0, world,&request); + + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),nsend, MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); if (procgrid[dim] > 2) { MPI_Irecv(k_buf_recv.view().data()+nrecv1, nrecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); + MPI_Send(k_buf_send.view().data(),nsend, MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); } + DeviceType().fence(); if (nrecv) { - if (atom->nextra_grow) { if ((int) k_indices.extent(0) < nrecv/data_size) MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); @@ -938,8 +962,6 @@ void CommKokkos::exchange_device() atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space,k_indices); - - DeviceType().fence(); } } @@ -955,7 +977,6 @@ void CommKokkos::exchange_device() nextrasend = kkbase->pack_exchange_kokkos( count,k_buf_send,k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); - DeviceType().fence(); } int nextrarecv,nextrarecv1,nextrarecv2; @@ -980,25 +1001,30 @@ void CommKokkos::exchange_device() MPI_Irecv(k_buf_recv.view().data(),nextrarecv1, MPI_DOUBLE,procneigh[dim][1],0, world,&request); + + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),nextrasend, MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); if (procgrid[dim] > 2) { MPI_Irecv(k_buf_recv.view().data()+nextrarecv1, nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, 
MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); } + DeviceType().fence(); if (nextrarecv) { kkbase->unpack_exchange_kokkos( k_buf_recv,k_indices,nrecv/data_size, nrecv1/data_size,nextrarecv1, ExecutionSpaceFromDevice::space); - DeviceType().fence(); } } } @@ -1265,13 +1291,11 @@ void CommKokkos::borders_device() { n = atomKK->avecKK-> pack_border_vel_kokkos(nsend,k_sendlist_iswap,k_buf_send, pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType().fence(); } else { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK-> pack_border_kokkos(nsend,k_sendlist_iswap,k_buf_send, pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType().fence(); } // swap atoms with other proc @@ -1286,9 +1310,15 @@ void CommKokkos::borders_device() { if (nrecv) MPI_Irecv(k_buf_recv.view().data(), nrecv*size_border,MPI_DOUBLE, recvproc[iswap],0,world,&request); - if (n) MPI_Send(k_buf_send.view().data(),n, - MPI_DOUBLE,sendproc[iswap],0,world); - if (nrecv) MPI_Wait(&request,MPI_STATUS_IGNORE); + if (n) { + DeviceType().fence(); + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + } + if (nrecv) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else { nrecv = nsend; } @@ -1299,21 +1329,17 @@ void CommKokkos::borders_device() { if (sendproc[iswap] != me) { atomKK->avecKK->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType().fence(); } else { atomKK->avecKK->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); - DeviceType().fence(); } } else { if (sendproc[iswap] != me) { atomKK->avecKK->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType().fence(); } else { atomKK->avecKK->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); - DeviceType().fence(); } } // set all pointers & counters From 6291b5de4374f8964e08fbb862657d45294922ec Mon Sep 17 
00:00:00 2001 From: Stan Moore Date: Tue, 11 Nov 2025 13:44:07 -0700 Subject: [PATCH 237/604] Add missing modify --- src/KOKKOS/atom_vec_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 5610c4cc65b..f065c6e11ab 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -2442,6 +2442,8 @@ int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr } } + atomKK->modified(space,datamask_exchange); + return nsend*size_exchange; } From c18fd82fa03ffdf87201f6d0b31ce7b21ad4f72f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 17:03:16 -0500 Subject: [PATCH 238/604] refactor error tolerance handling to use existing constants in header --- unittest/utils/fft_test_helpers.h | 12 +++++------- unittest/utils/test_fft3d.cpp | 6 ------ unittest/utils/test_fft3d_kokkos.cpp | 10 ++-------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/unittest/utils/fft_test_helpers.h b/unittest/utils/fft_test_helpers.h index b6ff54c403d..d4f84153a67 100644 --- a/unittest/utils/fft_test_helpers.h +++ b/unittest/utils/fft_test_helpers.h @@ -47,14 +47,12 @@ constexpr double TWO_PI = 2.0 * PI; // Precision-aware tolerances (based on FFT_SCALAR type) #ifdef FFT_SINGLE -constexpr FFT_SCALAR BASE_TOLERANCE = 1e-5; -constexpr FFT_SCALAR ROUNDTRIP_TOLERANCE = 1e-4; -constexpr FFT_SCALAR KNOWN_ANSWER_TOLERANCE = 1e-5; -constexpr FFT_SCALAR PARSEVAL_TOLERANCE = 1e-4; -constexpr FFT_SCALAR HERMITIAN_TOLERANCE = 1e-5; -constexpr FFT_SCALAR LINEARITY_TOLERANCE = 1e-4; +constexpr FFT_SCALAR ROUNDTRIP_TOLERANCE = 1e-5; +constexpr FFT_SCALAR KNOWN_ANSWER_TOLERANCE = 1e-6; +constexpr FFT_SCALAR PARSEVAL_TOLERANCE = 1e-5; +constexpr FFT_SCALAR HERMITIAN_TOLERANCE = 1e-6; +constexpr FFT_SCALAR LINEARITY_TOLERANCE = 1e-5; #else -constexpr FFT_SCALAR BASE_TOLERANCE = 1e-12; constexpr FFT_SCALAR ROUNDTRIP_TOLERANCE = 1e-11; constexpr FFT_SCALAR 
KNOWN_ANSWER_TOLERANCE = 1e-12; constexpr FFT_SCALAR PARSEVAL_TOLERANCE = 1e-10; diff --git a/unittest/utils/test_fft3d.cpp b/unittest/utils/test_fft3d.cpp index 0485c1277c5..c11ca6f64b5 100644 --- a/unittest/utils/test_fft3d.cpp +++ b/unittest/utils/test_fft3d.cpp @@ -54,12 +54,6 @@ using namespace FFTValidation; // whether to print verbose output (i.e. not capturing LAMMPS screen output). bool verbose = false; -#ifdef FFT_SINGLE -static constexpr double TOLERANCE = 1.0e-7; -#else -static constexpr double TOLERANCE = 1.0e-10; -#endif - class FFT3DTest : public LAMMPSTest { protected: void SetUp() override diff --git a/unittest/utils/test_fft3d_kokkos.cpp b/unittest/utils/test_fft3d_kokkos.cpp index 54676aec960..e65ed371bf5 100644 --- a/unittest/utils/test_fft3d_kokkos.cpp +++ b/unittest/utils/test_fft3d_kokkos.cpp @@ -59,12 +59,6 @@ using namespace FFTValidation; // Verbose output control bool verbose = false; -#ifdef FFT_SINGLE -static constexpr double TOLERANCE = 1.0e-7; -#else -static constexpr double TOLERANCE = 1.0e-10; -#endif - // Helper function to check if KOKKOS is using a GPU backend static bool is_kokkos_gpu_backend() { @@ -274,7 +268,7 @@ class FFT3DKokkosTest : public LAMMPSTest { } KnownAnswerValidator validator(output_vec.data(), expected_vec.data(), nfast, nmid, nslow, - TOLERANCE, verbose); + KNOWN_ANSWER_TOLERANCE, verbose); bool valid = validator.validate(); if (verbose || !valid) { @@ -514,7 +508,7 @@ TEST_F(FFT3DKokkosTest, KnownAnswer_Kokkos_Sine) std::complex(0.0, spike_amplitude)); KnownAnswerValidator validator(output_vec.data(), expected_data.data(), nfast, nmid, nslow, - TOLERANCE, verbose); + KNOWN_ANSWER_TOLERANCE, verbose); bool valid = validator.validate(); if (verbose || !valid) { From 5214c14cd085de8a8afbd3241a66eeafb4e57002 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 17:03:32 -0500 Subject: [PATCH 239/604] formatting, apply clang-format --- unittest/utils/test_fft3d.cpp | 180 +++++++++++++-------------- 
unittest/utils/test_fft3d_kokkos.cpp | 168 ++++++++++++------------- 2 files changed, 167 insertions(+), 181 deletions(-) diff --git a/unittest/utils/test_fft3d.cpp b/unittest/utils/test_fft3d.cpp index c11ca6f64b5..7c1948d286e 100644 --- a/unittest/utils/test_fft3d.cpp +++ b/unittest/utils/test_fft3d.cpp @@ -29,10 +29,10 @@ // 13. KISS_NonPowerOf2 - KISS FFT with various non-power-of-2 sizes (conditional) // 14. HeFFTe_Distributed - HeFFTe distributed FFT (conditional) -#include "lmpfftsettings.h" #include "KSPACE/fft3d_wrap.h" #include "info.h" #include "lammps.h" +#include "lmpfftsettings.h" #include "../testing/core.h" #include "fft_test_helpers.h" @@ -67,13 +67,13 @@ class FFT3DTest : public LAMMPSTest { LAMMPSTest::SetUp(); // Initialize FFT-related members - fft = nullptr; - input_data = nullptr; + fft = nullptr; + input_data = nullptr; output_data = nullptr; // Default grid size (will be set by individual tests) nfast = 0; - nmid = 0; + nmid = 0; nslow = 0; } @@ -92,14 +92,14 @@ class FFT3DTest : public LAMMPSTest { void create_serial_fft(int nfast_in, int nmid_in, int nslow_in) { nfast = nfast_in; - nmid = nmid_in; + nmid = nmid_in; nslow = nslow_in; // Total grid size int nsize = nfast * nmid * nslow; // Allocate data buffers (complex data: 2 * nsize) - input_data = new FFT_SCALAR[2 * nsize]; + input_data = new FFT_SCALAR[2 * nsize]; output_data = new FFT_SCALAR[2 * nsize]; // Zero buffers @@ -116,10 +116,10 @@ class FFT3DTest : public LAMMPSTest { int out_klo = 0, out_khi = nslow - 1; // FFT parameters - int scaled = 0; // No scaling - int permute = 0; // No permutation - int nbuf = 0; // Buffer size (output) - int usecollective = 0; // Use point-to-point communication + int scaled = 0; // No scaling + int permute = 0; // No permutation + int nbuf = 0; // Buffer size (output) + int usecollective = 0; // Use point-to-point communication // Create FFT3d object BEGIN_HIDE_OUTPUT(); @@ -149,7 +149,8 @@ TEST_F(FFT3DTest, BackendDetection) 
EXPECT_FALSE(fft_info.empty()) << "FFT info should not be empty"; // Check FFT library macro is defined -#if defined(FFT_KISS) || defined(FFT_FFTW3) || defined(FFT_MKL) || defined(FFT_NVPL) || defined(FFT_HEFFTE) +#if defined(FFT_KISS) || defined(FFT_FFTW3) || defined(FFT_MKL) || defined(FFT_NVPL) || \ + defined(FFT_HEFFTE) SUCCEED() << "FFT library: " << LMP_FFT_LIB; #else FAIL() << "No FFT library defined"; @@ -193,8 +194,8 @@ TEST_F(FFT3DTest, RoundTrip_Serial_32x32x32) } // Validate round-trip: input_data should match original_data - FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, nslow, - ROUNDTRIP_TOLERANCE, verbose); + FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); // Report results @@ -248,8 +249,8 @@ TEST_F(FFT3DTest, RoundTrip_Serial_64x64x64) } // Validate round-trip - FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, nslow, - ROUNDTRIP_TOLERANCE, verbose); + FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); // Report results @@ -303,8 +304,8 @@ TEST_F(FFT3DTest, RoundTrip_Serial_48x48x48) } // Validate round-trip - FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, nslow, - ROUNDTRIP_TOLERANCE, verbose); + FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -351,7 +352,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) // Validate against expected result FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, TOLERANCE, verbose); + nslow, KNOWN_ANSWER_TOLERANCE, verbose); bool passed = validator.validate(); if 
(verbose || !passed) { @@ -360,7 +361,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: " << TOLERANCE << std::endl; + std::cout << " Tolerance: " << KNOWN_ANSWER_TOLERANCE << std::endl; std::cout << " Status: " << (passed ? "PASSED" : "FAILED") << std::endl; // Show sample values @@ -372,7 +373,7 @@ TEST_F(FFT3DTest, KnownAnswer_DeltaFunction) } EXPECT_TRUE(passed) << "Delta function known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); + EXPECT_LT(validator.get_error_stats().max(), KNOWN_ANSWER_TOLERANCE); } // ============================================================================ @@ -404,13 +405,13 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) // Create expected output std::vector expected_data(2 * nsize, 0.0); - FFT_SCALAR dc_value = static_cast(nsize); // N³ + FFT_SCALAR dc_value = static_cast(nsize); // N³ set_complex(expected_data.data(), 0, 0, 0, nfast, nmid, std::complex(dc_value, 0.0)); // Validate against expected result FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, TOLERANCE, verbose); + nslow, KNOWN_ANSWER_TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -420,7 +421,7 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: " << TOLERANCE << std::endl; + std::cout << " Tolerance: " << KNOWN_ANSWER_TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; // Show DC component @@ -439,7 +440,7 @@ TEST_F(FFT3DTest, KnownAnswer_Constant) } EXPECT_TRUE(passed) << "Constant field known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); + EXPECT_LT(validator.get_error_stats().max(), KNOWN_ANSWER_TOLERANCE); } // ============================================================================ @@ -489,7 +490,7 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) // Validate against expected result FFTValidation::KnownAnswerValidator validator(output_data, expected_data.data(), nfast, nmid, - nslow, TOLERANCE, verbose); + nslow, KNOWN_ANSWER_TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { @@ -500,16 +501,16 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) std::cout << " Max error: " << validator.get_error_stats().max() << " (at index " << validator.get_error_stats().idx() << ")" << std::endl; std::cout << " Avg error: " << validator.get_error_stats().avg() << std::endl; - std::cout << " Tolerance: " << TOLERANCE << std::endl; + std::cout << " Tolerance: " << KNOWN_ANSWER_TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; // Show the spike values auto spike_pos = get_complex(output_data, 2, 0, 0, nfast, nmid); auto spike_neg = get_complex(output_data, nfast - 2, 0, 0, nfast, nmid); - std::cout << " Spike at (2,0,0): " << spike_pos << " (expected: (0, " - << -spike_amplitude << "))" << std::endl; - std::cout << " Spike at (" << (nfast - 2) << ",0,0): " << spike_neg - << " (expected: (0, " << spike_amplitude << "))" << std::endl; + std::cout << " Spike at (2,0,0): " << spike_pos << " (expected: (0, " << -spike_amplitude + << "))" << std::endl; + std::cout << " Spike at (" << (nfast - 2) << ",0,0): " << spike_neg << " (expected: (0, " + << spike_amplitude << "))" << std::endl; // Show sample of bins that should be zero std::cout << " Sample zero bins:" << std::endl; @@ -522,10 +523,9 @@ TEST_F(FFT3DTest, KnownAnswer_SineWave) } EXPECT_TRUE(passed) << "Sine wave known answer validation failed"; - EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); + EXPECT_LT(validator.get_error_stats().max(), KNOWN_ANSWER_TOLERANCE); } - // ============================================================================ // Test 8: Parseval's Theorem - Energy Conservation (32x32x32) // ============================================================================ @@ -550,7 +550,7 @@ TEST_F(FFT3DTest, ParsevalsTheorem_EnergyConservation) double spatial_energy = 0.0; for (int i = 0; i < nsize; i++) { std::complex val = FFTTestHelpers::get_complex_linear(spatial_data.data(), i); - spatial_energy += std::norm(val); // |z|² = re² + im² + spatial_energy += std::norm(val); // |z|² = re² + im² } // Forward FFT @@ -562,20 +562,20 @@ TEST_F(FFT3DTest, ParsevalsTheorem_EnergyConservation) double frequency_energy = 0.0; for (int i = 0; i < nsize; i++) { std::complex val = FFTTestHelpers::get_complex_linear(output_data, i); - frequency_energy += std::norm(val); // |Z|² = Re² + Im² + frequency_energy += std::norm(val); // |Z|² = Re² + Im² } // Apply Parseval's theorem normalization: 
E_spatial = (1/N³) × E_freq - double n_cubed = static_cast(nsize); + double n_cubed = static_cast(nsize); double frequency_energy_normalized = frequency_energy / n_cubed; // Calculate relative error - double abs_error = std::abs(spatial_energy - frequency_energy_normalized); + double abs_error = std::abs(spatial_energy - frequency_energy_normalized); double relative_error = (spatial_energy > 1e-14) ? abs_error / spatial_energy : abs_error; // Validate using ParsevalValidator FFTValidation::ParsevalValidator validator(spatial_data.data(), output_data, nfast, nmid, nslow, - PARSEVAL_TOLERANCE, verbose); + PARSEVAL_TOLERANCE, verbose); bool passed = validator.validate(); // Report results @@ -584,7 +584,8 @@ TEST_F(FFT3DTest, ParsevalsTheorem_EnergyConservation) std::cout << " Grid size (N³): " << nsize << std::endl; std::cout << " Spatial energy (Σ|x|²): " << spatial_energy << std::endl; std::cout << " Frequency energy (Σ|X|²): " << frequency_energy << std::endl; - std::cout << " Normalized frequency (Σ|X|²/N³): " << frequency_energy_normalized << std::endl; + std::cout << " Normalized frequency (Σ|X|²/N³): " << frequency_energy_normalized + << std::endl; std::cout << " Relative error: " << relative_error << std::endl; std::cout << " Tolerance: " << PARSEVAL_TOLERANCE << std::endl; std::cout << " Status: " << (passed ? 
"PASSED" : "FAILED") << std::endl; @@ -611,7 +612,7 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) // Grid dimensions nfast = 32; - nmid = 32; + nmid = 32; nslow = 32; // Domain decomposition: split along slow (z) dimension @@ -624,10 +625,10 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) if (rank == 0) { in_klo = 0; - in_khi = nslow / 2 - 1; // 0..15 + in_khi = nslow / 2 - 1; // 0..15 } else { in_klo = nslow / 2; - in_khi = nslow - 1; // 16..31 + in_khi = nslow - 1; // 16..31 } // Output decomposition: same as input for simplicity @@ -637,33 +638,32 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) // Calculate local size: each rank owns a slab int local_nfast = nfast; - int local_nmid = nmid; + int local_nmid = nmid; int local_nslow = in_khi - in_klo + 1; - int local_size = local_nfast * local_nmid * local_nslow; + int local_size = local_nfast * local_nmid * local_nslow; if (verbose) { - std::cout << "Rank " << rank << ": local grid = " << local_nfast << "x" << local_nmid - << "x" << local_nslow << " (z-range: " << in_klo << ".." << in_khi << ")" - << std::endl; + std::cout << "Rank " << rank << ": local grid = " << local_nfast << "x" << local_nmid << "x" + << local_nslow << " (z-range: " << in_klo << ".." 
<< in_khi << ")" << std::endl; } // Allocate local data buffers - input_data = new FFT_SCALAR[2 * local_size]; + input_data = new FFT_SCALAR[2 * local_size]; output_data = new FFT_SCALAR[2 * local_size]; std::memset(input_data, 0, 2 * local_size * sizeof(FFT_SCALAR)); std::memset(output_data, 0, 2 * local_size * sizeof(FFT_SCALAR)); // FFT parameters - int scaled = 0; // No scaling - int permute = 0; // No permutation - int nbuf = 0; // Buffer size (output) - int usecollective = 0; // Use point-to-point communication + int scaled = 0; // No scaling + int permute = 0; // No permutation + int nbuf = 0; // Buffer size (output) + int usecollective = 0; // Use point-to-point communication // Create MPI-aware FFT3d object BEGIN_HIDE_OUTPUT(); - fft = new FFT3d(lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, in_jlo, in_jhi, - in_klo, in_khi, out_ilo, out_ihi, out_jlo, out_jhi, out_klo, out_khi, - scaled, permute, &nbuf, usecollective); + fft = new FFT3d(lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, in_jlo, in_jhi, in_klo, + in_khi, out_ilo, out_ihi, out_jlo, out_jhi, out_klo, out_khi, scaled, permute, + &nbuf, usecollective); END_HIDE_OUTPUT(); ASSERT_NE(fft, nullptr); @@ -684,8 +684,8 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) FFT_SCALAR re = dist(rng); FFT_SCALAR im = dist(rng); - int local_idx = k * nmid * nfast + j * nfast + i; - input_data[2 * local_idx] = re; + int local_idx = k * nmid * nfast + j * nfast + i; + input_data[2 * local_idx] = re; input_data[2 * local_idx + 1] = im; } } @@ -706,7 +706,7 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) END_HIDE_OUTPUT(); // Apply normalization: LAMMPS backward FFT does not include 1/N³ scaling - int total_size = nfast * nmid * nslow; + int total_size = nfast * nmid * nslow; FFT_SCALAR norm = 1.0 / static_cast(total_size); for (int i = 0; i < 2 * local_size; i++) { input_data[i] *= norm; @@ -714,8 +714,8 @@ TEST_F(FFT3DTest, RoundTrip_MPI_2proc_32x32x32) // Validate round-trip on local data 
FFTValidation::RoundTripValidator validator(original_data.data(), input_data, local_nfast, - local_nmid, local_nslow, ROUNDTRIP_TOLERANCE, - verbose); + local_nmid, local_nslow, ROUNDTRIP_TOLERANCE, + verbose); bool passed = validator.validate(); // Gather validation results from all ranks @@ -759,7 +759,7 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) // Grid dimensions (larger for better load balancing with 4 procs) nfast = 64; - nmid = 64; + nmid = 64; nslow = 64; // Domain decomposition: split along slow (z) dimension @@ -773,8 +773,8 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) int in_klo, in_khi; int slices_per_proc = nslow / nprocs; - in_klo = rank * slices_per_proc; - in_khi = (rank + 1) * slices_per_proc - 1; + in_klo = rank * slices_per_proc; + in_khi = (rank + 1) * slices_per_proc - 1; // Output decomposition: same as input int out_ilo = in_ilo, out_ihi = in_ihi; @@ -783,33 +783,32 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) // Calculate local size int local_nfast = nfast; - int local_nmid = nmid; + int local_nmid = nmid; int local_nslow = in_khi - in_klo + 1; - int local_size = local_nfast * local_nmid * local_nslow; + int local_size = local_nfast * local_nmid * local_nslow; if (verbose) { - std::cout << "Rank " << rank << ": local grid = " << local_nfast << "x" << local_nmid - << "x" << local_nslow << " (z-range: " << in_klo << ".." << in_khi << ")" - << std::endl; + std::cout << "Rank " << rank << ": local grid = " << local_nfast << "x" << local_nmid << "x" + << local_nslow << " (z-range: " << in_klo << ".." 
<< in_khi << ")" << std::endl; } // Allocate local data buffers - input_data = new FFT_SCALAR[2 * local_size]; + input_data = new FFT_SCALAR[2 * local_size]; output_data = new FFT_SCALAR[2 * local_size]; std::memset(input_data, 0, 2 * local_size * sizeof(FFT_SCALAR)); std::memset(output_data, 0, 2 * local_size * sizeof(FFT_SCALAR)); // FFT parameters - int scaled = 0; // No scaling - int permute = 0; // No permutation - int nbuf = 0; // Buffer size (output) - int usecollective = 0; // Use point-to-point communication + int scaled = 0; // No scaling + int permute = 0; // No permutation + int nbuf = 0; // Buffer size (output) + int usecollective = 0; // Use point-to-point communication // Create MPI-aware FFT3d object BEGIN_HIDE_OUTPUT(); - fft = new FFT3d(lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, in_jlo, in_jhi, - in_klo, in_khi, out_ilo, out_ihi, out_jlo, out_jhi, out_klo, out_khi, - scaled, permute, &nbuf, usecollective); + fft = new FFT3d(lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, in_jlo, in_jhi, in_klo, + in_khi, out_ilo, out_ihi, out_jlo, out_jhi, out_klo, out_khi, scaled, permute, + &nbuf, usecollective); END_HIDE_OUTPUT(); ASSERT_NE(fft, nullptr); @@ -830,8 +829,8 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) FFT_SCALAR re = dist(rng); FFT_SCALAR im = dist(rng); - int local_idx = k * nmid * nfast + j * nfast + i; - input_data[2 * local_idx] = re; + int local_idx = k * nmid * nfast + j * nfast + i; + input_data[2 * local_idx] = re; input_data[2 * local_idx + 1] = im; } } @@ -852,7 +851,7 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) END_HIDE_OUTPUT(); // Apply normalization - int total_size = nfast * nmid * nslow; + int total_size = nfast * nmid * nslow; FFT_SCALAR norm = 1.0 / static_cast(total_size); for (int i = 0; i < 2 * local_size; i++) { input_data[i] *= norm; @@ -860,8 +859,8 @@ TEST_F(FFT3DTest, RoundTrip_MPI_4proc_64x64x64) // Validate round-trip on local data FFTValidation::RoundTripValidator 
validator(original_data.data(), input_data, local_nfast, - local_nmid, local_nslow, ROUNDTRIP_TOLERANCE, - verbose); + local_nmid, local_nslow, ROUNDTRIP_TOLERANCE, + verbose); bool passed = validator.validate(); // Gather validation results from all ranks @@ -943,7 +942,7 @@ TEST_F(FFT3DTest, FFTW3_Threading) // Validate round-trip (threaded FFTW3 should give identical results) FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, - nslow, ROUNDTRIP_TOLERANCE, verbose); + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); EXPECT_TRUE(passed) << "FFTW3 threaded round-trip validation failed" @@ -1002,14 +1001,14 @@ TEST_F(FFT3DTest, MKL_Optimized) set_complex_linear(expected_data.data(), i, std::complex(1.0, 0.0)); } - FFTValidation::KnownAnswerValidator validator(delta_output.data(), expected_data.data(), - nfast, nmid, nslow, TOLERANCE, verbose); + FFTValidation::KnownAnswerValidator validator(delta_output.data(), expected_data.data(), nfast, + nmid, nslow, KNOWN_ANSWER_TOLERANCE, verbose); bool passed = validator.validate(); EXPECT_TRUE(passed) << "MKL delta function validation failed" << "\n Max error: " << validator.get_error_stats().max() - << "\n Tolerance: " << TOLERANCE; - EXPECT_LT(validator.get_error_stats().max(), TOLERANCE); + << "\n Tolerance: " << KNOWN_ANSWER_TOLERANCE; + EXPECT_LT(validator.get_error_stats().max(), KNOWN_ANSWER_TOLERANCE); } // ============================================================================ @@ -1048,8 +1047,8 @@ TEST_F(FFT3DTest, KISS_NonPowerOf2) int nsize = nfast * nmid * nslow; if (verbose) { - std::cout << " Testing size: " << size << "x" << size << "x" << size << " (N³=" - << nsize << ")" << std::endl; + std::cout << " Testing size: " << size << "x" << size << "x" << size + << " (N³=" << nsize << ")" << std::endl; } // Generate random complex data @@ -1078,11 +1077,12 @@ TEST_F(FFT3DTest, KISS_NonPowerOf2) // Validate round-trip 
FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, - nslow, ROUNDTRIP_TOLERANCE, verbose); + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); if (verbose || !passed) { - std::cout << " Size " << size << ": Max error = " << validator.get_error_stats().max() + std::cout << " Size " << size + << ": Max error = " << validator.get_error_stats().max() << " (tolerance = " << ROUNDTRIP_TOLERANCE << ")" << std::endl; std::cout << " Status: " << (passed ? "PASSED" : "FAILED") << std::endl; } @@ -1139,7 +1139,7 @@ TEST_F(FFT3DTest, HeFFTe_Distributed) // Validate round-trip FFTValidation::RoundTripValidator validator(original_data.data(), input_data, nfast, nmid, - nslow, ROUNDTRIP_TOLERANCE, verbose); + nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); EXPECT_TRUE(passed) << "HeFFTe round-trip validation failed" diff --git a/unittest/utils/test_fft3d_kokkos.cpp b/unittest/utils/test_fft3d_kokkos.cpp index e65ed371bf5..a1b41b669f4 100644 --- a/unittest/utils/test_fft3d_kokkos.cpp +++ b/unittest/utils/test_fft3d_kokkos.cpp @@ -39,7 +39,7 @@ #include "lammps.h" #include "../testing/core.h" -#include "../utils/fft_test_helpers.h" +#include "fft_test_helpers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -95,7 +95,7 @@ class FFT3DKokkosTest : public LAMMPSTest { // Initialize Kokkos if not already initialized if (!Kokkos::is_initialized()) { - int argc = 0; + int argc = 0; char **argv = nullptr; Kokkos::initialize(argc, argv); kokkos_initialized_here = true; @@ -104,9 +104,9 @@ class FFT3DKokkosTest : public LAMMPSTest { } // Initialize FFT-related members - fft = nullptr; + fft = nullptr; nfast = 0; - nmid = 0; + nmid = 0; nslow = 0; } @@ -124,11 +124,10 @@ class FFT3DKokkosTest : public LAMMPSTest { } // Helper: Create serial KOKKOS FFT3d object (no MPI decomposition) - template - void create_serial_fft(int nfast_in, int nmid_in, int nslow_in) + template void create_serial_fft(int 
nfast_in, int nmid_in, int nslow_in) { nfast = nfast_in; - nmid = nmid_in; + nmid = nmid_in; nslow = nslow_in; // Serial FFT: entire grid on one processor @@ -141,26 +140,25 @@ class FFT3DKokkosTest : public LAMMPSTest { int out_klo = 0, out_khi = nslow - 1; // FFT parameters - int scaled = 0; // No scaling - int permute = 0; // No permutation - int nbuf = 0; // Buffer size (output) - int usecollective = 0; // Use point-to-point communication - int usegpu = 0; // Let KOKKOS decide based on backend + int scaled = 0; // No scaling + int permute = 0; // No permutation + int nbuf = 0; // Buffer size (output) + int usecollective = 0; // Use point-to-point communication + int usegpu = 0; // Let KOKKOS decide based on backend // Create FFT3dKokkos object BEGIN_HIDE_OUTPUT(); - fft = new FFT3dKokkos( - lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, in_jlo, in_jhi, in_klo, - in_khi, out_ilo, out_ihi, out_jlo, out_jhi, out_klo, out_khi, scaled, permute, &nbuf, - usecollective, usegpu); + fft = new FFT3dKokkos(lmp, MPI_COMM_WORLD, nfast, nmid, nslow, in_ilo, in_ihi, + in_jlo, in_jhi, in_klo, in_khi, out_ilo, out_ihi, out_jlo, + out_jhi, out_klo, out_khi, scaled, permute, &nbuf, + usecollective, usegpu); END_HIDE_OUTPUT(); ASSERT_NE(fft, nullptr); } // Helper: Perform round-trip test (forward + backward FFT) - template - void run_roundtrip_test(int nfast_in, int nmid_in, int nslow_in) + template void run_roundtrip_test(int nfast_in, int nmid_in, int nslow_in) { create_serial_fft(nfast_in, nmid_in, nslow_in); @@ -186,7 +184,7 @@ class FFT3DKokkosTest : public LAMMPSTest { Kokkos::deep_copy(d_input, h_input); // Perform forward FFT - auto fft_ptr = static_cast*>(fft); + auto fft_ptr = static_cast *>(fft); fft_ptr->compute(d_input, d_output, FFT3dKokkos::FORWARD); // Perform backward FFT (in-place: output becomes input) @@ -201,13 +199,13 @@ class FFT3DKokkosTest : public LAMMPSTest { std::vector output_vec(2 * nsize); for (int i = 0; i < 2 * nsize; ++i) { - 
input_vec[i] = h_input(i); - output_vec[i] = h_output(i) / static_cast(nsize); // Apply normalization + input_vec[i] = h_input(i); + output_vec[i] = h_output(i) / static_cast(nsize); // Apply normalization } // Validate round-trip: output should equal input after normalization RoundTripValidator validator(input_vec.data(), output_vec.data(), nfast, nmid, nslow, - ROUNDTRIP_TOLERANCE, verbose); + ROUNDTRIP_TOLERANCE, verbose); bool valid = validator.validate(); if (verbose || !valid) { @@ -218,15 +216,14 @@ class FFT3DKokkosTest : public LAMMPSTest { } // Clean up FFT object with proper cast - delete static_cast*>(fft); + delete static_cast *>(fft); fft = nullptr; EXPECT_TRUE(valid) << "Round-trip test failed"; } // Helper: Perform known-answer test (delta function) - template - void run_delta_test(int nfast_in, int nmid_in, int nslow_in) + template void run_delta_test(int nfast_in, int nmid_in, int nslow_in) { create_serial_fft(nfast_in, nmid_in, nslow_in); @@ -247,7 +244,7 @@ class FFT3DKokkosTest : public LAMMPSTest { Kokkos::deep_copy(d_input, h_input); // Perform forward FFT - auto fft_ptr = static_cast*>(fft); + auto fft_ptr = static_cast *>(fft); fft_ptr->compute(d_input, d_output, FFT3dKokkos::FORWARD); // Copy result back to host @@ -279,14 +276,14 @@ class FFT3DKokkosTest : public LAMMPSTest { } // Clean up FFT object with proper cast - delete static_cast*>(fft); + delete static_cast *>(fft); fft = nullptr; EXPECT_TRUE(valid) << "Delta function test failed"; } // Member variables - void *fft; // Type-erased pointer (actual type is FFT3dKokkos*) + void *fft; // Type-erased pointer (actual type is FFT3dKokkos*) int nfast, nmid, nslow; bool kokkos_initialized_here; }; @@ -324,7 +321,6 @@ TEST_F(FFT3DKokkosTest, BackendDetection) TEST_F(FFT3DKokkosTest, RoundTrip_cuFFT_32x32x32) { - // Use LMPDeviceType which is Kokkos::Cuda when CUDA is enabled typedef Kokkos::Cuda DeviceType; run_roundtrip_test(32, 32, 32); @@ -333,7 +329,6 @@ TEST_F(FFT3DKokkosTest, 
RoundTrip_cuFFT_32x32x32) TEST_F(FFT3DKokkosTest, RoundTrip_cuFFT_64x64x64) { - typedef Kokkos::Cuda DeviceType; run_roundtrip_test(64, 64, 64); } @@ -341,12 +336,11 @@ TEST_F(FFT3DKokkosTest, RoundTrip_cuFFT_64x64x64) TEST_F(FFT3DKokkosTest, KnownAnswer_cuFFT_DeltaFunction) { - typedef Kokkos::Cuda DeviceType; run_delta_test(32, 32, 32); } -#endif // KOKKOS_ENABLE_CUDA && FFT_KOKKOS_CUFFT +#endif // KOKKOS_ENABLE_CUDA && FFT_KOKKOS_CUFFT // ============================================================================= // hipFFT Tests (AMD HIP Backend) @@ -357,7 +351,6 @@ TEST_F(FFT3DKokkosTest, KnownAnswer_cuFFT_DeltaFunction) TEST_F(FFT3DKokkosTest, RoundTrip_hipFFT_32x32x32) { - typedef Kokkos::HIP DeviceType; run_roundtrip_test(32, 32, 32); } @@ -365,7 +358,6 @@ TEST_F(FFT3DKokkosTest, RoundTrip_hipFFT_32x32x32) TEST_F(FFT3DKokkosTest, RoundTrip_hipFFT_64x64x64) { - typedef Kokkos::HIP DeviceType; run_roundtrip_test(64, 64, 64); } @@ -373,12 +365,11 @@ TEST_F(FFT3DKokkosTest, RoundTrip_hipFFT_64x64x64) TEST_F(FFT3DKokkosTest, KnownAnswer_hipFFT_DeltaFunction) { - typedef Kokkos::HIP DeviceType; run_delta_test(32, 32, 32); } -#endif // KOKKOS_ENABLE_HIP && FFT_KOKKOS_HIPFFT +#endif // KOKKOS_ENABLE_HIP && FFT_KOKKOS_HIPFFT // ============================================================================= // MKL_GPU Tests (Intel SYCL Backend) @@ -389,7 +380,6 @@ TEST_F(FFT3DKokkosTest, KnownAnswer_hipFFT_DeltaFunction) TEST_F(FFT3DKokkosTest, RoundTrip_MKL_GPU_32x32x32) { - typedef Kokkos::Experimental::SYCL DeviceType; run_roundtrip_test(32, 32, 32); } @@ -397,7 +387,6 @@ TEST_F(FFT3DKokkosTest, RoundTrip_MKL_GPU_32x32x32) TEST_F(FFT3DKokkosTest, RoundTrip_MKL_GPU_64x64x64) { - typedef Kokkos::Experimental::SYCL DeviceType; run_roundtrip_test(64, 64, 64); } @@ -405,12 +394,11 @@ TEST_F(FFT3DKokkosTest, RoundTrip_MKL_GPU_64x64x64) TEST_F(FFT3DKokkosTest, KnownAnswer_MKL_GPU_DeltaFunction) { - typedef Kokkos::Experimental::SYCL DeviceType; run_delta_test(32, 32, 32); 
} -#endif // KOKKOS_ENABLE_SYCL && FFT_KOKKOS_MKL_GPU +#endif // KOKKOS_ENABLE_SYCL && FFT_KOKKOS_MKL_GPU // ============================================================================= // CPU Backend Tests (Task 4.3) @@ -439,7 +427,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_OpenMP_32x32x32) // When OpenMP is enabled, LMPHostType == Kokkos::OpenMP run_roundtrip_test(32, 32, 32); } -#endif // KOKKOS_ENABLE_OPENMP +#endif // KOKKOS_ENABLE_OPENMP #if defined(KOKKOS_ENABLE_THREADS) TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_Threads_32x32x32) @@ -447,7 +435,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_Threads_32x32x32) // When Threads is enabled, LMPHostType == Kokkos::Threads run_roundtrip_test(32, 32, 32); } -#endif // KOKKOS_ENABLE_THREADS +#endif // KOKKOS_ENABLE_THREADS TEST_F(FFT3DKokkosTest, KnownAnswer_Kokkos_DeltaFunction) { @@ -484,7 +472,7 @@ TEST_F(FFT3DKokkosTest, KnownAnswer_Kokkos_Sine) Kokkos::deep_copy(d_input, h_input); // Perform forward FFT - auto fft_ptr = static_cast*>(fft); + auto fft_ptr = static_cast *>(fft); fft_ptr->compute(d_input, d_output, FFT3dKokkos::FORWARD); // Copy result back to host @@ -541,9 +529,8 @@ TEST_F(FFT3DKokkosTest, Threading_OpenMP_Concurrent) #else // Grid dimensions const int grid_size = 32; - const int nsize = grid_size * grid_size * grid_size; - const int num_ffts = 4; - + const int nsize = grid_size * grid_size * grid_size; + const int num_ffts = 4; // Create multiple FFT instances std::vector *> ffts; @@ -615,8 +602,8 @@ TEST_F(FFT3DKokkosTest, Threading_OpenMP_Concurrent) } // Validate round-trip - RoundTripValidator validator(original_buffers[n].data(), input_buffers[n].data(), - grid_size, grid_size, grid_size, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_buffers[n].data(), input_buffers[n].data(), grid_size, + grid_size, grid_size, ROUNDTRIP_TOLERANCE, verbose); if (!validator.validate()) { all_passed = false; std::cout << " FFT instance " << n << " FAILED" << std::endl; @@ -644,9 
+631,8 @@ TEST_F(FFT3DKokkosTest, Threading_Threads_Concurrent) #else // Grid dimensions const int grid_size = 32; - const int nsize = grid_size * grid_size * grid_size; - const int num_ffts = 4; - + const int nsize = grid_size * grid_size * grid_size; + const int num_ffts = 4; // Create multiple FFT instances std::vector *> ffts; @@ -709,8 +695,8 @@ TEST_F(FFT3DKokkosTest, Threading_Threads_Concurrent) input_buffers[n][i] = h_in(i) * norm; } - RoundTripValidator validator(original_buffers[n].data(), input_buffers[n].data(), - grid_size, grid_size, grid_size, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_buffers[n].data(), input_buffers[n].data(), grid_size, + grid_size, grid_size, ROUNDTRIP_TOLERANCE, verbose); if (!validator.validate()) { all_passed = false; } @@ -734,7 +720,7 @@ TEST_F(FFT3DKokkosTest, Threading_Safety) // Test runs with any CPU backend (OpenMP, Threads, Serial) const int grid_size = 32; - const int nsize = grid_size * grid_size * grid_size; + const int nsize = grid_size * grid_size * grid_size; // Create FFT instance int in_ilo = 0, in_ihi = grid_size - 1; @@ -777,7 +763,7 @@ TEST_F(FFT3DKokkosTest, Threading_Safety) // Run multiple FFT operations to check for data corruption const int num_iterations = 10; - bool all_passed = true; + bool all_passed = true; for (int iter = 0; iter < num_iterations; iter++) { BEGIN_HIDE_OUTPUT(); @@ -792,8 +778,8 @@ TEST_F(FFT3DKokkosTest, Threading_Safety) input_data[i] = h_in(i) * norm; } - RoundTripValidator validator(original_data.data(), input_data.data(), - grid_size, grid_size, grid_size, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_data.data(), input_data.data(), grid_size, grid_size, + grid_size, ROUNDTRIP_TOLERANCE, verbose); if (!validator.validate()) { all_passed = false; std::cout << " Iteration " << iter << " FAILED" << std::endl; @@ -812,7 +798,7 @@ TEST_F(FFT3DKokkosTest, Threading_Safety) EXPECT_TRUE(all_passed) << "Thread safety validation 
failed"; } -#endif // KOKKOS_ENABLE_OPENMP || KOKKOS_ENABLE_THREADS +#endif // KOKKOS_ENABLE_OPENMP || KOKKOS_ENABLE_THREADS // ============================================================================= // TASK 4.6: MPI Tests (2 procs, 4 procs, GPU+MPI) @@ -839,7 +825,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) } // Grid dimensions - const int grid_size = 32; + const int grid_size = 32; const int nsize_global = grid_size * grid_size * grid_size; // Domain decomposition: split along slow (z) dimension @@ -851,10 +837,10 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) if (rank == 0) { in_klo = 0; - in_khi = grid_size / 2 - 1; // 0..15 + in_khi = grid_size / 2 - 1; // 0..15 } else { in_klo = grid_size / 2; - in_khi = grid_size - 1; // 16..31 + in_khi = grid_size - 1; // 16..31 } int out_ilo = in_ilo, out_ihi = in_ihi; @@ -863,7 +849,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) // Calculate local size int local_nslow = in_khi - in_klo + 1; - int local_size = grid_size * grid_size * local_nslow; + int local_size = grid_size * grid_size * local_nslow; // Allocate local data buffers std::vector input_data(2 * local_size); @@ -874,7 +860,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) for (int k = 0; k < local_nslow; k++) { for (int j = 0; j < grid_size; j++) { for (int i = 0; i < grid_size; i++) { - int global_k = in_klo + k; + int global_k = in_klo + k; int global_idx = global_k * grid_size * grid_size + j * grid_size + i; // Generate deterministic random data for this grid point @@ -883,10 +869,10 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) FFT_SCALAR re = dist(rng); FFT_SCALAR im = dist(rng); - int local_idx = k * grid_size * grid_size + j * grid_size + i; - input_data[2 * local_idx] = re; - input_data[2 * local_idx + 1] = im; - original_data[2 * local_idx] = re; + int local_idx = k * grid_size * grid_size + j * grid_size + i; + input_data[2 * local_idx] = re; + 
input_data[2 * local_idx + 1] = im; + original_data[2 * local_idx] = re; original_data[2 * local_idx + 1] = im; } } @@ -938,8 +924,8 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_2proc_32x32x32) } // Validate round-trip on local data - RoundTripValidator validator(original_data.data(), input_data.data(), - grid_size, grid_size, local_nslow, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_data.data(), input_data.data(), grid_size, grid_size, + local_nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); // Gather validation results from all ranks @@ -978,7 +964,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) } // Grid dimensions (larger for better load balancing) - const int grid_size = 64; + const int grid_size = 64; const int nsize_global = grid_size * grid_size * grid_size; // Domain decomposition: split along slow (z) dimension @@ -987,8 +973,8 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) int in_klo, in_khi; int slices_per_proc = grid_size / nprocs; - in_klo = rank * slices_per_proc; - in_khi = (rank + 1) * slices_per_proc - 1; + in_klo = rank * slices_per_proc; + in_khi = (rank + 1) * slices_per_proc - 1; int out_ilo = in_ilo, out_ihi = in_ihi; int out_jlo = in_jlo, out_jhi = in_jhi; @@ -996,7 +982,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) // Calculate local size int local_nslow = in_khi - in_klo + 1; - int local_size = grid_size * grid_size * local_nslow; + int local_size = grid_size * grid_size * local_nslow; // Allocate local data buffers std::vector input_data(2 * local_size); @@ -1006,7 +992,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) for (int k = 0; k < local_nslow; k++) { for (int j = 0; j < grid_size; j++) { for (int i = 0; i < grid_size; i++) { - int global_k = in_klo + k; + int global_k = in_klo + k; int global_idx = global_k * grid_size * grid_size + j * grid_size + i; // Generate deterministic random data for this grid point 
@@ -1015,10 +1001,10 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) FFT_SCALAR re = dist(rng); FFT_SCALAR im = dist(rng); - int local_idx = k * grid_size * grid_size + j * grid_size + i; - input_data[2 * local_idx] = re; - input_data[2 * local_idx + 1] = im; - original_data[2 * local_idx] = re; + int local_idx = k * grid_size * grid_size + j * grid_size + i; + input_data[2 * local_idx] = re; + input_data[2 * local_idx + 1] = im; + original_data[2 * local_idx] = re; original_data[2 * local_idx + 1] = im; } } @@ -1070,8 +1056,8 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_4proc_64x64x64) } // Validate round-trip on local data - RoundTripValidator validator(original_data.data(), input_data.data(), - grid_size, grid_size, local_nslow, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_data.data(), input_data.data(), grid_size, grid_size, + local_nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); // Gather validation results from all ranks @@ -1108,7 +1094,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) // Note: GPU tests are skipped in SetUp() - no safe GPU detection available // Grid dimensions - const int grid_size = 32; + const int grid_size = 32; const int nsize_global = grid_size * grid_size * grid_size; // Domain decomposition @@ -1129,7 +1115,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) int out_klo = in_klo, out_khi = in_khi; int local_nslow = in_khi - in_klo + 1; - int local_size = grid_size * grid_size * local_nslow; + int local_size = grid_size * grid_size * local_nslow; // Allocate local data buffers std::vector input_data(2 * local_size); @@ -1139,7 +1125,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) for (int k = 0; k < local_nslow; k++) { for (int j = 0; j < grid_size; j++) { for (int i = 0; i < grid_size; i++) { - int global_k = in_klo + k; + int global_k = in_klo + k; int global_idx = global_k * grid_size * grid_size + j * grid_size + i; // Generate 
deterministic random data for this grid point @@ -1148,10 +1134,10 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) FFT_SCALAR re = dist(rng); FFT_SCALAR im = dist(rng); - int local_idx = k * grid_size * grid_size + j * grid_size + i; - input_data[2 * local_idx] = re; - input_data[2 * local_idx + 1] = im; - original_data[2 * local_idx] = re; + int local_idx = k * grid_size * grid_size + j * grid_size + i; + input_data[2 * local_idx] = re; + input_data[2 * local_idx + 1] = im; + original_data[2 * local_idx] = re; original_data[2 * local_idx + 1] = im; } } @@ -1160,7 +1146,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) // FFT parameters (disable GPU-aware MPI for now) int scaled = 0, permute = 0, nbuf = 0; int usecollective = 0; - int usegpu_aware = 0; // Would check lmp->kokkos->gpu_aware_flag if KokkosLMP was complete + int usegpu_aware = 0; // Would check lmp->kokkos->gpu_aware_flag if KokkosLMP was complete // Create MPI+GPU FFT3dKokkos object BEGIN_HIDE_OUTPUT(); @@ -1204,8 +1190,8 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) } // Validate round-trip - RoundTripValidator validator(original_data.data(), input_data.data(), - grid_size, grid_size, local_nslow, ROUNDTRIP_TOLERANCE, verbose); + RoundTripValidator validator(original_data.data(), input_data.data(), grid_size, grid_size, + local_nslow, ROUNDTRIP_TOLERANCE, verbose); bool passed = validator.validate(); // Gather results @@ -1222,7 +1208,7 @@ TEST_F(FFT3DKokkosTest, RoundTrip_Kokkos_MPI_GPU_2proc) EXPECT_TRUE(all_passed) << "Round-trip validation failed on rank " << rank; } -#endif // LMP_KOKKOS +#endif // LMP_KOKKOS // ============================================================================= // Main From bf4177d106cdf179e856b0c0cb357472e539aea5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Nov 2025 15:53:44 -0700 Subject: [PATCH 240/604] Improve Kokkos support for hybrid atom_vec styles --- src/KOKKOS/atom_kokkos.cpp | 6 +- src/KOKKOS/atom_kokkos.h | 6 
+- src/KOKKOS/atom_vec_angle_kokkos.cpp | 493 +--- src/KOKKOS/atom_vec_angle_kokkos.h | 59 +- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 373 +-- src/KOKKOS/atom_vec_atomic_kokkos.h | 34 +- src/KOKKOS/atom_vec_bond_kokkos.cpp | 469 +--- src/KOKKOS/atom_vec_bond_kokkos.h | 48 +- src/KOKKOS/atom_vec_charge_kokkos.cpp | 443 +--- src/KOKKOS/atom_vec_charge_kokkos.h | 39 +- src/KOKKOS/atom_vec_dipole_kokkos.cpp | 467 +--- src/KOKKOS/atom_vec_dipole_kokkos.h | 40 +- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 799 +------ src/KOKKOS/atom_vec_dpd_kokkos.h | 45 +- src/KOKKOS/atom_vec_full_kokkos.cpp | 644 +---- src/KOKKOS/atom_vec_full_kokkos.h | 77 +- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 79 +- src/KOKKOS/atom_vec_hybrid_kokkos.h | 50 +- src/KOKKOS/atom_vec_kokkos.cpp | 2740 +++++++++++++++++++--- src/KOKKOS/atom_vec_kokkos.h | 213 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 627 +---- src/KOKKOS/atom_vec_molecular_kokkos.h | 77 +- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 1529 +----------- src/KOKKOS/atom_vec_sphere_kokkos.h | 60 +- src/KOKKOS/atom_vec_spin_kokkos.cpp | 452 +--- src/KOKKOS/atom_vec_spin_kokkos.h | 45 +- src/KOKKOS/comm_kokkos.cpp | 212 +- src/KOKKOS/comm_tiled_kokkos.cpp | 5 - src/KOKKOS/dynamical_matrix_kokkos.cpp | 2 +- src/KOKKOS/fix_property_atom_kokkos.cpp | 6 +- src/KOKKOS/fix_property_atom_kokkos.h | 6 +- src/KOKKOS/memory_kokkos.h | 4 +- src/KOKKOS/mliap_data_kokkos.cpp | 4 +- src/KOKKOS/mliap_data_kokkos.h | 4 +- src/KOKKOS/neigh_bond_kokkos.h | 2 +- src/KOKKOS/third_order_kokkos.cpp | 2 +- src/KOKKOS/verlet_kokkos.cpp | 4 +- src/angle.h | 2 +- src/atom_masks.h | 112 +- src/bond.h | 2 +- src/compute.h | 2 +- src/dihedral.h | 2 +- src/fix.h | 2 +- src/improper.h | 2 +- src/kspace.h | 2 +- src/pair.h | 2 +- 46 files changed, 2909 insertions(+), 7384 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index ca50f382cb3..d9d85e2a3e7 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -152,7 +152,7 @@ void 
AtomKokkos::update_property_atom() /* ---------------------------------------------------------------------- */ -void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) +void AtomKokkos::sync(const ExecutionSpace space, uint64_t mask) { if ((space == Device || space == HostKK) && lmp->kokkos->auto_sync) { @@ -171,7 +171,7 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) +void AtomKokkos::modified(const ExecutionSpace space, uint64_t mask) { avecKK->modified(space, mask); for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask); @@ -184,7 +184,7 @@ void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomKokkos::sync_pinned(const ExecutionSpace space, unsigned int mask, int async_flag) +void AtomKokkos::sync_pinned(const ExecutionSpace space, uint64_t mask, int async_flag) { avecKK->sync_pinned(space, mask, async_flag); for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_pinned(space, mask, async_flag); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 02b757ce180..319992ec176 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -161,9 +161,9 @@ class AtomKokkos : public Atom { void init() override; void update_property_atom(); void allocate_type_arrays() override; - void sync(const ExecutionSpace space, unsigned int mask); - void modified(const ExecutionSpace space, unsigned int mask); - void sync_pinned(const ExecutionSpace space, unsigned int mask, int async_flag = 0); + void sync(const ExecutionSpace space, uint64_t mask); + void modified(const ExecutionSpace space, uint64_t mask); + void sync_pinned(const ExecutionSpace space, uint64_t mask, int async_flag = 0); void sort() override; int add_custom(const char 
*, int, int, int border = 0) override; void remove_custom(int, int, int) override; diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 6047996046f..2b6ed3a3d1e 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -33,6 +33,17 @@ AtomVecKokkos(lmp), AtomVecAngle(lmp) } +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::init() +{ + AtomVecAngle::init(); + + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA @@ -186,483 +197,7 @@ void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort &Sorter /* ---------------------------------------------------------------------- */ -template -struct AtomVecAngleKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecAngleKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = 
_x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecAngleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAngleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecAngleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAngleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename 
AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecAngleKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecAngleKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAngleKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename 
AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecAngleKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - 
_angle_atom3(atom->k_angle_atom3.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - 
_buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 to store buffer length - - size_exchange = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecAngleKokkos_PackExchangeFunctor - 
f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecAngleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAngleKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecAngleKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _nlocal(nlocal.template view()), - 
_dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= 
nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecAngleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAngleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecAngleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -744,7 +279,7 @@ void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -837,7 +372,7 @@ void AtomVecAngleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, in /* ---------------------------------------------------------------------- */ -void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecAngleKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 73fb5770a9c..9b28cf73fc0 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -31,71 +31,20 @@ namespace LAMMPS_NS { class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { 
public: AtomVecAngleKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: tagint *molecule; tagint **special; tagint **bond_atom; tagint **angle_atom1,**angle_atom2,**angle_atom3; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d 
h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 20625a7e461..15176a88d5e 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -29,7 +29,17 @@ using namespace LAMMPS_NS; AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecAtomic(lmp) { - unpack_exchange_indices_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecAtomicKokkos::init() +{ + AtomVecAtomic::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -119,362 +129,7 @@ void AtomVecAtomicKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecAtomicKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - double _dx,_dy,_dz; - - AtomVecAtomicKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const double &dx, const double 
&dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecAtomicKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAtomicKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecAtomicKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecAtomicKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*6; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - 
const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - int _first; - - - AtomVecAtomicKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecAtomicKokkos_UnpackBorder f(buf.view_host(),h_x,h_tag,h_type,h_mask,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecAtomicKokkos_UnpackBorder f(buf.view_device(),d_x,d_tag,d_type,d_mask,first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_3_lr _xw; - 
typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecAtomicKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int 
AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) -{ - size_exchange = 11; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecAtomicKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecAtomicKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()),_dim(dim), - _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - 
KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecAtomicKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return 
k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecAtomicKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -505,7 +160,7 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -542,7 +197,7 @@ void AtomVecAtomicKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i /* ---------------------------------------------------------------------- */ -void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecAtomicKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 172c84cf0d4..43756c238c1 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -32,40 +32,14 @@ namespace LAMMPS_NS { class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { public: AtomVecAtomicKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int 
&nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; - - protected: - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; }; } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 91a3ff4a4a1..5e8a924c9e9 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -29,7 +29,17 @@ using namespace LAMMPS_NS; AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecBond(lmp) { - unpack_exchange_indices_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecBondKokkos::init() +{ + AtomVecBond::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -151,458 +161,7 @@ void AtomVecBondKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* ---------------------------------------------------------------------- */ -template -struct 
AtomVecBondKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecBondKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecBondKokkos_PackBorder f( - 
buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecBondKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecBondKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecBondKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecBondKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* ---------------------------------------------------------------------- */ - -void 
AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecBondKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecBondKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - 
AtomVecBondKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - 
_buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 to store buffer length - - size_exchange = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecBondKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* 
---------------------------------------------------------------------- */ - -template -struct AtomVecBondKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecBondKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = 
_buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - 
k_count.sync_device(); - AtomVecBondKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecBondKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -663,7 +222,7 @@ void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -732,7 +291,7 @@ void AtomVecBondKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecBondKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 1f0582e9bbc..7adea656dd2 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -31,59 +31,19 @@ namespace LAMMPS_NS { class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { public: AtomVecBondKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace 
space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: tagint *molecule; tagint **special; tagint **bond_atom; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; }; } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 25eb08b76f2..6930cc15a99 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -29,7 +29,17 @@ using namespace LAMMPS_NS; AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), 
AtomVecCharge(lmp), q(nullptr) { - unpack_exchange_indices_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::init() +{ + AtomVecCharge::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -126,432 +136,7 @@ void AtomVecChargeKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecChargeKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecChargeKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + 
_pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - double _dx,_dy,_dz; - - AtomVecChargeKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = 
pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecChargeKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecChargeKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecChargeKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecChargeKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - int _first; - - - AtomVecChargeKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - } -}; - -/* 
---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - if (first+n >= nmax) { - grow(first+n+100); - } - if (space==Host) { - struct AtomVecChargeKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecChargeKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,first); - Kokkos::parallel_for(n,f); - } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecChargeKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - 
_typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _q[i]; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 12; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecChargeKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - 
Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecChargeKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecChargeKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()),_dim(dim), - _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _q[i] = _buf(myrecv,11); - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } 
-}; - -/* ---------------------------------------------------------------------- */ -int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecChargeKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecChargeKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -585,7 +170,7 @@ void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecChargeKokkos::modified(ExecutionSpace 
space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -619,7 +204,7 @@ void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecChargeKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecChargeKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index a9ee88fede7..ee2f5c19f8f 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -32,48 +32,17 @@ namespace LAMMPS_NS { class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { public: AtomVecChargeKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, 
uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - - HAT::t_kkfloat_1d h_q; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index c476139e93b..5e1d29e6258 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -29,6 +29,17 @@ using namespace LAMMPS_NS; AtomVecDipoleKokkos::AtomVecDipoleKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecDipole(lmp), q(nullptr) {} +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::init() +{ + AtomVecDipole::init(); + + set_atom_masks(); +} + /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by DELTA @@ -128,457 +139,7 @@ void AtomVecDipoleKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecDipoleKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDipoleKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_4 &mu, - const typename DAT::tdual_double_2d_lr &buf, - const 
typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _mu(mu.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t elements = 7; // size_forward - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _mu(j,0); - _buf(i,4) = _mu(j,1); - _buf(i,5) = _mu(j,2); - _buf(i,6) = _mu(j,3); - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_kkfloat_1d_4_randomread _mu; - double _dx,_dy,_dz; - - AtomVecDipoleKokkos_PackBorder( - const typename 
AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const typename AT::t_kkfloat_1d_4_randomread &mu, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = _mu(j,0); - _buf(i,8) = _mu(j,1); - _buf(i,9) = _mu(j,2); - _buf(i,10) = _mu(j,3); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = _mu(j,0); - _buf(i,8) = _mu(j,1); - _buf(i,9) = _mu(j,2); - _buf(i,10) = _mu(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecDipoleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDipoleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); - 
Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecDipoleKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDipoleKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - typename AT::t_kkfloat_1d_4 _mu; - int _first; - - - AtomVecDipoleKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - typename AT::t_kkfloat_1d_4 &mu, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),_first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - _mu(i+_first,0) = _buf(i,7); - _mu(i+_first,1) = _buf(i,8); - _mu(i+_first,2) = _buf(i,9); - _mu(i+_first,3) = _buf(i,10); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDipoleKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - 
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MU_MASK); - if (space==Host) { - struct AtomVecDipoleKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_mu,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDipoleKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_mu,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_kkfloat_1d_4_randomread _mu; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_kkfloat_1d_4 _muw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecDipoleKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _mu(atom->k_mu.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - 
_maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _muw(atom->k_mu.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _q[i]; - _buf(mysend,12) = _mu(i,0); - _buf(mysend,13) = _mu(i,1); - _buf(mysend,14) = _mu(i,2); - _buf(mysend,15) = _mu(i,3); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - _muw(i,0) = _mu(j,0); - _muw(i,1) = _mu(j,1); - _muw(i,2) = _mu(j,2); - _muw(i,3) = _mu(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDipoleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 16; // # of elements packed - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/12) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - 
AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecDipoleKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDipoleKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_kkfloat_1d_4 _mu; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecDipoleKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _mu(atom->k_mu.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) 
d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _q[i] = _buf(myrecv,11); - _mu(i,0) = _buf(myrecv,12); - _mu(i,1) = _buf(myrecv,13); - _mu(i,2) = _buf(myrecv,14); - _mu(i,3) = _buf(myrecv,15); - } - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecDipoleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecDipoleKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecDipoleKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -615,7 +176,7 @@ void AtomVecDipoleKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecDipoleKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -652,7 +213,7 @@ void AtomVecDipoleKokkos::modified(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void 
AtomVecDipoleKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecDipoleKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index 0a4306d375e..0c0972d4763 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -32,49 +32,17 @@ namespace LAMMPS_NS { class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { public: AtomVecDipoleKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d 
d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - HAT::t_kkfloat_1d h_q; - DAT::t_kkfloat_1d_4 d_mu; - HAT::t_kkfloat_1d_4 h_mu; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index a6725a006d0..be7a5d4f697 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -31,7 +31,17 @@ using namespace LAMMPS_NS; AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecDPD(lmp) { - no_comm_vel_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::init() +{ + AtomVecDPD::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -162,788 +172,7 @@ void AtomVecDPDKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* ---------------------------------------------------------------------- */ -template -struct AtomVecDPDKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDPDKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, 
const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _buf(i,3) = _dpdTheta(j); - _buf(i,4) = _uCond(j); - _buf(i,5) = _uMech(j); - _buf(i,6) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - 
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - - return n*size_forward; -} - -/* 
---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackCommSelf { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - int _nfirst; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecDPDKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _dpdTheta(i+_nfirst) = _dpdTheta(j); - _uCond(i+_nfirst) = _uCond(j); - _uMech(i+_nfirst) = _uMech(j); - 
_uChem(i+_nfirst) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename AT::t_double_2d_lr_const _buf; - int _first; - - AtomVecDPDKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &dpdTheta, - const typename DAT::ttransform_kkfloat_1d &uCond, - const typename DAT::ttransform_kkfloat_1d &uMech, - const typename DAT::ttransform_kkfloat_1d &uChem, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _dpdTheta(i+_first) = _buf(i,3); - _uCond(i+_first) = 
_buf(i,4); - _uMech(i+_first) = _buf(i,5); - _uChem(i+_first) = _buf(i,6); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(HostKK,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - } else { - atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - double _dx,_dy,_dz; - - AtomVecDPDKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &dpdTheta, - const typename AT::t_kkfloat_1d &uCond, - const typename AT::t_kkfloat_1d &uMech, - const typename AT::t_kkfloat_1d &uChem, - const typename 
AT::t_kkfloat_1d &uCG, - const typename AT::t_kkfloat_1d &uCGnew, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), - _uCGnew(uCGnew), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _dpdTheta(j); - _buf(i,7) = _uCond(j); - _buf(i,8) = _uMech(j); - _buf(i,9) = _uChem(j); - _buf(i,10) = _uCG(j); - _buf(i,11) = _uCGnew(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDPDKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - 
dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecDPDKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*6; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - int _first; - - - AtomVecDPDKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &dpdTheta, - const typename AT::t_kkfloat_1d &uCond, - const typename AT::t_kkfloat_1d &uMech, - const typename AT::t_kkfloat_1d &uChem, - const typename AT::t_kkfloat_1d &uCG, - const typename AT::t_kkfloat_1d &uCGnew, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), - _uCGnew(uCGnew), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _dpdTheta(i+_first) = _buf(i,6); - _uCond(i+_first) = _buf(i,7); - _uMech(i+_first) = _buf(i,8); - _uChem(i+_first) = _buf(i,9); - _uCG(i+_first) = _buf(i,10); - _uCGnew(i+_first) = _buf(i,11); -// printf("%i %i %lf %lf %lf %i 
BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); - if (space==Host) { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view_device(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const 
_copylist; - int _size_exchange; - - AtomVecDPDKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _dpdTheta(atom->k_dpdTheta.view()), - _uCond(atom->k_uCond.view()), - _uMech(atom->k_uMech.view()), - _uChem(atom->k_uChem.view()), - _uCG(atom->k_uCG.view()), - _uCGnew(atom->k_uCGnew.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _dpdThetaw(atom->k_dpdTheta.view()), - _uCondw(atom->k_uCond.view()), - _uMechw(atom->k_uMech.view()), - _uChemw(atom->k_uChem.view()), - _uCGw(atom->k_uCG.view()), - _uCGneww(atom->k_uCGnew.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = _tag[i]; - _buf(mysend,8) = _type[i]; - _buf(mysend,9) = _mask[i]; - _buf(mysend,10) = _image[i]; - _buf(mysend,11) = _dpdTheta[i]; - _buf(mysend,12) = _uCond[i]; - _buf(mysend,13) = _uMech[i]; - _buf(mysend,14) = _uChem[i]; - _buf(mysend,15) = _uCG[i]; - _buf(mysend,16) = _uCGnew[i]; - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = 
_v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - _dpdThetaw[i] = _dpdTheta(j); - _uCondw[i] = _uCond(j); - _uMechw[i] = _uMech(j); - _uChemw[i] = _uChem(j); - _uCGw[i] = _uCG(j); - _uCGneww[i] = _uCGnew(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space) -{ - size_exchange = 17; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | - UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | - DVECTOR_MASK); - if (space == HostKK) { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } else { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } - return nsend*size_exchange; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _dpdTheta; - typename AT::t_kkfloat_1d _uCond; - typename AT::t_kkfloat_1d _uMech; - typename AT::t_kkfloat_1d _uChem; - typename AT::t_kkfloat_1d _uCG; - typename AT::t_kkfloat_1d _uCGnew; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double 
_lo,_hi; - int _size_exchange; - - AtomVecDPDKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = _buf(myrecv,7); - _type[i] = _buf(myrecv,8); - _mask[i] = _buf(myrecv,9); - _image[i] = _buf(myrecv,10); - _dpdTheta[i] = _buf(myrecv,11); - _uCond[i] = _buf(myrecv,12); - _uMech[i] = _buf(myrecv,13); - _uChem[i] = _buf(myrecv,14); - _uCG[i] = _buf(myrecv,15); - _uCGnew[i] = _buf(myrecv,16); - } - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); 
- Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - - atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | - UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | - DVECTOR_MASK); - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecDPDKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -998,7 +227,7 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1067,7 +296,7 @@ void AtomVecDPDKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecDPDKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index 42caa92d0bc..e522c0c6ce6 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -32,53 +32,16 @@ namespace LAMMPS_NS { class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { public: AtomVecDPDKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - 
const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; double *duChem; - - protected: - DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; - HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 
f9d72042cd1..e1e5de88f0d 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -29,7 +29,16 @@ using namespace LAMMPS_NS; AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecFull(lmp) { - unpack_exchange_indices_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::init() +{ + AtomVecFull::init(); + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -268,634 +277,7 @@ void AtomVecFullKokkos::sort_kokkos(Kokkos::BinSort &Sorter) /* ---------------------------------------------------------------------- */ -template -struct AtomVecFullKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d _q; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecFullKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &q, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) 
= _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _q(j); - _buf(i,7) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecFullKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecFullKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecFullKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecFullKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename 
AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _q; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecFullKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d &q, - typename AT::t_tagint_1d &molecule, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _q(i+_first) = _buf(i,6); - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,7)).i; - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); - - while (first+n >= nmax) grow(0); - - if (space==Host) { - struct AtomVecFullKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecFullKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename 
AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _q; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _qw; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - _dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; 
- typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecFullKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _qw(atom->k_q.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - 
_bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - _num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = _q(i); - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; 
- for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; - for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; - } - - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _qw(i) = _q(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _num_dihedralw(i) = _num_dihedral(j); - for (k = 0; k < _num_dihedral(j); k++) { 
- _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); - } - _num_improperw(i) = _num_improper(j); - for (k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - _nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 charge - // 1 to store buffer length - - size_exchange = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom+5*atom->improper_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return 
nsend*size_exchange; - } else { - AtomVecFullKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecFullKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _q; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d _nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d _num_dihedral; - typename AT::t_int_2d _dihedral_type; - typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d _num_improper; - typename AT::t_int_2d _improper_type; - typename AT::t_tagint_2d _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecFullKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _q(atom->k_q.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - 
_num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - _q(i) = _buf(myrecv,m++); - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - 
_bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_dihedral(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - 
AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecFullKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecFullKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -1028,7 +410,7 @@ void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1179,7 +561,7 @@ void AtomVecFullKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int /* ---------------------------------------------------------------------- */ -void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecFullKokkos::modified(ExecutionSpace 
space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index f7a552beb3c..df7133638e5 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -31,28 +31,14 @@ namespace LAMMPS_NS { class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { public: AtomVecFullKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: double *q; @@ -62,61 +48,6 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { tagint **angle_atom1,**angle_atom2,**angle_atom3; tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4; tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4; - - DAT::t_tagint_1d d_tag; - 
DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d d_q; - HAT::t_kkfloat_1d h_q; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; - - DAT::t_int_1d d_num_dihedral; - DAT::t_int_2d d_dihedral_type; - DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, - d_dihedral_atom3,d_dihedral_atom4; - DAT::t_int_1d d_num_improper; - DAT::t_int_2d d_improper_type; - DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, - d_improper_atom3,d_improper_atom4; - - HAT::t_int_1d h_num_dihedral; - HAT::t_int_2d h_dihedral_type; - HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, - h_dihedral_atom3,h_dihedral_atom4; - HAT::t_int_1d h_num_improper; - HAT::t_int_2d h_improper_type; - HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, - h_improper_atom3,h_improper_atom4; }; } diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 9bd5a9b1650..6cfe5660862 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -15,7 +15,10 @@ #include "atom_vec_hybrid_kokkos.h" #include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain.h" #include "error.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -24,8 +27,17 @@ using namespace LAMMPS_NS; 
AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecHybrid(lmp) { - no_comm_vel_flag = 1; - no_border_vel_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::init() +{ + AtomVecHybrid::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- */ @@ -54,82 +66,25 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort &Sorte (dynamic_cast(styles[k]))->sort_kokkos(Sorter); } -/* ---------------------------------------------------------------------- */ - -int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_1d &/*k_sendlist*/, - const DAT::tdual_double_2d_lr &/*buf*/, - const int &/*pbc_flag*/, const int /*pbc*/[]) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; -} - -void AtomVecHybridKokkos::unpack_comm_kokkos(const int &/*n*/, const int &/*nfirst*/, - const DAT::tdual_double_2d_lr &/*buf*/) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); -} - -int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_1d &/*list*/, - const int /*nfirst*/, - const int &/*pbc_flag*/, const int pbc[]) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; -} - -int AtomVecHybridKokkos::pack_border_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_double_2d_lr /*buf*/, - int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; -} - -void AtomVecHybridKokkos::unpack_border_kokkos(const int &/*n*/, const int &/*nfirst*/, - const DAT::tdual_double_2d_lr &/*buf*/, - ExecutionSpace /*space*/) -{ - 
error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); -} - -int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_double_2d_lr &/*buf*/, - DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_int_1d /*k_copylist*/, - ExecutionSpace /*space*/) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; -} - -int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr & /*k_buf*/, int /*nrecv*/, - int /*nlocal*/, int /*dim*/, double /*lo*/, - double /*hi*/, ExecutionSpace /*space*/, - DAT::tdual_int_1d &/*k_indices*/) -{ - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); - return 0; -} - // TODO: move dynamic_cast into init /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::sync(ExecutionSpace space, unsigned int h_mask) +void AtomVecHybridKokkos::sync(ExecutionSpace space, uint64_t h_mask) { for (int k = 0; k < nstyles; k++) (dynamic_cast(styles[k]))->sync(space,h_mask); } /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::sync_pinned(ExecutionSpace space, unsigned int h_mask, int async_flag) +void AtomVecHybridKokkos::sync_pinned(ExecutionSpace space, uint64_t h_mask, int async_flag) { for (int k = 0; k < nstyles; k++) (dynamic_cast(styles[k]))->sync_pinned(space,h_mask,async_flag); } /* ---------------------------------------------------------------------- */ -void AtomVecHybridKokkos::modified(ExecutionSpace space, unsigned int h_mask) +void AtomVecHybridKokkos::modified(ExecutionSpace space, uint64_t h_mask) { for (int k = 0; k < nstyles; k++) (dynamic_cast(styles[k]))->modified(space,h_mask); } diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 5f4eeb02119..dd4691f0896 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -32,55 +32,13 @@ namespace LAMMPS_NS { class 
AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { public: AtomVecHybridKokkos(class LAMMPS *); + void init() override; void grow(int) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; - - private: - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_kkfloat_1d_3 d_omega, d_angmom; - HAT::t_kkfloat_1d_3 h_omega, h_angmom; + void sync(ExecutionSpace space, uint64_t mask) override; + void 
modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 63e912b4915..f065c6e11ab 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -33,12 +33,11 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) kokkosable = 1; buffer = nullptr; buffer_size = 0; - - no_comm_vel_flag = 0; - no_border_vel_flag = 1; - unpack_exchange_indices_flag = 0; size_exchange = 0; + datamask_grow = datamask_comm = datamask_comm_vel = datamask_reverse = + datamask_border = datamask_border_vel = datamask_exchange = EMPTY_MASK; + k_count = DAT::tdual_int_1d("atom:k_count",1); atomKK = (AtomKokkos *) atom; } @@ -54,51 +53,94 @@ AtomVecKokkos::~AtomVecKokkos() /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackComm { typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + uint64_t _datamask; AtomVecKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = 
pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); + const int j = _list(i); + int m = 0; + if constexpr (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = 
_x(j,2) + _pbc[2]*_zprd; + } + } + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + _buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); } + } } }; @@ -114,57 +156,113 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm 
f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct 
AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackComm f(atomKK->k_x,buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackComm f(atomKK,buf,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } } @@ -174,50 +272,89 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackCommSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename 
AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; int _nfirst; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; + uint64_t _datamask; AtomVecKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()),_nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + const AtomKokkos* atomKK, + const int &nfirst, + const typename DAT::tdual_int_1d &list, + const double &xprd, const double &yprd, const double &zprd, + const double &xy, const double &xz, const double &yz, const int* const pbc, + const uint64_t datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _nfirst(nfirst),_list(list.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + const int j = _list(i); + if constexpr (PBC_FLAG == 0) { + _x(i+_nfirst,0) = _x(j,0); + _x(i+_nfirst,1) = _x(j,1); + _x(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _x(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _x(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _x(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = 
_x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } + _x(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _x(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _x(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & MU_MASK) { + _mu(i+_nfirst,0) = _mu(j,0); + _mu(i+_nfirst,1) = _mu(j,1); + _mu(i+_nfirst,2) = _mu(j,2); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _sp(i+_nfirst,0) = _sp(j,0); + _sp(i+_nfirst,1) = _sp(j,1); + _sp(i+_nfirst,2) = _sp(j,2); + _sp(i+_nfirst,3) = _sp(j,3); } + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_nfirst) = _dpdTheta(j); + _uCond(i+_nfirst) = _uCond(j); + _uMech(i+_nfirst) = _uMech(j); + _uChem(i+_nfirst) = _uChem(j); + } + } } }; @@ -225,77 +362,137 @@ struct AtomVecKokkos_PackCommSelf { int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst, const int &pbc_flag, const int* const pbc) { + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + 
domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } - atomKK->modified(HostKK,X_MASK); + atomKK->modified(HostKK,datamask_comm); 
} else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (pbc_flag) { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } else { if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelf f(atomKK->k_x,nfirst,list, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelf f(atomKK,nfirst,list, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,datamask_comm); + Kokkos::parallel_for(n,f); + } } } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } - /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackCommSelfFused { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr _xw; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_int_2d_lr_const _list; typename AT::t_int_2d_const _pbc; typename AT::t_int_1d_const _pbc_flag; @@ -303,9 +500,10 @@ struct AtomVecKokkos_PackCommSelfFused { typename AT::t_int_1d_const _sendnum_scan; typename AT::t_int_1d_const _g2l; double _xprd,_yprd,_zprd,_xy,_xz,_yz; + uint64_t _datamask; AtomVecKokkos_PackCommSelfFused( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, + const AtomKokkos* atomKK, const typename DAT::tdual_int_2d_lr &list, const typename DAT::tdual_int_2d &pbc, const typename DAT::tdual_int_1d &pbc_flag, @@ -313,8 +511,15 @@ struct AtomVecKokkos_PackCommSelfFused { const typename DAT::tdual_int_1d &sendnum_scan, const typename DAT::tdual_int_1d &g2l, const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz): - _x(x.view()),_xw(x.view()), + const double &xy, const double &xz, const double &yz, + const 
uint64_t datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _list(list.view()), _pbc(pbc.view()), _pbc_flag(pbc_flag.view()), @@ -322,7 +527,7 @@ struct AtomVecKokkos_PackCommSelfFused { _sendnum_scan(sendnum_scan.view()), _g2l(g2l.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) {}; + _xy(xy),_xz(xz),_yz(yz),_datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& ii) const { @@ -341,18 +546,47 @@ struct AtomVecKokkos_PackCommSelfFused { j = _g2l(j-nlocal); if (_pbc_flag(ii) == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + _x(i+_nfirst,0) = _x(j,0); + _x(i+_nfirst,1) = _x(j,1); + _x(i+_nfirst,2) = _x(j,2); } else { if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; + _x(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _x(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + _x(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _x(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _x(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + } + } + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & MU_MASK) { + _mu(i+_nfirst,0) = _mu(j,0); + _mu(i+_nfirst,1) = _mu(j,1); + _mu(i+_nfirst,2) = _mu(j,2); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _sp(i+_nfirst,0) = _sp(j,0); + _sp(i+_nfirst,1) = _sp(j,1); + _sp(i+_nfirst,2) = _sp(j,2); + _sp(i+_nfirst,3) = _sp(j,3); + } + + // DPD-REACT 
package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_nfirst) = _dpdTheta(j); + _uCond(i+_nfirst) = _uCond(j); + _uMech(i+_nfirst) = _uMech(j); + _uChem(i+_nfirst) = _uChem(j); } } } @@ -364,64 +598,134 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, const DAT::tdual_int_1d &g2l) { if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } - atomKK->modified(HostKK,X_MASK); + 
atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); + atomKK->sync(Device,datamask_comm); if (domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz); - Kokkos::parallel_for(n,f); + if (comm_x_only) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,datamask_comm); + Kokkos::parallel_for(n,f); + } } - atomKK->modified(Device,X_MASK); + atomKK->modified(Device,datamask_comm); } - return n*3; + return n*size_forward; } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackComm { typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename 
AT::t_double_2d_lr_const _buf; int _first; + uint64_t _datamask; AtomVecKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_x(x.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _first(first),_datamask(datamask) { + const int size_forward = atomKK->avecKK->size_forward; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/size_forward; + const size_t elements = size_forward; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + } + } } }; @@ -430,15 +734,25 @@ struct AtomVecKokkos_UnpackComm { void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if 
(lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK); - struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK); + atomKK->sync(HostKK,datamask_comm); + if (comm_x_only) { + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } + atomKK->modified(HostKK,datamask_comm); } else { - atomKK->sync(Device,X_MASK); - struct AtomVecKokkos_UnpackComm f(atomKK->k_x,buf,first); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK); + atomKK->sync(Device,datamask_comm); + if (comm_x_only) { + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackComm f(atomKK,buf,first,datamask_comm); + Kokkos::parallel_for(n,f); + } + atomKK->modified(Device,datamask_comm); } } @@ -450,34 +764,44 @@ struct AtomVecKokkos_PackCommVel { typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_3 _v; + typename AT::t_int_1d_randomread _mask; + typename AT::t_kkfloat_1d_3_randomread _v; + typename AT::t_kkfloat_1d_4_randomread _mu; + typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_3_randomread _omega; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_um _buf; typename AT::t_int_1d_const _list; double _xprd,_yprd,_zprd,_xy,_xz,_yz; double _pbc[6]; double _h_rate[6]; const int _deform_vremap; + uint64_t _datamask; AtomVecKokkos_PackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_int_1d &mask, - const typename DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, const typename DAT::tdual_int_1d &list, const double &xprd, const double &yprd, const 
double &zprd, const double &xy, const double &xz, const double &yz, const int* const pbc, const double * const h_rate, - const int &deform_vremap): - _x(x.view()), - _mask(mask.view()), - _v(v.view()), + const int &deform_vremap, + const uint64_t &datamask): + _x(atomKK->k_x.view()), + _mask(atomKK->k_mask.view()), + _v(atomKK->k_v.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), _list(list.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap) + _deform_vremap(deform_vremap), + _datamask(datamask) { - const size_t elements = 6; + const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; @@ -488,47 +812,82 @@ struct AtomVecKokkos_PackCommVel { KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + if constexpr (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - - if (DEFORM_VREMAP == 0) { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd; + 
_buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,m++) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,m++) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,m++) = _x(j,2) + _pbc[2]*_zprd; + } + + if constexpr (DEFORM_VREMAP == 0) { + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } else { if (_mask(i) & _deform_vremap) { - _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + _buf(i,m++) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,m++) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,m++) = _v(j,2) + _pbc[2]*_h_rate[2]; } else { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); } } } - } -}; -/* ---------------------------------------------------------------------- */ + // DIPOLE package -int AtomVecKokkos::pack_comm_vel_kokkos( + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + // SPHERE package + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + _buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_vel_kokkos( const int &n, const DAT::tdual_int_1d &list, const DAT::tdual_double_2d_lr &buf, @@ -536,162 +895,211 @@ int AtomVecKokkos::pack_comm_vel_kokkos( const int* const pbc) { if 
(lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); + atomKK->sync(HostKK,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { - atomKK->sync(Device,X_MASK|V_MASK); + atomKK->sync(Device,datamask_comm_vel); if (pbc_flag) { if (deform_vremap) { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } else { if (domain->triclinic) { struct AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } else { struct 
AtomVecKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_v, + atomKK, buf,list, domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap, + datamask_comm_vel); Kokkos::parallel_for(n,f); } } } - return n*6; + return n*(size_forward + size_velocity); } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnpackCommVel { typedef DeviceType device_type; typedef ArrayTypes AT; typename AT::t_kkfloat_1d_3_lr _x; typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem; typename AT::t_double_2d_lr_const _buf; int _first; + uint64_t _datamask; AtomVecKokkos_UnpackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_3 &v, + const AtomKokkos* atomKK, const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _v(v.view()), - _first(first) + const int &first, const uint64_t &datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _first(first),_datamask(datamask) { - const size_t elements = 6; + const size_t elements = atomKK->avecKK->size_forward + atomKK->avecKK->size_velocity; const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _v(i+_first,0) = _buf(i,3); - _v(i+_first,1) = _buf(i,4); - _v(i+_first,2) = 
_buf(i,5); + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + } + + // SPIN package + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + // SPHERE package + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + } + } } }; @@ -699,45 +1107,86 @@ struct AtomVecKokkos_UnpackCommVel { void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { + if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|V_MASK); - struct AtomVecKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,X_MASK|V_MASK); + atomKK->sync(HostKK,datamask_comm_vel); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } + atomKK->modified(HostKK,datamask_comm_vel); } else { - atomKK->sync(Device,X_MASK|V_MASK); - struct AtomVecKokkos_UnpackCommVel f(atomKK->k_x,atomKK->k_v,buf,first); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,X_MASK|V_MASK); + atomKK->sync(Device,datamask_comm_vel); + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackCommVel 
f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackCommVel f(atomKK,buf,first,datamask_comm_vel); + Kokkos::parallel_for(n,f); + } + atomKK->modified(Device,datamask_comm_vel); } } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_PackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; + typename AT::t_kkacc_1d_3_randomread _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3_randomread _torque; typename AT::t_double_2d_lr _buf; int _first; + uint64_t _datamask; AtomVecKokkos_PackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, - const typename DAT::tdual_double_2d_lr &buf, - const int& first):_f(f.view()), - _first(first) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const int &first, const uint64_t &datamask): + _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _first(first),_datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - _buf(i,0) = _f(i+_first,0); - _buf(i,1) = _f(i+_first,1); - _buf(i,2) = _f(i+_first,2); + int m = 0; + _buf(i,m++) = _f(i+_first,0); + _buf(i,m++) = _f(i+_first,1); + _buf(i,m++) = _f(i+_first,2); + + if constexpr (!DEFAULT) { + + // DIPLE package + + if (_datamask & TORQUE_MASK) { + _buf(i,m++) = _torque(i+_first,0); + _buf(i,m++) = _torque(i+_first,1); + _buf(i,m++) = _torque(i+_first,2); + } + + // SPIN package + + if (_datamask & FM_MASK) { + _buf(i,m++) = _fm(i+_first,0); + _buf(i,m++) = _fm(i+_first,1); + _buf(i,m++) = _fm(i+_first,2); + + 
_buf(i,m++) = _fm_long(i+_first,0); + _buf(i,m++) = _fm_long(i+_first,1); + _buf(i,m++) = _fm_long(i+_first,2); + } + } } }; @@ -746,13 +1195,23 @@ struct AtomVecKokkos_PackReverse { int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, const DAT::tdual_double_2d_lr &buf) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); - Kokkos::parallel_for(n,f); + atomKK->sync(HostKK,datamask_reverse); + if (comm_f_only) { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } } else { - atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_PackReverse f(atomKK->k_f,buf,first); - Kokkos::parallel_for(n,f); + atomKK->sync(Device,datamask_reverse); + if (comm_f_only) { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackReverse f(atomKK,buf,first,datamask_reverse); + Kokkos::parallel_for(n,f); + } } return n*size_reverse; @@ -760,29 +1219,58 @@ int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first, /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnPackReverseSelf { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3_randomread _f; - typename AT::t_kkacc_1d_3 _fw; - int _nfirst; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3 _torque; typename AT::t_int_1d_const _list; + int _nfirst; + uint64_t _datamask; AtomVecKokkos_UnPackReverseSelf( - const typename DAT::ttransform_kkacc_1d_3 &f, - const int &nfirst, - const typename DAT::tdual_int_1d &list): - _f(f.view()),_fw(f.view()),_nfirst(nfirst),_list(list.view()) { - }; + const AtomKokkos* atomKK, + const int &nfirst, + const typename 
DAT::tdual_int_1d &list, + const uint64_t &datamask): + _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _nfirst(nfirst),_list(list.view()), + _datamask(datamask) {}; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { const int j = _list(i); - _fw(j,0) += _f(i+_nfirst,0); - _fw(j,1) += _f(i+_nfirst,1); - _fw(j,2) += _f(i+_nfirst,2); + _f(j,0) += _f(i+_nfirst,0); + _f(j,1) += _f(i+_nfirst,1); + _f(j,2) += _f(i+_nfirst,2); + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _torque(i+_nfirst,0); + _torque(j,1) += _torque(i+_nfirst,1); + _torque(j,2) += _torque(i+_nfirst,2); + } + + // SPIN package + + if (_datamask & FM_MASK) { + _fm(j,0) += _fm(i+_nfirst,0); + _fm(j,1) += _fm(i+_nfirst,1); + _fm(j,2) += _fm(i+_nfirst,2); + + _fm_long(j,0) += _fm_long(i+_nfirst,0); + _fm_long(j,1) += _fm_long(i+_nfirst,1); + _fm_long(j,2) += _fm_long(i+_nfirst,2); + } + } } }; @@ -791,47 +1279,89 @@ struct AtomVecKokkos_UnPackReverseSelf { int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, const int nfirst) { if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); + atomKK->sync(HostKK,datamask_reverse); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } + atomKK->modified(HostKK,datamask_reverse); } else { - atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_UnPackReverseSelf f(atomKK->k_f,nfirst,list); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,F_MASK); + atomKK->sync(Device,datamask_reverse); + if (comm_f_only) { + struct 
AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverseSelf f(atomKK,nfirst,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } + atomKK->modified(Device,datamask_reverse); } - return n*3; + return n*size_reverse; } /* ---------------------------------------------------------------------- */ -template +template struct AtomVecKokkos_UnPackReverse { typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_kkacc_1d_3 _f; + typename AT::t_kkacc_1d_3 _f,_fm,_fm_long; + typename AT::t_kkacc_1d_3 _torque; typename AT::t_double_2d_lr_const _buf; typename AT::t_int_1d_const _list; + uint64_t _datamask; AtomVecKokkos_UnPackReverse( - const typename DAT::ttransform_kkacc_1d_3 &f, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list): - _f(f.view()),_list(list.view()) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 3; + const AtomKokkos* atomKK, + const typename DAT::tdual_double_2d_lr &buf, + const typename DAT::tdual_int_1d &list, + const uint64_t datamask): + _f(atomKK->k_f.view()), + _torque(atomKK->k_torque.view()), + _fm(atomKK->k_fm.view()), + _fm_long(atomKK->k_fm_long.view()), + _list(list.view()), + _datamask(datamask) { + const size_t elements = atomKK->avecKK->size_reverse; + const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; buffer_view(_buf,buf,maxsend,elements); - }; + }; KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { + int m = 0; const int j = _list(i); - _f(j,0) += _buf(i,0); - _f(j,1) += _buf(i,1); - _f(j,2) += _buf(i,2); + _f(j,0) += _buf(i,m++); + _f(j,1) += _buf(i,m++); + _f(j,2) += _buf(i,m++); + + if constexpr (!DEFAULT) { + + // DIPOLE package + + if (_datamask & TORQUE_MASK) { + _torque(j,0) += _buf(i,m++); + _torque(j,1) += _buf(i,m++); + _torque(j,2) += _buf(i,m++); + } + + // SPIN package + + if (_datamask & 
FM_MASK) { + _fm(j,0) += _buf(i,m++); + _fm(j,1) += _buf(i,m++); + _fm(j,2) += _buf(i,m++); + + _fm_long(j,0) += _buf(i,m++); + _fm_long(j,1) += _buf(i,m++); + _fm_long(j,2) += _buf(i,m++); + } + } } }; @@ -845,14 +1375,1530 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n, // Choose correct reverse UnPackReverse kernel if (lmp->kokkos->reverse_comm_on_host) { - atomKK->sync(HostKK,F_MASK); - struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list); - Kokkos::parallel_for(n,f); - atomKK->modified(HostKK,F_MASK); + atomKK->sync(HostKK,datamask_reverse); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } + atomKK->modified(HostKK,datamask_reverse); + } else { + atomKK->sync(Device,datamask_reverse); + if (comm_f_only) { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnPackReverse f(atomKK,buf,list,datamask_reverse); + Kokkos::parallel_for(n,f); + } + atomKK->modified(Device,datamask_reverse); + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + const typename AT::t_tagint_1d_randomread _tag; + const typename AT::t_int_1d_randomread _type; + const typename AT::t_int_1d_randomread _mask; + const typename AT::t_tagint_1d_randomread _molecule; + const typename AT::t_kkfloat_1d_randomread _q; + const typename AT::t_kkfloat_1d_4_randomread _mu; + const typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _radius,_rmass; + typename AT::t_kkfloat_1d_randomread 
_dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + double _dx,_dy,_dz; + uint64_t _datamask; + + AtomVecKokkos_PackBorder( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const uint64_t &datamask): + _buf(buf),_list(list), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _dx(dx),_dy(dy),_dz(dz),_datamask(datamask) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(i); + int m = 0; + if constexpr (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + } + + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if constexpr (!DEFAULT) { + + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; + + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + 
_buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); + _buf(i,m++) = _uCG(j); + _buf(i,m++) = _uCGnew(j); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + atomKK->sync(space,datamask_border); + + double dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (space == HostKK) { + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } + } else { + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } + } + } else { + dx = dy = dz = 0; + if (space == HostKK) { + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } + } else { + if (!nborder) { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorder f( + atomKK,buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,datamask_border); + 
Kokkos::parallel_for(n,f); + } + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackBorder { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + const typename AT::t_double_2d_lr_const _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + typename AT::t_kkfloat_1d _q; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + uint64_t _datamask; + + AtomVecKokkos_UnpackBorder( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, const uint64_t &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _first(first),_datamask(datamask) { + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + _type(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + _mask(i+_first) = (int) d_ubuf(_buf(i,m++)).i; + + if constexpr (!DEFAULT) { + + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; + + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); + + if (_datamask & MU_MASK) { + 
_mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space) { + while (first+n >= nmax) grow(0); + + atomKK->sync(space,datamask_border); + + if (space == HostKK) { + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_host(),first,datamask_border); + Kokkos::parallel_for(n,f); + } + } else { + if (!nborder) { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorder + f(atomKK,buf.view_device(),first,datamask_border); + Kokkos::parallel_for(n,f); + } + } + + atomKK->modified(space,datamask_border); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_um _buf; + const typename AT::t_int_1d_const _list; + const typename AT::t_kkfloat_1d_3_lr_randomread _x; + typename 
AT::t_kkfloat_1d_3_randomread _v; + const typename AT::t_tagint_1d_randomread _tag; + const typename AT::t_int_1d_randomread _type; + const typename AT::t_int_1d_randomread _mask; + const typename AT::t_tagint_1d_randomread _molecule; + const typename AT::t_kkfloat_1d_randomread _q; + const typename AT::t_kkfloat_1d_4_randomread _mu; + const typename AT::t_kkfloat_1d_4_randomread _sp; + typename AT::t_kkfloat_1d_randomread _radius,_rmass; + typename AT::t_kkfloat_1d_3_randomread _omega; + typename AT::t_kkfloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + double _dx,_dy,_dz, _dvx, _dvy, _dvz; + const int _deform_groupbit; + const uint64_t _datamask; + + AtomVecKokkos_PackBorderVel( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr &buf, + const typename AT::t_int_1d_const &list, + const double &dx, const double &dy, const double &dz, + const double &dvx, const double &dvy, const double &dvz, + const int &deform_groupbit, + const uint64_t &datamask): + _buf(buf),_list(list),_datamask(datamask), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _dx(dx),_dy(dy),_dz(dz), + _dvx(dvx),_dvy(dvy),_dvz(dvz), + _deform_groupbit(deform_groupbit) { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) 
const { + int m = 0; + const int j = _list(i); + if constexpr (PBC_FLAG == 0) { + _buf(i,m++) = _x(j,0); + _buf(i,m++) = _x(j,1); + _buf(i,m++) = _x(j,2); + } else { + _buf(i,m++) = _x(j,0) + _dx; + _buf(i,m++) = _x(j,1) + _dy; + _buf(i,m++) = _x(j,2) + _dz; + } + _buf(i,m++) = d_ubuf(_tag(j)).d; + _buf(i,m++) = d_ubuf(_type(j)).d; + _buf(i,m++) = d_ubuf(_mask(j)).d; + + if constexpr (DEFORM_VREMAP) { + if (_mask(i) & _deform_groupbit) { + _buf(i,m++) = _v(j,0) + _dvx; + _buf(i,m++) = _v(j,1) + _dvy; + _buf(i,m++) = _v(j,2) + _dvz; + } + } else { + _buf(i,m++) = _v(j,0); + _buf(i,m++) = _v(j,1); + _buf(i,m++) = _v(j,2); + } + + if (_datamask & MOLECULE_MASK) + _buf(i,m++) = d_ubuf(_molecule(j)).d; + + if (_datamask & Q_MASK) + _buf(i,m++) = _q(j); + + if (_datamask & MU_MASK) { + _buf(i,m++) = _mu(j,0); + _buf(i,m++) = _mu(j,1); + _buf(i,m++) = _mu(j,2); + _buf(i,m++) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _buf(i,m++) = _sp(j,0); + _buf(i,m++) = _sp(j,1); + _buf(i,m++) = _sp(j,2); + _buf(i,m++) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _buf(i,m++) = _radius(j); + + if (_datamask & RMASS_MASK) + _buf(i,m++) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _buf(i,m++) = _omega(j,0); + _buf(i,m++) = _omega(j,1); + _buf(i,m++) = _omega(j,2); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(i,m++) = _dpdTheta(j); + _buf(i,m++) = _uCond(j); + _buf(i,m++) = _uMech(j); + _buf(i,m++) = _uChem(j); + _buf(i,m++) = _uCG(j); + _buf(i,m++) = _uCGnew(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_border_vel_kokkos( + int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + double dx = 0, dy = 0, dz = 0; + double dvx = 0, dvy = 0, dvz = 0; + + atomKK->sync(space,datamask_border_vel); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = 
pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + if (space == HostKK) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + if (space == HostKK) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } } else { - atomKK->sync(Device,F_MASK); - struct AtomVecKokkos_UnPackReverse f(atomKK->k_f,buf,list); - Kokkos::parallel_for(n,f); - atomKK->modified(Device,F_MASK); + if (space == HostKK) { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_host(), k_sendlist.view_host(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + AtomVecKokkos_PackBorderVel f( + atomKK, + buf.view_device(), k_sendlist.view_device(), + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit,datamask_border_vel); + Kokkos::parallel_for(n,f); + } } + + return n*(size_border + size_velocity); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackBorderVel { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_double_2d_lr_const_um _buf; + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_tagint_1d _tag; + typename 
AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_tagint_1d _molecule; + typename AT::t_kkfloat_1d _q; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + uint64_t _datamask; + + AtomVecKokkos_UnpackBorderVel( + const AtomKokkos* atomKK, + const typename AT::t_double_2d_lr_const &buf, + const int &first, + const uint64_t &datamask): + _buf(buf), + _x(atomKK->k_x.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _molecule(atomKK->k_molecule.view()), + _q(atomKK->k_q.view()), + _v(atomKK->k_v.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + _first(first),_datamask(datamask) + { + const size_t elements = atomKK->avecKK->size_border + atomKK->avecKK->size_velocity; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + int m = 0; + _x(i+_first,0) = _buf(i,m++); + _x(i+_first,1) = _buf(i,m++); + _x(i+_first,2) = _buf(i,m++); + _tag(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _type(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _mask(i+_first) = static_cast(d_ubuf(_buf(i,m++)).i); + _v(i+_first,0) = _buf(i,m++); + _v(i+_first,1) = _buf(i,m++); + _v(i+_first,2) = _buf(i,m++); + + if constexpr (!DEFAULT) { + + if (_datamask & MOLECULE_MASK) + _molecule(i+_first) = (tagint) d_ubuf(_buf(i,m++)).i; 
+ + if (_datamask & Q_MASK) + _q(i+_first) = _buf(i,m++); + + if (_datamask & MU_MASK) { + _mu(i+_first,0) = _buf(i,m++); + _mu(i+_first,1) = _buf(i,m++); + _mu(i+_first,2) = _buf(i,m++); + _mu(i+_first,3) = _buf(i,m++); + } + + if (_datamask & SP_MASK) { + _sp(i+_first,0) = _buf(i,m++); + _sp(i+_first,1) = _buf(i,m++); + _sp(i+_first,2) = _buf(i,m++); + _sp(i+_first,3) = _buf(i,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i+_first) = _buf(i,m++); + + if (_datamask & RMASS_MASK) + _rmass(i+_first) = _buf(i,m++); + + if (_datamask & OMEGA_MASK) { + _omega(i+_first,0) = _buf(i,m++); + _omega(i+_first,1) = _buf(i,m++); + _omega(i+_first,2) = _buf(i,m++); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i+_first) = _buf(i,m++); + _uCond(i+_first) = _buf(i,m++); + _uMech(i+_first) = _buf(i,m++); + _uChem(i+_first) = _buf(i,m++); + _uCG(i+_first) = _buf(i,m++); + _uCGnew(i+_first) = _buf(i,m++); + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_border_vel_kokkos( + const int &n, const int &first, + const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { + + while (first+n >= nmax) grow(0); + + atomKK->sync(space,datamask_border_vel); + + if (space == HostKK) { + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_host(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } else { + if (!ncomm_vel) { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_UnpackBorderVel f( + atomKK, + buf.view_device(), + first,datamask_border_vel); + Kokkos::parallel_for(n,f); + } + } + + atomKK->modified(space,datamask_border_vel); +} + +/* 
---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _size_exchange; + uint64_t _datamask; + + AtomVecKokkos_PackExchangeFunctor( + const AtomKokkos* atomKK, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d sendlist, + DAT::tdual_int_1d copylist, + const uint64_t datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + 
_nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + _dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _size_exchange(atomKK->avecKK->size_exchange), + _datamask(datamask) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = _size_exchange; + int m = 1; + + _buf(mysend,m++) = _x(i,0); + _buf(mysend,m++) = _x(i,1); + _buf(mysend,m++) = _x(i,2); + _buf(mysend,m++) = _v(i,0); + 
_buf(mysend,m++) = _v(i,1); + _buf(mysend,m++) = _v(i,2); + _buf(mysend,m++) = d_ubuf(_tag(i)).d; + _buf(mysend,m++) = d_ubuf(_type(i)).d; + _buf(mysend,m++) = d_ubuf(_mask(i)).d; + _buf(mysend,m++) = d_ubuf(_image(i)).d; + + if constexpr (!DEFAULT) { + + if (_datamask & Q_MASK) + _buf(mysend,m++) = _q(i); + + if (_datamask & MOLECULE_MASK) + _buf(mysend,m++) = d_ubuf(_molecule(i)).d; + + if (_datamask & BOND_MASK) { + _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; + for (int k = 0; k < _num_bond(i); k++) { + _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; + } + } + + if (_datamask & ANGLE_MASK) { + _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; + for (int k = 0; k < _num_angle(i); k++) { + _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; + } + } + + if (_datamask & DIHEDRAL_MASK) { + _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; + for (int k = 0; k < _num_dihedral(i); k++) { + _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; + } + } + + if (_datamask & IMPROPER_MASK) { + _buf(mysend,m++) = d_ubuf(_num_improper(i)).d; + for (int k = 0; k < _num_improper(i); k++) { + _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; + _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; + } + } + + if (_datamask & SPECIAL_MASK) { + _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; + _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; + for (int k = 0; k < _nspecial(i,2); k++) + _buf(mysend,m++) = 
d_ubuf(_special(i,k)).d; + } + + if (_datamask & MU_MASK) { + _buf(mysend,m++) = _mu(i,0); + _buf(mysend,m++) = _mu(i,1); + _buf(mysend,m++) = _mu(i,2); + _buf(mysend,m++) = _mu(i,3); + } + + if (_datamask & SP_MASK) { + _buf(mysend,m++) = _sp(i,0); + _buf(mysend,m++) = _sp(i,1); + _buf(mysend,m++) = _sp(i,2); + _buf(mysend,m++) = _sp(i,3); + } + + if (_datamask & RADIUS_MASK) + _buf(mysend,m++) = _radius(i); + + if (_datamask & RMASS_MASK) + _buf(mysend,m++) = _rmass(i); + + if (_datamask & OMEGA_MASK) { + _buf(mysend,m++) = _omega(i,0); + _buf(mysend,m++) = _omega(i,1); + _buf(mysend,m++) = _omega(i,2); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _buf(mysend,m++) = _dpdTheta(i); + _buf(mysend,m++) = _uCond(i); + _buf(mysend,m++) = _uMech(i); + _buf(mysend,m++) = _uChem(i); + _buf(mysend,m++) = _uCG(i); + _buf(mysend,m++) = _uCGnew(i); + } + } + + const int j = _copylist(mysend); + + if (j > -1) { + _x(i,0) = _x(j,0); + _x(i,1) = _x(j,1); + _x(i,2) = _x(j,2); + _v(i,0) = _v(j,0); + _v(i,1) = _v(j,1); + _v(i,2) = _v(j,2); + _tag(i) = _tag(j); + _type(i) = _type(j); + _mask(i) = _mask(j); + _image(i) = _image(j); + + if constexpr (!DEFAULT) { + + if (_datamask & Q_MASK) + _q(i) = _q(j); + + if (_datamask & MOLECULE_MASK) + _molecule(i) = _molecule(j); + + if (_datamask & BOND_MASK) { + _num_bond(i) = _num_bond(j); + for (int k = 0; k < _num_bond(j); k++) { + _bond_type(i,k) = _bond_type(j,k); + _bond_atom(i,k) = _bond_atom(j,k); + } + } + + if (_datamask & ANGLE_MASK) { + _num_angle(i) = _num_angle(j); + for (int k = 0; k < _num_angle(j); k++) { + _angle_type(i,k) = _angle_type(j,k); + _angle_atom1(i,k) = _angle_atom1(j,k); + _angle_atom2(i,k) = _angle_atom2(j,k); + _angle_atom3(i,k) = _angle_atom3(j,k); + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = _num_dihedral(j); + for (int k = 0; k < _num_dihedral(j); k++) { + _dihedral_type(i,k) = _dihedral_type(j,k); + _dihedral_atom1(i,k) = _dihedral_atom1(j,k); + 
_dihedral_atom2(i,k) = _dihedral_atom2(j,k); + _dihedral_atom3(i,k) = _dihedral_atom3(j,k); + _dihedral_atom4(i,k) = _dihedral_atom4(j,k); + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = _num_improper(j); + for (int k = 0; k < _num_improper(j); k++) { + _improper_type(i,k) = _improper_type(j,k); + _improper_atom1(i,k) = _improper_atom1(j,k); + _improper_atom2(i,k) = _improper_atom2(j,k); + _improper_atom3(i,k) = _improper_atom3(j,k); + _improper_atom4(i,k) = _improper_atom4(j,k); + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = _nspecial(j,0); + _nspecial(i,1) = _nspecial(j,1); + _nspecial(i,2) = _nspecial(j,2); + for (int k = 0; k < _nspecial(j,2); k++) + _special(i,k) = _special(j,k); + } + + if (_datamask & MU_MASK) { + _mu(i,0) = _mu(j,0); + _mu(i,1) = _mu(j,1); + _mu(i,2) = _mu(j,2); + _mu(i,3) = _mu(j,3); + } + + if (_datamask & SP_MASK) { + _sp(i,0) = _sp(j,0); + _sp(i,1) = _sp(j,1); + _sp(i,2) = _sp(j,2); + _sp(i,3) = _sp(j,3); + } + + if (_datamask & RADIUS_MASK) + _radius(i) = _radius(j); + + if (_datamask & RMASS_MASK) + _rmass(i) = _rmass(j); + + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _omega(j,0); + _omega(i,1) = _omega(j,1); + _omega(i,2) = _omega(j,2); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _dpdTheta(j); + _uCond(i) = _uCond(j); + _uMech(i) = _uMech(j); + _uChem(i) = _uChem(j); + _uCG(i) = _uCG(j); + _uCGnew(i) = _uCGnew(j); + } + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + atomKK->sync(space,datamask_exchange); + set_size_exchange(); + + if (nsend > (int) (k_buf.view_host().extent(0)* + k_buf.view_host().extent(1))/size_exchange) { + int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; + 
k_buf.resize(newsize,k_buf.view_host().extent(1)); + } + + if (space == HostKK) { + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } + } else { + if (size_exchange == 11) { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecKokkos_PackExchangeFunctor + f(atomKK,k_buf,k_sendlist,k_copylist,datamask_exchange); + Kokkos::parallel_for(nsend,f); + } + } + + atomKK->modified(space,datamask_exchange); + + return nsend*size_exchange; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + + typename AT::t_kkfloat_1d_3_lr _x; + typename AT::t_kkfloat_1d_3 _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_kkfloat_1d _q; + typename AT::t_tagint_1d _molecule; + typename AT::t_int_2d _nspecial; + typename AT::t_tagint_2d _special; + typename AT::t_int_1d _num_bond; + typename AT::t_int_2d _bond_type; + typename AT::t_tagint_2d _bond_atom; + typename AT::t_int_1d _num_angle; + typename AT::t_int_2d _angle_type; + typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; + typename AT::t_int_1d _num_dihedral; + typename AT::t_int_2d _dihedral_type; + typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, + _dihedral_atom3,_dihedral_atom4; + typename AT::t_int_1d _num_improper; + typename AT::t_int_2d _improper_type; + typename AT::t_tagint_2d _improper_atom1,_improper_atom2, + _improper_atom3,_improper_atom4; + typename AT::t_kkfloat_1d_4 _mu; + typename AT::t_kkfloat_1d_4 _sp; + 
typename AT::t_kkfloat_1d _radius,_rmass; + typename AT::t_kkfloat_1d_3 _omega; + typename AT::t_kkfloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + + typename AT::t_double_2d_lr_um _buf; + typename AT::t_int_1d _nlocal; + typename AT::t_int_1d _indices; + int _dim; + double _lo,_hi; + int _size_exchange; + uint64_t _datamask; + + AtomVecKokkos_UnpackExchangeFunctor( + const AtomKokkos* atomKK, + const DAT::tdual_double_2d_lr buf, + DAT::tdual_int_1d nlocal, + DAT::tdual_int_1d indices, + int dim, double lo, double hi, + uint64_t datamask): + _x(atomKK->k_x.view()), + _v(atomKK->k_v.view()), + _tag(atomKK->k_tag.view()), + _type(atomKK->k_type.view()), + _mask(atomKK->k_mask.view()), + _image(atomKK->k_image.view()), + _q(atomKK->k_q.view()), + _molecule(atomKK->k_molecule.view()), + _nspecial(atomKK->k_nspecial.view()), + _special(atomKK->k_special.view()), + _num_bond(atomKK->k_num_bond.view()), + _bond_type(atomKK->k_bond_type.view()), + _bond_atom(atomKK->k_bond_atom.view()), + _num_angle(atomKK->k_num_angle.view()), + _angle_type(atomKK->k_angle_type.view()), + _angle_atom1(atomKK->k_angle_atom1.view()), + _angle_atom2(atomKK->k_angle_atom2.view()), + _angle_atom3(atomKK->k_angle_atom3.view()), + _num_dihedral(atomKK->k_num_dihedral.view()), + _dihedral_type(atomKK->k_dihedral_type.view()), + _dihedral_atom1(atomKK->k_dihedral_atom1.view()), + _dihedral_atom2(atomKK->k_dihedral_atom2.view()), + _dihedral_atom3(atomKK->k_dihedral_atom3.view()), + _dihedral_atom4(atomKK->k_dihedral_atom4.view()), + _num_improper(atomKK->k_num_improper.view()), + _improper_type(atomKK->k_improper_type.view()), + _improper_atom1(atomKK->k_improper_atom1.view()), + _improper_atom2(atomKK->k_improper_atom2.view()), + _improper_atom3(atomKK->k_improper_atom3.view()), + _improper_atom4(atomKK->k_improper_atom4.view()), + _mu(atomKK->k_mu.view()), + _sp(atomKK->k_sp.view()), + _radius(atomKK->k_radius.view()), + _rmass(atomKK->k_rmass.view()), + _omega(atomKK->k_omega.view()), + 
_dpdTheta(atomKK->k_dpdTheta.view()), + _uCond(atomKK->k_uCond.view()), + _uMech(atomKK->k_uMech.view()), + _uChem(atomKK->k_uChem.view()), + _uCG(atomKK->k_uCG.view()), + _uCGnew(atomKK->k_uCGnew.view()), + + _nlocal(nlocal.template view()), + _indices(indices.template view()), + _dim(dim),_lo(lo),_hi(hi),_size_exchange(atomKK->avecKK->size_exchange), + _datamask(datamask) { + const int maxsendlist = (buf.template view().extent(0)* + buf.template view().extent(1))/_size_exchange; + buffer_view(_buf,buf,maxsendlist,_size_exchange); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + double x = _buf(myrecv,_dim+1); + int i = -1; + if (x >= _lo && x < _hi) { + i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + int m = 1; + _x(i,0) = _buf(myrecv,m++); + _x(i,1) = _buf(myrecv,m++); + _x(i,2) = _buf(myrecv,m++); + _v(i,0) = _buf(myrecv,m++); + _v(i,1) = _buf(myrecv,m++); + _v(i,2) = _buf(myrecv,m++); + _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; + + if constexpr (!DEFAULT) { + + if (_datamask & Q_MASK) + _q(i) = _buf(myrecv,m++); + + if (_datamask & MOLECULE_MASK) + _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + + if (_datamask & BOND_MASK) { + _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_bond(i); k++) { + _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & ANGLE_MASK) { + _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_angle(i); k++) { + _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & DIHEDRAL_MASK) { + _num_dihedral(i) = (int) 
d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_dihedral(i); k++) { + _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & IMPROPER_MASK) { + _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _num_improper(i); k++) { + _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + } + + if (_datamask & SPECIAL_MASK) { + _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; + _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; + for (int k = 0; k < _nspecial(i,2); k++) + _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; + } + + if (_datamask & MU_MASK) { + _mu(i,0) = _buf(myrecv,m++); + _mu(i,1) = _buf(myrecv,m++); + _mu(i,2) = _buf(myrecv,m++); + _mu(i,3) = _buf(myrecv,m++); + } + + if (_datamask & SP_MASK) { + _sp(i,0) = _buf(myrecv,m++); + _sp(i,1) = _buf(myrecv,m++); + _sp(i,2) = _buf(myrecv,m++); + _sp(i,3) = _buf(myrecv,m++); + } + + if (_datamask & RADIUS_MASK) + _radius(i) = _buf(myrecv,m++); + + if (_datamask & RMASS_MASK) + _rmass(i) = _buf(myrecv,m++); + + if (_datamask & OMEGA_MASK) { + _omega(i,0) = _buf(myrecv,m++); + _omega(i,1) = _buf(myrecv,m++); + _omega(i,2) = _buf(myrecv,m++); + } + + // DPD-REACT package + + if (_datamask & DPDTHETA_MASK) { + _dpdTheta(i) = _buf(myrecv,m++); + _uCond(i) = _buf(myrecv,m++); + _uMech(i) = _buf(myrecv,m++); + _uChem(i) = _buf(myrecv,m++); + _uCG(i) = _buf(myrecv,m++); + _uCGnew(i) = _buf(myrecv,m++); + } + } + } + + 
if constexpr (OUTPUT_INDICES) + _indices(myrecv) = i; + } +}; + +/* ---------------------------------------------------------------------- */ +int AtomVecKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, + int dim, double lo, double hi, ExecutionSpace space, + DAT::tdual_int_1d &k_indices) +{ + while (nlocal + nrecv/size_exchange >= nmax) grow(0); + + atomKK->sync(space,datamask_exchange); + + if (space == HostKK) { + k_count.view_host()(0) = nlocal; + + if (k_indices.view_host().data()) { + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } else { + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } + } else { + k_count.view_host()(0) = nlocal; + k_count.modify_host(); + k_count.sync_device(); + + if (k_indices.view_host().data()) { + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } else { + if (size_exchange == 11) { + AtomVecKokkos_UnpackExchangeFunctor + f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } else { + AtomVecKokkos_UnpackExchangeFunctor + 
f(atomKK,k_buf,k_count,k_indices,dim,lo,hi,datamask_exchange); + Kokkos::parallel_for(nrecv/size_exchange,f); + } + } + + k_count.modify_device(); + k_count.sync_host(); + } + + atomKK->modified(space,datamask_exchange); + + return k_count.view_host()(0); +} + +/* ---------------------------------------------------------------------- */ + +uint64_t AtomVecKokkos::field2mask(std::string field) +{ + if (field == "id") + return TAG_MASK; + else if (field == "type") + return TYPE_MASK; + else if (field == "mask") + return MASK_MASK; + else if (field == "image") + return IMAGE_MASK; + else if (field == "x") + return X_MASK; + else if (field == "v") + return V_MASK; + else if (field == "f") + return F_MASK; + else if (field == "rmass") + return RMASS_MASK; + else if (field == "q") + return Q_MASK; + else if (field == "mu") + return MU_MASK; + else if (field == "mu3") + return MU_MASK; + else if (field == "radius") + return RADIUS_MASK; + else if (field == "omega") + return OMEGA_MASK; + else if (field == "torque") + return TORQUE_MASK; + else if (field == "molecule") + return MOLECULE_MASK; + else if (field == "nspecial") + return SPECIAL_MASK; + else if (field == "num_bond") + return BOND_MASK; + else if (field == "num_angle") + return ANGLE_MASK; + else if (field == "num_dihedral") + return DIHEDRAL_MASK; + else if (field == "num_improper") + return IMPROPER_MASK; + else if (field == "sp") + return SP_MASK; + else if (field == "fm") + return FM_MASK; + else if (field == "fm_long") + return FML_MASK; + else if (field == "rho") // conflicts with SPH package "rho" + return DPDRHO_MASK; + else if (field == "dpdTheta") + return DPDTHETA_MASK; + else if (field == "uCond") + return UCOND_MASK; + else if (field == "uMech") + return UMECH_MASK; + else if (field == "uChem") + return UCHEM_MASK; + else if (field == "uCG") + return UCG_MASK; + else if (field == "uCGnew") + return UCGNEW_MASK; + else if (field == "duChem") + return DUCHEM_MASK; + else + return EMPTY_MASK; +} + +/* 
---------------------------------------------------------------------- */ + +int AtomVecKokkos::field2size(std::string field) +{ + if (field == "id") return 1; + else if (field == "type") return 1; + else if (field == "mask") return 1; + else if (field == "image") return 1; + else if (field == "x") return 3; + else if (field == "v") return 3; + else if (field == "f") return 3; + else if (field == "rmass") return 1; + else if (field == "q") return 1; + else if (field == "mu") return 4; + else if (field == "mu3") return 3; + else if (field == "radius") return 1; + else if (field == "omega") return 3; + else if (field == "torque") return 3; + else if (field == "molecule") return 1; + else if (field == "special") return 3+atom->maxspecial; + else if (field == "num_bond") return 1+2*atom->bond_per_atom; + else if (field == "num_angle") return 1+4*atom->angle_per_atom; + else if (field == "num_dihedral") return 1+5*atom->dihedral_per_atom; + else if (field == "num_improper") return 1+5*atom->dihedral_per_atom; + else if (field == "sp") return 4; + else if (field == "fm") return 3; + else if (field == "fm_long") return 3; + else if (field == "rho") return 1; + else if (field == "dpdTheta") return 1; + else if (field == "uCond") return 1; + else if (field == "uMech") return 1; + else if (field == "uChem") return 1; + else if (field == "uCG") return 1; + else if (field == "uCGnew") return 1; + else if (field == "duChem") return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::set_atom_masks() +{ + datamask_grow = EMPTY_MASK; + for (int i = 0; i < default_grow.size(); i++) + datamask_grow |= field2mask(default_grow[i]); + for (int i = 0; i < ngrow; i++) + datamask_grow |= field2mask(fields_grow[i]); + + datamask_comm = EMPTY_MASK; + for (int i = 0; i < default_comm.size(); i++) + datamask_comm |= field2mask(default_comm[i]); + for (int i = 0; i < ncomm; i++) + datamask_comm |= 
field2mask(fields_comm[i]); + + datamask_comm_vel = EMPTY_MASK; + for (int i = 0; i < default_comm_vel.size(); i++) + datamask_comm_vel |= field2mask(default_comm_vel[i]); + for (int i = 0; i < ncomm_vel; i++) + datamask_comm_vel |= field2mask(fields_comm_vel[i]); + + datamask_reverse = EMPTY_MASK; + for (int i = 0; i < default_reverse.size(); i++) + datamask_reverse |= field2mask(default_reverse[i]); + for (int i = 0; i < nreverse; i++) + datamask_reverse |= field2mask(fields_reverse[i]); + + datamask_border = EMPTY_MASK; + for (int i = 0; i < default_border.size(); i++) + datamask_border |= field2mask(default_border[i]); + for (int i = 0; i < nborder; i++) + datamask_border |= field2mask(fields_border[i]); + + datamask_border_vel = EMPTY_MASK; + for (int i = 0; i < default_border_vel.size(); i++) + datamask_border_vel |= field2mask(default_border_vel[i]); + for (int i = 0; i < nborder_vel; i++) + datamask_border_vel |= field2mask(fields_border_vel[i]); + + datamask_exchange = EMPTY_MASK; + for (int i = 0; i < default_exchange.size(); i++) + datamask_exchange |= field2mask(default_exchange[i]); + for (int i = 0; i < nexchange; i++) + datamask_exchange |= field2mask(fields_exchange[i]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::set_size_exchange() +{ + size_exchange = 1; // 1 to store buffer length + for (int i = 0; i < default_exchange.size(); i++) + size_exchange += field2size(default_exchange[i]); + for (int i = 0; i < nexchange; i++) + size_exchange += field2size(fields_exchange[i]); } diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 05e6d7624e7..59124c47df3 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -34,99 +34,174 @@ class AtomVecKokkos : virtual public AtomVec { virtual void sort_kokkos(Kokkos::BinSort &Sorter) = 0; - virtual void sync(ExecutionSpace space, unsigned int mask) = 0; - virtual void modified(ExecutionSpace space, 
unsigned int mask) = 0; - virtual void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) = 0; - - virtual int - pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]); - - virtual int - pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, - const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, - const DAT::tdual_int_1d &pbc_flag, - const DAT::tdual_int_2d &pbc, - const DAT::tdual_int_1d &g2l); - - virtual int - pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, + virtual void sync(ExecutionSpace space, uint64_t mask) = 0; + virtual void modified(ExecutionSpace space, uint64_t mask) = 0; + virtual void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) = 0; + + int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst, const int &pbc_flag, const int pbc[]); - virtual void - unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + int pack_comm_self_fused(const int &n, const DAT::tdual_int_2d_lr &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l); - virtual int - pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]); + int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, const int pbc[]); - virtual void - unpack_comm_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf); - virtual int - pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst); + int 
pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf, + const int &pbc_flag, const int pbc[]); - virtual int - pack_reverse_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf); + void unpack_comm_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf); - virtual void - unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, + int pack_reverse_self(const int &n, const DAT::tdual_int_1d &list, + const int nfirst); + + int pack_reverse_kokkos(const int &n, const int &nfirst, const DAT::tdual_double_2d_lr &buf); - virtual int - pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) = 0; + void unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list, + const DAT::tdual_double_2d_lr &buf); - virtual void - unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) = 0; + int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, + DAT::tdual_double_2d_lr buf, + int pbc_flag, int *pbc, ExecutionSpace space); - virtual int - pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, - DAT::tdual_double_2d_lr /*buf*/, - int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/) { return 0; } + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_double_2d_lr &buf, + ExecutionSpace space); + + int pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/, + DAT::tdual_double_2d_lr /*buf*/, + int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/); + + void unpack_border_vel_kokkos(const int &/*n*/, const int & /*nfirst*/, + const DAT::tdual_double_2d_lr & /*buf*/, + ExecutionSpace /*space*/); + + int pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space); + + int 
unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, + int nlocal, int dim, double lo, double hi, + ExecutionSpace space, + DAT::tdual_int_1d &k_indices); - virtual void - unpack_border_vel_kokkos(const int &/*n*/, const int & /*nfirst*/, - const DAT::tdual_double_2d_lr & /*buf*/, - ExecutionSpace /*space*/) {} - - virtual int - pack_exchange_kokkos(const int &nsend, DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) = 0; - - virtual int - unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) = 0; - - int no_comm_vel_flag,no_border_vel_flag; - int unpack_exchange_indices_flag; int size_exchange; + uint64_t datamask_grow; + uint64_t datamask_comm; + uint64_t datamask_comm_vel; + uint64_t datamask_reverse; + uint64_t datamask_border; + uint64_t datamask_border_vel; + uint64_t datamask_exchange; + protected: + DAT::t_tagint_1d d_tag; + DAT::t_int_1d d_type, d_mask; + HAT::t_tagint_1d h_tag; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_kkfloat_1d_3_lr d_x; + DAT::t_kkfloat_1d_3 d_v; + DAT::t_kkacc_1d_3 d_f; HAT::t_kkfloat_1d_3_lr h_x; HAT::t_kkfloat_1d_3 h_v; HAT::t_kkacc_1d_3 h_f; + DAT::t_kkfloat_1d_3 d_omega, d_angmom; + HAT::t_kkfloat_1d_3 h_omega, h_angmom; + + // FULL + + DAT::t_kkfloat_1d d_q; + HAT::t_kkfloat_1d h_q; + + DAT::t_tagint_1d d_molecule; + DAT::t_int_2d d_nspecial; + DAT::t_tagint_2d d_special; + DAT::t_int_1d d_num_bond; + DAT::t_int_2d d_bond_type; + DAT::t_tagint_2d d_bond_atom; + + HAT::t_tagint_1d h_molecule; + HAT::t_int_2d h_nspecial; + HAT::t_tagint_2d h_special; + HAT::t_int_1d h_num_bond; + HAT::t_int_2d h_bond_type; + HAT::t_tagint_2d h_bond_atom; + + DAT::t_int_1d d_num_angle; + DAT::t_int_2d d_angle_type; + DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; + + HAT::t_int_1d 
h_num_angle; + HAT::t_int_2d h_angle_type; + HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; + + DAT::t_int_1d d_num_dihedral; + DAT::t_int_2d d_dihedral_type; + DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, + d_dihedral_atom3,d_dihedral_atom4; + DAT::t_int_1d d_num_improper; + DAT::t_int_2d d_improper_type; + DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, + d_improper_atom3,d_improper_atom4; + + HAT::t_int_1d h_num_dihedral; + HAT::t_int_2d h_dihedral_type; + HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, + h_dihedral_atom3,h_dihedral_atom4; + HAT::t_int_1d h_num_improper; + HAT::t_int_2d h_improper_type; + HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, + h_improper_atom3,h_improper_atom4; + + DAT::t_kkfloat_1d_4 d_mu; + HAT::t_kkfloat_1d_4 h_mu; + + DAT::t_kkfloat_1d_4 d_sp; + DAT::t_kkacc_1d_3 d_fm; + DAT::t_kkacc_1d_3 d_fm_long; + HAT::t_kkfloat_1d_4 h_sp; + HAT::t_kkacc_1d_3 h_fm; + HAT::t_kkacc_1d_3 h_fm_long; + + DAT::t_kkfloat_1d d_radius; + HAT::t_kkfloat_1d h_radius; + DAT::t_kkfloat_1d d_rmass; + HAT::t_kkfloat_1d h_rmass; + DAT::t_kkacc_1d_3 d_torque; + HAT::t_kkacc_1d_3 h_torque; + + DAT::t_kkfloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; + HAT::t_kkfloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; + size_t buffer_size; void* buffer; DAT::tdual_int_1d k_count; + uint64_t field2mask(std::string); + int field2size(std::string); + void set_atom_masks(); + void set_size_exchange(); + public: #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 4b90ad273ee..ae72ea7b164 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -30,7 +30,17 @@ using namespace LAMMPS_NS; AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecMolecular(lmp) { - unpack_exchange_indices_flag = 1; +} + +/* 
---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::init() +{ + AtomVecMolecular::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -260,616 +270,7 @@ void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort &So /* ---------------------------------------------------------------------- */ -template -struct AtomVecMolecularKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_tagint_1d _molecule; - double _dx,_dy,_dz; - - AtomVecMolecularKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_tagint_1d &molecule, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = d_ubuf(_molecule(j)).d; - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; 
- _buf(i,6) = d_ubuf(_molecule(j)).d; - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecMolecularKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecMolecularKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecMolecularKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecMolecularKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_tagint_1d _molecule; - int _first; - - - AtomVecMolecularKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_tagint_1d &molecule, - const 
int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule), - _first(first) { - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _molecule(i+_first) = (tagint) d_ubuf(_buf(i,6)).i; - - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) { - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - while (first+n >= nmax) grow(0); - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); - if (space==Host) { - struct AtomVecMolecularKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_molecule,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecMolecularKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_molecule,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_tagint_1d_randomread _molecule; - typename AT::t_int_2d_randomread _nspecial; - typename AT::t_tagint_2d_randomread _special; - typename AT::t_int_1d_randomread _num_bond; - typename AT::t_int_2d_randomread _bond_type; - typename AT::t_tagint_2d_randomread _bond_atom; - typename 
AT::t_int_1d_randomread _num_angle; - typename AT::t_int_2d_randomread _angle_type; - typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d_randomread _num_dihedral; - typename AT::t_int_2d_randomread _dihedral_type; - typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d_randomread _num_improper; - typename AT::t_int_2d_randomread _improper_type; - typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_tagint_1d _moleculew; - typename AT::t_int_2d _nspecialw; - typename AT::t_tagint_2d _specialw; - typename AT::t_int_1d _num_bondw; - typename AT::t_int_2d _bond_typew; - typename AT::t_tagint_2d _bond_atomw; - typename AT::t_int_1d _num_anglew; - typename AT::t_int_2d _angle_typew; - typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w; - typename AT::t_int_1d _num_dihedralw; - typename AT::t_int_2d _dihedral_typew; - typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w, - _dihedral_atom3w,_dihedral_atom4w; - typename AT::t_int_1d _num_improperw; - typename AT::t_int_2d _improper_typew; - typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w, - _improper_atom3w,_improper_atom4w; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecMolecularKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), 
- _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - _improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _moleculew(atom->k_molecule.view()), - _nspecialw(atom->k_nspecial.view()), - _specialw(atom->k_special.view()), - _num_bondw(atom->k_num_bond.view()), - _bond_typew(atom->k_bond_type.view()), - _bond_atomw(atom->k_bond_atom.view()), - _num_anglew(atom->k_num_angle.view()), - _angle_typew(atom->k_angle_type.view()), - _angle_atom1w(atom->k_angle_atom1.view()), - _angle_atom2w(atom->k_angle_atom2.view()), - _angle_atom3w(atom->k_angle_atom3.view()), - _num_dihedralw(atom->k_num_dihedral.view()), - _dihedral_typew(atom->k_dihedral_type.view()), - _dihedral_atom1w(atom->k_dihedral_atom1.view()), - _dihedral_atom2w(atom->k_dihedral_atom2.view()), - _dihedral_atom3w(atom->k_dihedral_atom3.view()), - _dihedral_atom4w(atom->k_dihedral_atom4.view()), - 
_num_improperw(atom->k_num_improper.view()), - _improper_typew(atom->k_improper_type.view()), - _improper_atom1w(atom->k_improper_atom1.view()), - _improper_atom2w(atom->k_improper_atom2.view()), - _improper_atom3w(atom->k_improper_atom3.view()), - _improper_atom4w(atom->k_improper_atom4.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - int k; - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - int m = 1; - _buf(mysend,m++) = _x(i,0); - _buf(mysend,m++) = _x(i,1); - _buf(mysend,m++) = _x(i,2); - _buf(mysend,m++) = _v(i,0); - _buf(mysend,m++) = _v(i,1); - _buf(mysend,m++) = _v(i,2); - _buf(mysend,m++) = d_ubuf(_tag(i)).d; - _buf(mysend,m++) = d_ubuf(_type(i)).d; - _buf(mysend,m++) = d_ubuf(_mask(i)).d; - _buf(mysend,m++) = d_ubuf(_image(i)).d; - _buf(mysend,m++) = d_ubuf(_molecule(i)).d; - _buf(mysend,m++) = d_ubuf(_num_bond(i)).d; - for (k = 0; k < _num_bond(i); k++) { - _buf(mysend,m++) = d_ubuf(_bond_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_bond_atom(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_angle(i)).d; - for (k = 0; k < _num_angle(i); k++) { - _buf(mysend,m++) = d_ubuf(_angle_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_angle_atom3(i,k)).d; - } - _buf(mysend,m++) = d_ubuf(_num_dihedral(i)).d; - for (k = 0; k < _num_dihedral(i); k++) { - _buf(mysend,m++) = d_ubuf(_dihedral_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_dihedral_atom4(i,k)).d; - } - _buf(mysend,m++) = 
d_ubuf(_num_improper(i)).d; - for (k = 0; k < _num_improper(i); k++) { - _buf(mysend,m++) = d_ubuf(_improper_type(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom1(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom2(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom3(i,k)).d; - _buf(mysend,m++) = d_ubuf(_improper_atom4(i,k)).d; - } - - _buf(mysend,m++) = d_ubuf(_nspecial(i,0)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,1)).d; - _buf(mysend,m++) = d_ubuf(_nspecial(i,2)).d; - for (k = 0; k < _nspecial(i,2); k++) - _buf(mysend,m++) = d_ubuf(_special(i,k)).d; - - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _moleculew(i) = _molecule(j); - _num_bondw(i) = _num_bond(j); - for (k = 0; k < _num_bond(j); k++) { - _bond_typew(i,k) = _bond_type(j,k); - _bond_atomw(i,k) = _bond_atom(j,k); - } - _num_anglew(i) = _num_angle(j); - for (k = 0; k < _num_angle(j); k++) { - _angle_typew(i,k) = _angle_type(j,k); - _angle_atom1w(i,k) = _angle_atom1(j,k); - _angle_atom2w(i,k) = _angle_atom2(j,k); - _angle_atom3w(i,k) = _angle_atom3(j,k); - } - _num_dihedralw(i) = _num_dihedral(j); - for (k = 0; k < _num_dihedral(j); k++) { - _dihedral_typew(i,k) = _dihedral_type(j,k); - _dihedral_atom1w(i,k) = _dihedral_atom1(j,k); - _dihedral_atom2w(i,k) = _dihedral_atom2(j,k); - _dihedral_atom3w(i,k) = _dihedral_atom3(j,k); - _dihedral_atom4w(i,k) = _dihedral_atom4(j,k); - } - _num_improperw(i) = _num_improper(j); - for (k = 0; k < _num_improper(j); k++) { - _improper_typew(i,k) = _improper_type(j,k); - _improper_atom1w(i,k) = _improper_atom1(j,k); - _improper_atom2w(i,k) = _improper_atom2(j,k); - _improper_atom3w(i,k) = _improper_atom3(j,k); - _improper_atom4w(i,k) = _improper_atom4(j,k); - } - _nspecialw(i,0) = _nspecial(j,0); - _nspecialw(i,1) = _nspecial(j,1); - 
_nspecialw(i,2) = _nspecial(j,2); - for (k = 0; k < _nspecial(j,2); k++) - _specialw(i,k) = _special(j,k); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial, - // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom, - // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2, - // and angle_atom3 - // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom - // 1 num_improper, 5*improper_per_atom - // 1 to store buffer length - - size_exchange = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+ - 5*atom->dihedral_per_atom + 5*atom->improper_per_atom; - - if (nsend > (int) (k_buf.view_host().extent(0)* - k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecMolecularKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecMolecularKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_tagint_1d _molecule; - typename AT::t_int_2d 
_nspecial; - typename AT::t_tagint_2d _special; - typename AT::t_int_1d _num_bond; - typename AT::t_int_2d _bond_type; - typename AT::t_tagint_2d _bond_atom; - typename AT::t_int_1d _num_angle; - typename AT::t_int_2d _angle_type; - typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3; - typename AT::t_int_1d _num_dihedral; - typename AT::t_int_2d _dihedral_type; - typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2, - _dihedral_atom3,_dihedral_atom4; - typename AT::t_int_1d _num_improper; - typename AT::t_int_2d _improper_type; - typename AT::t_tagint_2d _improper_atom1,_improper_atom2, - _improper_atom3,_improper_atom4; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecMolecularKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _molecule(atom->k_molecule.view()), - _nspecial(atom->k_nspecial.view()), - _special(atom->k_special.view()), - _num_bond(atom->k_num_bond.view()), - _bond_type(atom->k_bond_type.view()), - _bond_atom(atom->k_bond_atom.view()), - _num_angle(atom->k_num_angle.view()), - _angle_type(atom->k_angle_type.view()), - _angle_atom1(atom->k_angle_atom1.view()), - _angle_atom2(atom->k_angle_atom2.view()), - _angle_atom3(atom->k_angle_atom3.view()), - _num_dihedral(atom->k_num_dihedral.view()), - _dihedral_type(atom->k_dihedral_type.view()), - _dihedral_atom1(atom->k_dihedral_atom1.view()), - _dihedral_atom2(atom->k_dihedral_atom2.view()), - _dihedral_atom3(atom->k_dihedral_atom3.view()), - _dihedral_atom4(atom->k_dihedral_atom4.view()), - _num_improper(atom->k_num_improper.view()), - 
_improper_type(atom->k_improper_type.view()), - _improper_atom1(atom->k_improper_atom1.view()), - _improper_atom2(atom->k_improper_atom2.view()), - _improper_atom3(atom->k_improper_atom3.view()), - _improper_atom4(atom->k_improper_atom4.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - int m = 1; - _x(i,0) = _buf(myrecv,m++); - _x(i,1) = _buf(myrecv,m++); - _x(i,2) = _buf(myrecv,m++); - _v(i,0) = _buf(myrecv,m++); - _v(i,1) = _buf(myrecv,m++); - _v(i,2) = _buf(myrecv,m++); - _tag(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _type(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _mask(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - _image(i) = (imageint) d_ubuf(_buf(myrecv,m++)).i; - - _molecule(i) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _num_bond(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - int k; - for (k = 0; k < _num_bond(i); k++) { - _bond_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _bond_atom(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_angle(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_angle(i); k++) { - _angle_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _angle_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_dihedral(i) = d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _num_dihedral(i); k++) { - _dihedral_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom2(i,k) = (tagint) 
d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _dihedral_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _num_improper(i) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < (int) _num_improper(i); k++) { - _improper_type(i,k) = (int) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom1(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom2(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom3(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - _improper_atom4(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - _nspecial(i,0) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,1) = (int) d_ubuf(_buf(myrecv,m++)).i; - _nspecial(i,2) = (int) d_ubuf(_buf(myrecv,m++)).i; - for (k = 0; k < _nspecial(i,2); k++) - _special(i,k) = (tagint) d_ubuf(_buf(myrecv,m++)).i; - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ -int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - k_count.view_host()(0) = nlocal; - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - if (k_indices.view_host().data()) { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } else { - 
k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecMolecularKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - } - } - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecMolecularKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -997,7 +398,7 @@ void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) } } -void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1144,7 +545,7 @@ void AtomVecMolecularKokkos::sync_pinned(ExecutionSpace space, unsigned int mask /* ---------------------------------------------------------------------- */ -void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecMolecularKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index bf1021eee2a..5aa1b18bf59 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -31,28 +31,14 @@ namespace LAMMPS_NS { class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { public: AtomVecMolecularKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - 
DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; protected: tagint *molecule; @@ -61,61 +47,6 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { tagint **angle_atom1,**angle_atom2,**angle_atom3; tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4; tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4; - - DAT::t_tagint_1d d_tag; - DAT::t_int_1d d_type, d_mask; - HAT::t_tagint_1d h_tag; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - HAT::t_kkfloat_1d_3_lr h_x; - HAT::t_kkfloat_1d_3 h_v; - HAT::t_kkacc_1d_3 h_f; - - DAT::t_tagint_1d d_molecule; - DAT::t_int_2d d_nspecial; - DAT::t_tagint_2d d_special; - DAT::t_int_1d d_num_bond; - DAT::t_int_2d d_bond_type; - DAT::t_tagint_2d d_bond_atom; - - HAT::t_tagint_1d h_molecule; - HAT::t_int_2d h_nspecial; - HAT::t_tagint_2d h_special; - HAT::t_int_1d h_num_bond; - HAT::t_int_2d 
h_bond_type; - HAT::t_tagint_2d h_bond_atom; - - DAT::t_int_1d d_num_angle; - DAT::t_int_2d d_angle_type; - DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3; - - HAT::t_int_1d h_num_angle; - HAT::t_int_2d h_angle_type; - HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3; - - DAT::t_int_1d d_num_dihedral; - DAT::t_int_2d d_dihedral_type; - DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2, - d_dihedral_atom3,d_dihedral_atom4; - DAT::t_int_1d d_num_improper; - DAT::t_int_2d d_improper_type; - DAT::t_tagint_2d d_improper_atom1,d_improper_atom2, - d_improper_atom3,d_improper_atom4; - - HAT::t_int_1d h_num_dihedral; - HAT::t_int_2d h_dihedral_type; - HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2, - h_dihedral_atom3,h_dihedral_atom4; - HAT::t_int_1d h_num_improper; - HAT::t_int_2d h_improper_type; - HAT::t_tagint_2d h_improper_atom1,h_improper_atom2, - h_improper_atom3,h_improper_atom4; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 53a7f69346f..83a8ff37ff4 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -33,8 +33,17 @@ using namespace MathConst; AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecSphere(lmp) { - no_border_vel_flag = 0; - unpack_exchange_indices_flag = 1; +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::init() +{ + AtomVecSphere::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -143,1517 +152,7 @@ void AtomVecSphereKokkos::sort_kokkos(Kokkos::BinSort &Sorte /* ---------------------------------------------------------------------- */ -template -struct AtomVecSphereKokkos_PackComm { - typedef 
DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSphereKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t elements = 5; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.view().data(),maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _buf(i,3) = _radius(j); - _buf(i,4) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_kokkos( - const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - // 
Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) - return AtomVecKokkos::pack_comm_kokkos(n,list,buf,pbc_flag,pbc); - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - 
atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - double _h_rate[6]; - const int _deform_vremap; - - AtomVecSphereKokkos_PackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::tdual_int_1d &mask, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::ttransform_kkfloat_1d_3 &v, - const typename DAT::ttransform_kkfloat_1d_3 &omega, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc, - const double * const h_rate, - const int &deform_vremap): - _x(x.view()), - _mask(mask.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _v(v.view()), - _omega(omega.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz), - _deform_vremap(deform_vremap) - { - const size_t elements = 9 + 2 * RADVARY; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - _buf = 
typename AT::t_double_2d_lr_um(buf.view().data(),maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; - _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - if (DEFORM_VREMAP == 0) { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); - } else { - if (_mask(i) & _deform_vremap) { - _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; - _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; - _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; - } else { - _buf(i,3) = _v(j,0); - _buf(i,4) = _v(j,1); - _buf(i,5) = _v(j,2); - } - } - _buf(i,6) = _omega(j,0); - _buf(i,7) = _omega(j,1); - _buf(i,8) = _omega(j,2); - if (RADVARY) { - _buf(i,9) = _radius(j); - _buf(i,10) = _rmass(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_vel_kokkos( - const int &n, - const DAT::tdual_int_1d &list, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, - const int* const pbc) -{ - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (pbc_flag) { - if (deform_vremap) { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - 
atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (pbc_flag) { - if (deform_vremap) { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - 
atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - 
domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } else { - if (domain->triclinic) { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } else { - if (radvary == 0) { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommVel f( - atomKK->k_x,atomKK->k_mask, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); - Kokkos::parallel_for(n,f); - } - } - } - } - return n*(size_forward+size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackCommSelf { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_lr 
_xw; - typename AT::t_kkfloat_1d _radius,_rmass; - int _nfirst; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSphereKokkos_PackCommSelf( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const int &nfirst, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_xw(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _nfirst(nfirst),_list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _radius(i+_nfirst) = _radius(j); - _rmass(i+_nfirst) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_comm_self( - const int &n, const DAT::tdual_int_1d &list, - const int nfirst, const int &pbc_flag, const int* const pbc) { - // Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) - return AtomVecKokkos::pack_comm_self(n,list,nfirst,pbc_flag,pbc); - if (lmp->kokkos->forward_comm_on_host) { - atomKK->sync(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - 
atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } else { - atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - if (pbc_flag) { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if (domain->triclinic) { - struct AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct 
AtomVecSphereKokkos_PackCommSelf f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - nfirst,list, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - } - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_double_2d_lr_const_um _buf; - int _first; - - AtomVecSphereKokkos_UnpackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _first(first) - { - const size_t elements = 5; - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/elements; - _buf = typename AT::t_double_2d_lr_const_um(buf.view().data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _radius(i+_first) = _buf(i,3); - _rmass(i+_first) = _buf(i,4); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_comm_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - // Fallback to AtomVecKokkos if radvary == 0 - if (radvary == 0) { - AtomVecKokkos::unpack_comm_kokkos(n,first,buf); - return; - } - if (lmp->kokkos->forward_comm_on_host) { - atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK); - struct AtomVecSphereKokkos_UnpackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,first); - Kokkos::parallel_for(n,f); - } else { - 
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - struct AtomVecSphereKokkos_UnpackComm f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - buf,first); - Kokkos::parallel_for(n,f); - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackCommVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - typename AT::t_double_2d_lr_const _buf; - int _first; - - AtomVecSphereKokkos_UnpackCommVel( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d &radius, - const typename DAT::ttransform_kkfloat_1d &rmass, - const typename DAT::ttransform_kkfloat_1d_3 &v, - const typename DAT::ttransform_kkfloat_1d_3 &omega, - const typename DAT::tdual_double_2d_lr &buf, - const int& first): - _x(x.view()), - _radius(radius.view()), - _rmass(rmass.view()), - _v(v.view()), - _omega(omega.view()), - _first(first) - { - const size_t elements = 9 + 2 * RADVARY; - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/elements; - buffer_view(_buf,buf,maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _v(i+_first,0) = _buf(i,3); - _v(i+_first,1) = _buf(i,4); - _v(i+_first,2) = _buf(i,5); - _omega(i+_first,0) = _buf(i,6); - _omega(i+_first,1) = _buf(i,7); - _omega(i+_first,2) = _buf(i,8); - if (RADVARY) { - _radius(i+_first) = _buf(i,9); - _rmass(i+_first) = _buf(i,10); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_comm_vel_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf) { - if (lmp->kokkos->forward_comm_on_host) { - 
atomKK->modified(HostKK,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (radvary == 0) { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } - } else { - atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); - if (radvary == 0) { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackCommVel f( - atomKK->k_x, - atomKK->k_radius,atomKK->k_rmass, - atomKK->k_v,atomKK->k_omega, - buf,first); - Kokkos::parallel_for(n,f); - } - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_um _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - double _dx,_dy,_dz; - - AtomVecSphereKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _dx(dx),_dy(dy),_dz(dz) - { - const 
size_t elements = 8; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _radius(j); - _buf(i,7) = _rmass(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_border_kokkos( - int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (space==Host) { - AtomVecSphereKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } else { - dx = dy = dz = 0; - if (space==Host) { - AtomVecSphereKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - 
-/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackBorderVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_um _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v, _omega; - double _dx,_dy,_dz, _dvx, _dvy, _dvz; - const int _deform_groupbit; - - AtomVecSphereKokkos_PackBorderVel( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const typename AT::t_kkfloat_1d_3 &v, - const typename AT::t_kkfloat_1d_3 &omega, - const double &dx, const double &dy, const double &dz, - const double &dvx, const double &dvy, const double &dvz, - const int &deform_groupbit): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _v(v), _omega(omega), - _dx(dx),_dy(dy),_dz(dz), - _dvx(dvx),_dvy(dvy),_dvz(dvz), - _deform_groupbit(deform_groupbit) - { - const size_t elements = 14; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_um(buf.data(),maxsend,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = 
_radius(j); - _buf(i,7) = _rmass(j); - if (DEFORM_VREMAP) { - if (_mask(i) & _deform_groupbit) { - _buf(i,8) = _v(j,0) + _dvx; - _buf(i,9) = _v(j,1) + _dvy; - _buf(i,10) = _v(j,2) + _dvz; - } - } - else { - _buf(i,8) = _v(j,0); - _buf(i,9) = _v(j,1); - _buf(i,10) = _v(j,2); - } - _buf(i,11) = _omega(j,0); - _buf(i,12) = _omega(j,1); - _buf(i,13) = _omega(j,2); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_border_vel_kokkos( - int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx=0,dy=0,dz=0; - double dvx=0,dvy=0,dvz=0; - - // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - atomKK->sync(space,ALL_MASK); - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (!deform_vremap) { - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, 
- d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - } else { - if (space==Host) { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } else { - AtomVecSphereKokkos_PackBorderVel f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - dx,dy,dz,dvx,dvy,dvz, - deform_groupbit); - Kokkos::parallel_for(n,f); - } - } - - return n*(size_border + size_velocity); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_const_um _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - int _first; - - AtomVecSphereKokkos_UnpackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _first(first) - { - const size_t elements = 8; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (d_ubuf(_buf(i,3)).i); - _type(i+_first) = static_cast (d_ubuf(_buf(i,4)).i); - _mask(i+_first) = 
static_cast (d_ubuf(_buf(i,5)).i); - _radius(i+_first) = _buf(i,6); - _rmass(i+_first) = _buf(i,7); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecSphereKokkos_UnpackBorder f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackBorder f(buf.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - RADIUS_MASK|RMASS_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackBorderVel { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr_const_um _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d _radius,_rmass; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_kkfloat_1d_3 _omega; - int _first; - - AtomVecSphereKokkos_UnpackBorderVel( - const typename AT::t_double_2d_lr_const &buf, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d &radius, - const typename AT::t_kkfloat_1d &rmass, - const typename AT::t_kkfloat_1d_3 &v, - const typename AT::t_kkfloat_1d_3 &omega, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _radius(radius), - _rmass(rmass), - _v(v), _omega(omega), - _first(first) - { - const size_t elements = 14; - const int maxsend = (buf.extent(0)*buf.extent(1))/elements; - _buf = typename 
AT::t_double_2d_lr_const_um(buf.data(),maxsend,elements); - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (d_ubuf(_buf(i,3)).i); - _type(i+_first) = static_cast (d_ubuf(_buf(i,4)).i); - _mask(i+_first) = static_cast (d_ubuf(_buf(i,5)).i); - _radius(i+_first) = _buf(i,6); - _rmass(i+_first) = _buf(i,7); - _v(i+_first,0) = _buf(i,8); - _v(i+_first,1) = _buf(i,9); - _v(i+_first,2) = _buf(i,10); - _omega(i+_first,0) = _buf(i,11); - _omega(i+_first,1) = _buf(i,12); - _omega(i+_first,2) = _buf(i,13); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::unpack_border_vel_kokkos( - const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - while (first+n >= nmax) grow(0); - if (space==Host) { - struct AtomVecSphereKokkos_UnpackBorderVel f(buf.view_host(), - h_x,h_tag,h_type,h_mask, - h_radius,h_rmass, - h_v, h_omega, - first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSphereKokkos_UnpackBorderVel f(buf.view_device(), - d_x,d_tag,d_type,d_mask, - d_radius,d_rmass, - d_v, d_omega, - first); - Kokkos::parallel_for(n,f); - } - - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_randomread _radius,_rmass; - typename AT::t_kkfloat_1d_3_randomread _omega; - typename 
AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d _radiusw,_rmassw; - typename AT::t_kkfloat_1d_3 _omegaw; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecSphereKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _radiusw(atom->k_radius.view()), - _rmassw(atom->k_rmass.view()), - _omegaw(atom->k_omega.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsend = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - _buf = typename AT::t_double_2d_lr_um(buf.template view().data(),maxsend,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _radius[i]; - _buf(mysend,12) = _rmass[i]; - 
_buf(mysend,13) = _omega(i,0); - _buf(mysend,14) = _omega(i,1); - _buf(mysend,15) = _omega(i,2); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - _radiusw[i] = _radius(j); - _rmassw[i] = _rmass(j); - _omegaw(i,0) = _omega(j,0); - _omegaw(i,1) = _omega(j,1); - _omegaw(i,2) = _omega(j,2); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::pack_exchange_kokkos( - const int &nsend, - DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 16; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*17/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | - OMEGA_MASK); - - if (space == HostKK) { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } else { - AtomVecSphereKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - } - return nsend*size_exchange; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSphereKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d _radius; - typename AT::t_kkfloat_1d _rmass; - typename AT::t_kkfloat_1d_3 _omega; - typename 
AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - typename AT::t_int_1d _indices; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecSphereKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - DAT::tdual_int_1d indices, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _radius(atom->k_radius.view()), - _rmass(atom->k_rmass.view()), - _omega(atom->k_omega.view()), - _nlocal(nlocal.template view()), - _indices(indices.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const size_t size_exchange = 16; - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/size_exchange; - - buffer_view(_buf,buf,maxsendlist,size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - int i = -1; - if (x >= _lo && x < _hi) { - i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _radius[i] = _buf(myrecv,11); - _rmass[i] = _buf(myrecv,12); - _omega(i,0) = _buf(myrecv,13); - _omega(i,1) = _buf(myrecv,14); - _omega(i,2) = _buf(myrecv,15); - } - if (OUTPUT_INDICES) - _indices(myrecv) = i; - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d 
&k_indices) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if (space == HostKK) { - k_count.view_host()(0) = nlocal; - if (k_indices.view_host().data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - if (k_indices.view_host().data()) { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } else { - AtomVecSphereKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,k_indices,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - } - k_count.modify_device(); - k_count.sync_host(); - } - - atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | - OMEGA_MASK); - - return k_count.view_host()(0); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecSphereKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -1696,7 +195,7 @@ void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) @@ -1749,7 +248,7 @@ void AtomVecSphereKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, i /* 
---------------------------------------------------------------------- */ -void AtomVecSphereKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecSphereKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 157d4f3f07d..7728c847dfc 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -32,69 +32,17 @@ namespace LAMMPS_NS { class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { public: AtomVecSphereKokkos(class LAMMPS *); + void init() override; void grow(int) override; void grow_pointers() override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - - int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist, - const DAT::tdual_double_2d_lr &buf, - const int &pbc_flag, const int pbc[]) override; - void unpack_comm_vel_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf) override; - int pack_comm_self(const int &n, const DAT::tdual_int_1d &list, - const int nfirst, - const int &pbc_flag, const int pbc[]) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_border_vel_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_vel_kokkos(const int &n, const int &nfirst, - const 
DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; private: double **torque; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - DAT::t_kkfloat_1d d_radius; - HAT::t_kkfloat_1d h_radius; - DAT::t_kkfloat_1d d_rmass; - HAT::t_kkfloat_1d h_rmass; - DAT::t_kkfloat_1d_3 d_omega; - HAT::t_kkfloat_1d_3 h_omega; - DAT::t_kkacc_1d_3 d_torque; - HAT::t_kkacc_1d_3 h_torque; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 5b79e803669..9c004fec140 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -41,7 +41,17 @@ static constexpr int DELTA = 10; AtomVecSpinKokkos::AtomVecSpinKokkos(LAMMPS *lmp) : AtomVec(lmp), AtomVecKokkos(lmp), AtomVecSpin(lmp) { +} + +/* ---------------------------------------------------------------------- + process field strings to initialize data structs for all other methods +------------------------------------------------------------------------- */ 
+void AtomVecSpinKokkos::init() +{ + AtomVecSpin::init(); + + set_atom_masks(); } /* ---------------------------------------------------------------------- @@ -147,442 +157,6 @@ void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK); } -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackComm { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _list; - double _xprd,_yprd,_zprd,_xy,_xz,_yz; - double _pbc[6]; - - AtomVecSpinKokkos_PackComm( - const typename DAT::ttransform_kkfloat_1d_3_lr &x, - const typename DAT::ttransform_kkfloat_1d_4 &sp, - const typename DAT::tdual_double_2d_lr &buf, - const typename DAT::tdual_int_1d &list, - const double &xprd, const double &yprd, const double &zprd, - const double &xy, const double &xz, const double &yz, const int* const pbc): - _x(x.view()),_sp(sp.view()), - _list(list.view()), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().extent(0)*buf.view().extent(1))/3; - const size_t elements = 7; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - 
_buf(i,6) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - _buf(i,3) = _sp(j,0); - _buf(i,4) = _sp(j,1); - _buf(i,5) = _sp(j,2); - _buf(i,6) = _sp(j,3); - } - } - } -}; - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - typename AT::t_double_2d_lr _buf; - const typename AT::t_int_1d_const _list; - const typename AT::t_kkfloat_1d_3_lr_randomread _x; - const typename AT::t_tagint_1d _tag; - const typename AT::t_int_1d _type; - const typename AT::t_int_1d _mask; - const typename AT::t_kkfloat_1d_4_randomread _sp; - double _dx,_dy,_dz; - - AtomVecSpinKokkos_PackBorder( - const typename AT::t_double_2d_lr &buf, - const typename AT::t_int_1d_const &list, - const typename AT::t_kkfloat_1d_3_lr &x, - const typename AT::t_tagint_1d &tag, - const typename AT::t_int_1d &type, - const typename AT::t_int_1d &mask, - const typename AT::t_kkfloat_1d_4 &sp, - const double &dx, const double &dy, const double &dz): - _buf(buf),_list(list), - _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp), - _dx(dx),_dy(dy),_dz(dz) {} - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - _buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - _buf(i,3) = d_ubuf(_tag(j)).d; - _buf(i,4) = d_ubuf(_type(j)).d; - _buf(i,5) = d_ubuf(_mask(j)).d; - _buf(i,6) = _sp(j,0); - _buf(i,7) = _sp(j,1); - _buf(i,8) = _sp(j,2); - _buf(i,9) = _sp(j,3); - } - } -}; - -/* 
---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - double dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - - } else { - dx = dy = dz = 0; - if(space==Host) { - AtomVecSpinKokkos_PackBorder f( - buf.view_host(), k_sendlist.view_host(), - h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } else { - AtomVecSpinKokkos_PackBorder f( - buf.view_device(), k_sendlist.view_device(), - d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz); - Kokkos::parallel_for(n,f); - } - } - return n*size_border; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackBorder { - typedef DeviceType device_type; - typedef ArrayTypes AT; - - const typename AT::t_double_2d_lr_const _buf; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_kkfloat_1d_4 _sp; - int _first; - - - AtomVecSpinKokkos_UnpackBorder( - const typename AT::t_double_2d_lr_const &buf, - typename AT::t_kkfloat_1d_3_lr &x, - typename AT::t_tagint_1d &tag, - typename AT::t_int_1d &type, - typename AT::t_int_1d &mask, - typename AT::t_kkfloat_1d_4 &sp, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),_first(first){ - }; - - KOKKOS_INLINE_FUNCTION - 
void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = (tagint) d_ubuf(_buf(i,3)).i; - _type(i+_first) = (int) d_ubuf(_buf(i,4)).i; - _mask(i+_first) = (int) d_ubuf(_buf(i,5)).i; - _sp(i+_first,0) = _buf(i,6); - _sp(i+_first,1) = _buf(i,7); - _sp(i+_first,2) = _buf(i,8); - _sp(i+_first,3) = _buf(i,9); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecSpinKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_double_2d_lr &buf,ExecutionSpace space) { - if (first+n >= nmax) { - grow(first+n+100); - } - if(space==Host) { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view_host(),h_x,h_tag,h_type,h_mask,h_sp,first); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecSpinKokkos_UnpackBorder - f(buf.view_device(),d_x,d_tag,d_type,d_mask,d_sp,first); - Kokkos::parallel_for(n,f); - } - atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|SP_MASK); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr_randomread _x; - typename AT::t_kkfloat_1d_3_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_kkfloat_1d_4_randomread _sp; - typename AT::t_kkfloat_1d_3_lr _xw; - typename AT::t_kkfloat_1d_3 _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_kkfloat_1d_4 _spw; - - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _size_exchange; - - AtomVecSpinKokkos_PackExchangeFunctor( - const 
AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d sendlist, - DAT::tdual_int_1d copylist): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _xw(atom->k_x.view()), - _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _spw(atom->k_sp.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)* - buf.template view().extent(1))/_size_exchange; - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = _size_exchange; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = d_ubuf(_tag[i]).d; - _buf(mysend,8) = d_ubuf(_type[i]).d; - _buf(mysend,9) = d_ubuf(_mask[i]).d; - _buf(mysend,10) = d_ubuf(_image[i]).d; - _buf(mysend,11) = _sp(i,0); - _buf(mysend,12) = _sp(i,1); - _buf(mysend,13) = _sp(i,2); - _buf(mysend,14) = _sp(i,3); - const int j = _copylist(mysend); - - if (j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw(i) = _tag(j); - _typew(i) = _type(j); - _maskw(i) = _mask(j); - _imagew(i) = _image(j); - _spw(i,0) = _sp(j,0); - _spw(i,1) = _sp(j,1); - _spw(i,2) = _sp(j,2); - _spw(i,3) = _sp(j,3); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &k_buf, - DAT::tdual_int_1d k_sendlist, - 
DAT::tdual_int_1d k_copylist, - ExecutionSpace space) -{ - size_exchange = 15; - - if (nsend > (int) (k_buf.view_host().extent(0)*k_buf.view_host().extent(1))/size_exchange) { - int newsize = nsend*size_exchange/k_buf.view_host().extent(1)+1; - k_buf.resize(newsize,k_buf.view_host().extent(1)); - } - if (space == HostKK) { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } else { - AtomVecSpinKokkos_PackExchangeFunctor - f(atomKK,k_buf,k_sendlist,k_copylist); - Kokkos::parallel_for(nsend,f); - return nsend*size_exchange; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecSpinKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_kkfloat_1d_3_lr _x; - typename AT::t_kkfloat_1d_3 _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_kkfloat_1d_4 _sp; - typename AT::t_double_2d_lr_um _buf; - typename AT::t_int_1d _nlocal; - int _dim; - double _lo,_hi; - int _size_exchange; - - AtomVecSpinKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const DAT::tdual_double_2d_lr buf, - DAT::tdual_int_1d nlocal, - int dim, double lo, double hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _sp(atom->k_sp.view()), - _nlocal(nlocal.template view()), - _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) { - const int maxsendlist = (buf.template view().extent(0)*buf.template view().extent(1))/_size_exchange; - - buffer_view(_buf,buf,maxsendlist,_size_exchange); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - double x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = 
Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; - _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; - _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; - _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; - _sp(i,0) = _buf(myrecv,11); - _sp(i,1) = _buf(myrecv,12); - _sp(i,2) = _buf(myrecv,13); - _sp(i,3) = _buf(myrecv,14); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, int nlocal, - int dim, double lo, double hi, ExecutionSpace space, - DAT::tdual_int_1d &/*k_indices*/) -{ - while (nlocal + nrecv/size_exchange >= nmax) grow(0); - - if(space == HostKK) { - k_count.view_host()(0) = nlocal; - AtomVecSpinKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - return k_count.view_host()(0); - } else { - k_count.view_host()(0) = nlocal; - k_count.modify_host(); - k_count.sync_device(); - AtomVecSpinKokkos_UnpackExchangeFunctor - f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/size_exchange,f); - k_count.modify_device(); - k_count.sync_host(); - - return k_count.view_host()(0); - } -} - /* ---------------------------------------------------------------------- clear extra forces starting at atom N nbytes = # of bytes to clear for a per-atom vector @@ -617,7 +191,7 @@ void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes) /* ---------------------------------------------------------------------- */ -void AtomVecSpinKokkos::sync(ExecutionSpace space, unsigned int mask) +void AtomVecSpinKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync_device(); @@ -657,7 +231,7 @@ void AtomVecSpinKokkos::sync(ExecutionSpace space, 
unsigned int mask) /* ---------------------------------------------------------------------- */ -void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) +void AtomVecSpinKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (mask & X_MASK) atomKK->k_x.modify_device(); @@ -695,7 +269,7 @@ void AtomVecSpinKokkos::modified(ExecutionSpace space, unsigned int mask) } } -void AtomVecSpinKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void AtomVecSpinKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & X_MASK) && atomKK->k_x.need_sync_device()) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 1b20614a768..75d5e77ea98 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -32,50 +32,15 @@ namespace LAMMPS_NS { class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { public: AtomVecSpinKokkos(class LAMMPS *); + void init() override; + void grow(int) override; void grow_pointers() override; void force_clear(int, size_t) override; void sort_kokkos(Kokkos::BinSort &Sorter) override; - int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, - DAT::tdual_double_2d_lr buf, - int pbc_flag, int *pbc, ExecutionSpace space) override; - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_double_2d_lr &buf, - ExecutionSpace space) override; - int pack_exchange_kokkos(const int &nsend,DAT::tdual_double_2d_lr &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space) override; - int unpack_exchange_kokkos(DAT::tdual_double_2d_lr &k_buf, int nrecv, - int nlocal, int dim, double lo, double hi, - ExecutionSpace space, - DAT::tdual_int_1d &k_indices) override; - - void sync(ExecutionSpace space, unsigned int mask) override; - void modified(ExecutionSpace space, unsigned int mask) override; - void 
sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0) override; - - protected: - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - - DAT::t_kkfloat_1d_3_lr d_x; - DAT::t_kkfloat_1d_3 d_v; - DAT::t_kkacc_1d_3 d_f; - - DAT::t_kkfloat_1d_4 d_sp; - DAT::t_kkacc_1d_3 d_fm; - DAT::t_kkacc_1d_3 d_fm_long; - - HAT::t_kkfloat_1d_4 h_sp; - HAT::t_kkacc_1d_3 h_fm; - HAT::t_kkacc_1d_3 h_fm_long; + void sync(ExecutionSpace space, uint64_t mask) override; + void modified(ExecutionSpace space, uint64_t mask) override; + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0) override; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 20510ba2844..82a346b4457 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -115,16 +115,6 @@ void CommKokkos::init() reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommBrick::init(); - - if (!comm_f_only) {// not all Kokkos atom_vec styles have reverse pack/unpack routines yet - reverse_comm_legacy = true; - lmp->kokkos->reverse_comm_legacy = 1; - } - - if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) { // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet - forward_comm_legacy = true; - lmp->kokkos->forward_comm_legacy = 1; - } } /* ---------------------------------------------------------------------- @@ -142,18 +132,15 @@ void CommKokkos::forward_comm(int dummy) k_sendlist.sync_host(); - if (comm_x_only) { - atomKK->sync(Host,X_MASK); - atomKK->modified(Host,X_MASK); - } else if (ghost_velocity) { - atomKK->sync(Host,X_MASK | V_MASK); - atomKK->modified(Host,X_MASK | V_MASK); - } else { - atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); - } - + if (ghost_velocity) + atomKK->sync(Host,atomKK->avecKK->datamask_comm_vel); + else + 
atomKK->sync(Host,atomKK->avecKK->datamask_comm); CommBrick::forward_comm(dummy); + if (ghost_velocity) + atomKK->modified(Host,atomKK->avecKK->datamask_comm_vel); + else + atomKK->modified(Host,atomKK->avecKK->datamask_comm); } /* ---------------------------------------------------------------------- */ @@ -191,14 +178,16 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(), n,MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) + if (size_forward_recv[iswap]) { MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else if (ghost_velocity) { if (size_forward_recv[iswap]) { @@ -209,14 +198,16 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType().fence(); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view().data(), @@ -225,13 +216,16 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); - if (n) + if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); - if 
(size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } atomKK->avecKK->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType().fence(); } } else { if (!ghost_velocity) { @@ -239,15 +233,12 @@ void CommKokkos::forward_comm_device() auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_self(sendnum[iswap],k_sendlist_iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); } } else { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType().fence(); } } } @@ -269,17 +260,9 @@ void CommKokkos::reverse_comm() k_sendlist.sync_host(); - if (comm_f_only) - atomKK->sync(Host,F_MASK); - else - atomKK->sync(Host,ALL_MASK); - + atomKK->sync(Host,atomKK->avecKK->datamask_reverse); CommBrick::reverse_comm(); - - if (comm_f_only) - atomKK->modified(Host,F_MASK); - else - atomKK->modified(Host,ALL_MASK); + atomKK->modified(Host,atomKK->avecKK->datamask_reverse); } /* ---------------------------------------------------------------------- */ @@ -307,11 +290,14 @@ void CommKokkos::reverse_comm_device() buf = (double*)atomKK->k_f.view().data() + firstrecv[iswap]*atomKK->k_f.view().extent(1); + DeviceType().fence(); MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, recvproc[iswap],0,world); } - if (size_reverse_recv[iswap]) + if (size_reverse_recv[iswap]) { MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else { if (size_reverse_recv[iswap]) @@ -319,16 +305,19 @@ void CommKokkos::reverse_comm_device() size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); n = 
atomKK->avecKK->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType().fence(); - if (n) + if (n) { + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),n, MPI_DOUBLE,recvproc[iswap],0,world); - if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (size_reverse_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_recv); - DeviceType().fence(); } else { if (sendnum[iswap]) { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); @@ -386,7 +375,6 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = fixKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_send_fix,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -408,9 +396,15 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) MPI_Irecv(buf_recv_fix,nsize*recvnum[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } - if (sendnum[iswap]) + if (sendnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_fix,n,MPI_DOUBLE,sendproc[iswap],0,world); - if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (recvnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_fix.modify_host(); @@ -422,7 +416,6 @@ void CommKokkos::forward_comm_device(Fix *fix, int size) // unpack buffer fixKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); - DeviceType().fence(); } } @@ -561,7 +554,6 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap, 
k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -583,9 +575,15 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) MPI_Irecv(buf_recv_pair,nsize*recvnum[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } - if (sendnum[iswap]) + if (sendnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); - if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (recvnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_pair.modify_host(); @@ -597,7 +595,6 @@ void CommKokkos::forward_comm_device(Pair *pair, int size) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_tmp); - DeviceType().fence(); } } @@ -657,7 +654,6 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) // pack buffer n = pairKKBase->pack_reverse_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send_pair); - DeviceType().fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -677,9 +673,14 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) if (sendproc[iswap] != me) { if (sendnum[iswap]) MPI_Irecv(buf_recv_pair,nsize*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world,&request); - if (recvnum[iswap]) + if (recvnum[iswap]) { + DeviceType().fence(); MPI_Send(buf_send_pair,n,MPI_DOUBLE,recvproc[iswap],0,world); - if (sendnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + } + if (sendnum[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_pair.modify_host(); @@ -693,7 +694,6 @@ void CommKokkos::reverse_comm_device(Pair *pair, int size) auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist_iswap, k_buf_tmp); - DeviceType().fence(); } } 
@@ -742,17 +742,10 @@ void CommKokkos::exchange() } } - if (!atomKK->avecKK->unpack_exchange_indices_flag || !flag) { - if (!atomKK->avecKK->unpack_exchange_indices_flag) { - if (comm->me == 0) { - error->warning(FLERR,"Atom style not compatible with fix sending data in Kokkos communication, " - "switching to legacy exchange/border communication"); - } - } else if (!flag) { - if (comm->me == 0) { - error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " - "switching to legacy exchange/border communication"); - } + if (!flag) { + if (comm->me == 0) { + error->warning(FLERR,"Fix with atom-based arrays not compatible with sending data in Kokkos communication, " + "switching to legacy exchange/border communication"); } exchange_comm_legacy = true; lmp->kokkos->exchange_comm_legacy = 1; @@ -766,12 +759,12 @@ void CommKokkos::exchange() return; } - atomKK->sync(Host,ALL_MASK); + atomKK->sync(Host,atomKK->avecKK->datamask_exchange); int prev_auto_sync = lmp->kokkos->auto_sync; lmp->kokkos->auto_sync = 1; CommBrick::exchange(); lmp->kokkos->auto_sync = prev_auto_sync; - atomKK->modified(Host,ALL_MASK); + atomKK->modified(Host,atomKK->avecKK->datamask_exchange); } /* ---------------------------------------------------------------------- */ @@ -845,7 +838,7 @@ void CommKokkos::exchange_device() subhi = domain->subhi_lamda; } - atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); // loop over dimensions for (int dim = 0; dim < 3; dim++) { @@ -913,7 +906,6 @@ void CommKokkos::exchange_device() atomKK->avecKK->pack_exchange_kokkos(count,k_buf_send, k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); - DeviceType().fence(); atom->nlocal = nlocal; // send/recv atoms in both directions @@ -940,21 +932,26 @@ void CommKokkos::exchange_device() MPI_Irecv(k_buf_recv.view().data(),nrecv1, MPI_DOUBLE,procneigh[dim][1],0, world,&request); + + 
DeviceType().fence(); MPI_Send(k_buf_send.view().data(),nsend, MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); if (procgrid[dim] > 2) { MPI_Irecv(k_buf_recv.view().data()+nrecv1, nrecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); + MPI_Send(k_buf_send.view().data(),nsend, MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); } + DeviceType().fence(); if (nrecv) { - if (atom->nextra_grow) { if ((int) k_indices.extent(0) < nrecv/data_size) MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size); @@ -965,8 +962,6 @@ void CommKokkos::exchange_device() atom->nlocal = atomKK->avecKK-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space,k_indices); - - DeviceType().fence(); } } @@ -982,7 +977,6 @@ void CommKokkos::exchange_device() nextrasend = kkbase->pack_exchange_kokkos( count,k_buf_send,k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice::space); - DeviceType().fence(); } int nextrarecv,nextrarecv1,nextrarecv2; @@ -1007,31 +1001,35 @@ void CommKokkos::exchange_device() MPI_Irecv(k_buf_recv.view().data(),nextrarecv1, MPI_DOUBLE,procneigh[dim][1],0, world,&request); + + DeviceType().fence(); MPI_Send(k_buf_send.view().data(),nextrasend, MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); if (procgrid[dim] > 2) { MPI_Irecv(k_buf_recv.view().data()+nextrarecv1, nextrarecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); + MPI_Send(k_buf_send.view().data(),nextrasend, MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); } + DeviceType().fence(); if (nextrarecv) { kkbase->unpack_exchange_kokkos( k_buf_recv,k_indices,nrecv/data_size, nrecv1/data_size,nextrarecv1, ExecutionSpaceFromDevice::space); - DeviceType().fence(); } } } } } - atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); } if (atom->firstgroupname) { @@ -1056,8 +1054,7 @@ void CommKokkos::borders() { if 
(!exchange_comm_legacy) { - if (atom->nextra_border || mode != Comm::SINGLE || bordergroup || - (ghost_velocity && atomKK->avecKK->no_border_vel_flag)) { + if (atom->nextra_border || mode != Comm::SINGLE || bordergroup) { if (comm->me == 0) { error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " @@ -1072,14 +1069,20 @@ void CommKokkos::borders() if (exchange_comm_on_host) borders_device(); else borders_device(); } else { - atomKK->sync(Host,ALL_MASK); + if (ghost_velocity) + atomKK->sync(Host,atomKK->avecKK->datamask_border_vel); + else + atomKK->sync(Host,atomKK->avecKK->datamask_border); k_sendlist.sync_host(); int prev_auto_sync = lmp->kokkos->auto_sync; lmp->kokkos->auto_sync = 1; CommBrick::borders(); lmp->kokkos->auto_sync = prev_auto_sync; k_sendlist.modify_host(); - atomKK->modified(Host,ALL_MASK); + if (ghost_velocity) + atomKK->modified(Host,atomKK->avecKK->datamask_border_vel); + else + atomKK->modified(Host,atomKK->avecKK->datamask_border); } if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_legacy) @@ -1140,16 +1143,15 @@ struct BuildBorderListFunctor { template void CommKokkos::borders_device() { - int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax; - int nsend,nrecv,sendflag,nfirst,nlast,ngroup; + int n,iswap,dim,ineed,twoneed,smax,rmax; + int nsend,nrecv,sendflag,nfirst,nlast; double lo,hi; - int *type; - double **x; double *mlo,*mhi; MPI_Request request; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - atomKK->sync(exec_space,ALL_MASK); + atomKK->sync(exec_space,X_MASK); + k_sendlist.sync(); int team_size = 1; @@ -1172,12 +1174,10 @@ void CommKokkos::borders_device() { // for later swaps in a dim, only check newly arrived ghosts // store sent atom indices in list for use in future timesteps - x = atom->x; if (mode == Comm::SINGLE) { lo = slablo[iswap]; hi = slabhi[iswap]; } else { - type = atom->type; mlo = multilo[iswap]; mhi = multihi[iswap]; } @@ -1238,19 +1238,19 @@ void 
CommKokkos::borders_device() { } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); - for (i = nfirst; i < nlast; i++) { + /*for (i = nfirst; i < nlast; i++) { itype = type[i]; if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } - } + }*/ } } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); - if (mode == Comm::SINGLE) { + /*if (mode == Comm::SINGLE) { ngroup = atom->nfirst; for (i = 0; i < ngroup; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { @@ -1278,7 +1278,7 @@ void CommKokkos::borders_device() { sendlist[iswap][nsend++] = i; } } - } + }*/ } } @@ -1291,13 +1291,11 @@ void CommKokkos::borders_device() { n = atomKK->avecKK-> pack_border_vel_kokkos(nsend,k_sendlist_iswap,k_buf_send, pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType().fence(); } else { auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL); n = atomKK->avecKK-> pack_border_kokkos(nsend,k_sendlist_iswap,k_buf_send, pbc_flag[iswap],pbc[iswap],exec_space); - DeviceType().fence(); } // swap atoms with other proc @@ -1312,9 +1310,15 @@ void CommKokkos::borders_device() { if (nrecv) MPI_Irecv(k_buf_recv.view().data(), nrecv*size_border,MPI_DOUBLE, recvproc[iswap],0,world,&request); - if (n) MPI_Send(k_buf_send.view().data(),n, - MPI_DOUBLE,sendproc[iswap],0,world); - if (nrecv) MPI_Wait(&request,MPI_STATUS_IGNORE); + if (n) { + DeviceType().fence(); + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + } + if (nrecv) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + DeviceType().fence(); + } } else { nrecv = nsend; } @@ -1325,21 +1329,17 @@ void CommKokkos::borders_device() { if (sendproc[iswap] != me) { atomKK->avecKK->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType().fence(); } else { atomKK->avecKK->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, 
k_buf_send,exec_space); - DeviceType().fence(); } } else { if (sendproc[iswap] != me) { atomKK->avecKK->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - DeviceType().fence(); } else { atomKK->avecKK->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); - DeviceType().fence(); } } // set all pointers & counters @@ -1364,8 +1364,6 @@ void CommKokkos::borders_device() { max = MAX(maxforward*rmax,maxreverse*smax); if (max > maxrecv) grow_recv_kokkos(max); - atomKK->modified(exec_space,ALL_MASK); - // reset global->local map if (map_style != Atom::MAP_NONE) diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp index 3a542681233..9664f750621 100644 --- a/src/KOKKOS/comm_tiled_kokkos.cpp +++ b/src/KOKKOS/comm_tiled_kokkos.cpp @@ -82,11 +82,6 @@ void CommTiledKokkos::init() reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host; CommTiled::init(); - - if (!comm_f_only) { // not all Kokkos atom_vec styles have reverse pack/unpack routines yet - reverse_comm_legacy = true; - lmp->kokkos->reverse_comm_legacy = 1; - } } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/dynamical_matrix_kokkos.cpp b/src/KOKKOS/dynamical_matrix_kokkos.cpp index 57aca408921..83a5015dd82 100644 --- a/src/KOKKOS/dynamical_matrix_kokkos.cpp +++ b/src/KOKKOS/dynamical_matrix_kokkos.cpp @@ -165,7 +165,7 @@ void DynamicalMatrixKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_host = 0; + uint64_t datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 3d1e672d74f..82dbdc3aae6 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -129,7 +129,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) /* 
---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) +void FixPropertyAtomKokkos::sync(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync_device(); @@ -151,7 +151,7 @@ void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag) +void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag) { if (space == Device) { if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync_device()) @@ -176,7 +176,7 @@ void FixPropertyAtomKokkos::sync_pinned(ExecutionSpace space, unsigned int mask, /* ---------------------------------------------------------------------- */ -void FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask) +void FixPropertyAtomKokkos::modified(ExecutionSpace space, uint64_t mask) { if (space == Device) { if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify_device(); diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h index 25d4001b419..887e09d5d98 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.h +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -33,9 +33,9 @@ class FixPropertyAtomKokkos : public FixPropertyAtom { ~FixPropertyAtomKokkos() override; void grow_arrays(int) override; - void sync(ExecutionSpace space, unsigned int mask); - void modified(ExecutionSpace space, unsigned int mask); - void sync_pinned(ExecutionSpace space, unsigned int mask, int async_flag = 0); + void sync(ExecutionSpace space, uint64_t mask); + void modified(ExecutionSpace space, uint64_t mask); + void sync_pinned(ExecutionSpace space, uint64_t mask, int async_flag = 0); private: int dvector_flag; diff --git 
a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index b84fe0e33cf..fad2aba98cd 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -426,7 +426,7 @@ template static std::enable_if_t realloc_kokkos(TYPE &data, const char *name, Indices... ns) { data = TYPE(); - data = TYPE(std::string(name), ns...); + data = TYPE(Kokkos::NoInit(std::string(name)), ns...); } template @@ -434,7 +434,7 @@ static std::enable_if_t::grow_neigharrays() { /* ---------------------------------------------------------------------- */ template -void MLIAPDataKokkos::modified(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync) { +void MLIAPDataKokkos::modified(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync) { if (space == Device) { if (mask & IATOMS_MASK ) k_iatoms .modify_device(); if (mask & IELEMS_MASK ) k_ielems .modify_device(); @@ -316,7 +316,7 @@ void MLIAPDataKokkos::modified(ExecutionSpace space, unsigned int ma /* ---------------------------------------------------------------------- */ template -void MLIAPDataKokkos::sync(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync) { +void MLIAPDataKokkos::sync(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync) { if (space == Device) { if (lmp->kokkos->auto_sync && !ignore_auto_sync) modified(Host, mask, true); diff --git a/src/KOKKOS/mliap_data_kokkos.h b/src/KOKKOS/mliap_data_kokkos.h index 5ca22fc1705..b34d8e099e1 100644 --- a/src/KOKKOS/mliap_data_kokkos.h +++ b/src/KOKKOS/mliap_data_kokkos.h @@ -66,9 +66,9 @@ template class MLIAPDataKokkos : public MLIAPData { void generate_neighdata(class NeighList *, int = 0, int = 0) override; void grow_neigharrays() override; - void modified(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync = false); + void modified(ExecutionSpace space, uint64_t mask, bool ignore_auto_sync = false); - void sync(ExecutionSpace space, unsigned int mask, bool ignore_auto_sync = false); + void sync(ExecutionSpace space, uint64_t 
mask, bool ignore_auto_sync = false); PairMLIAPKokkos *k_pairmliap; diff --git a/src/KOKKOS/neigh_bond_kokkos.h b/src/KOKKOS/neigh_bond_kokkos.h index 852dd984053..650df4b9579 100644 --- a/src/KOKKOS/neigh_bond_kokkos.h +++ b/src/KOKKOS/neigh_bond_kokkos.h @@ -75,7 +75,7 @@ class NeighBondKokkos : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read,datamask_modify; + uint64_t datamask_read,datamask_modify; int maxbond,maxangle,maxdihedral,maximproper; // size of bond lists int me,nprocs; diff --git a/src/KOKKOS/third_order_kokkos.cpp b/src/KOKKOS/third_order_kokkos.cpp index 06d68934b38..b52dd07fee8 100644 --- a/src/KOKKOS/third_order_kokkos.cpp +++ b/src/KOKKOS/third_order_kokkos.cpp @@ -165,7 +165,7 @@ void ThirdOrderKokkos::update_force() } bool execute_on_host = false; - unsigned int datamask_read_host = 0; + uint64_t datamask_read_host = 0; if (pair_compute_flag) { if (force->pair->execution_space==Host) { diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 81c42441919..a54d72db092 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -381,8 +381,8 @@ void VerletKokkos::run(int n) } bool execute_on_host = false; - unsigned int datamask_read_host = 0; - unsigned int datamask_exclude = 0; + uint64_t datamask_read_host = 0; + uint64_t datamask_exclude = 0; int allow_overlap = lmp->kokkos->allow_overlap; if (allow_overlap && atomKK->k_f.view_hostkk().data() != atomKK->k_f.view_device().data()) { diff --git a/src/angle.h b/src/angle.h index 58ca188df57..83ba6a236bc 100644 --- a/src/angle.h +++ b/src/angle.h @@ -43,7 +43,7 @@ class Angle : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Angle(class LAMMPS *); diff --git a/src/atom_masks.h b/src/atom_masks.h index 
0058c3f57f7..009c5924d68 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -17,93 +17,53 @@ // per-atom data masks -#define EMPTY_MASK 0x00000000 -#define ALL_MASK 0xffffffff -#define SAMETAG_MASK 0x40000000 -#define EXTENDED_MASK 0x80000000 +#define EMPTY_MASK 0x0000000000000000 +#define ALL_MASK 0xffffffffffffffff // standard -#define X_MASK 0x00000001 -#define V_MASK 0x00000002 -#define F_MASK 0x00000004 -#define TAG_MASK 0x00000008 -#define TYPE_MASK 0x00000010 -#define MASK_MASK 0x00000020 -#define IMAGE_MASK 0x00000040 -#define Q_MASK 0x00000080 -#define MOLECULE_MASK 0x00000100 -#define RMASS_MASK 0x00000200 -#define BOND_MASK 0x00000400 -#define ANGLE_MASK 0x00000800 -#define DIHEDRAL_MASK 0x00001000 -#define IMPROPER_MASK 0x00002000 -#define SPECIAL_MASK 0x00004000 -#define MAP_MASK 0x00008000 -#define ENERGY_MASK 0x00010000 -#define VIRIAL_MASK 0x00020000 -#define MU_MASK 0x00040000 +#define X_MASK 0x0000000000000001 +#define V_MASK 0x0000000000000002 +#define F_MASK 0x0000000000000004 +#define TAG_MASK 0x0000000000000008 +#define TYPE_MASK 0x0000000000000010 +#define MASK_MASK 0x0000000000000020 +#define IMAGE_MASK 0x0000000000000040 +#define Q_MASK 0x0000000000000080 +#define MOLECULE_MASK 0x0000000000000100 +#define RMASS_MASK 0x0000000000000200 +#define BOND_MASK 0x0000000000000400 +#define ANGLE_MASK 0x0000000000000800 +#define DIHEDRAL_MASK 0x0000000000001000 +#define IMPROPER_MASK 0x0000000000002000 +#define SPECIAL_MASK 0x0000000000004000 +#define ENERGY_MASK 0x0000000000008000 +#define VIRIAL_MASK 0x0000000000010000 +#define MU_MASK 0x0000000000020000 // SPIN -#define SP_MASK 0x00000001 -#define FM_MASK 0x00000002 -#define FML_MASK 0x00000004 +#define SP_MASK 0x0000000000040000 +#define FM_MASK 0x0000000000080000 +#define FML_MASK 0x0000000000100000 // DPD -#define DPDRHO_MASK 0x00040000 -#define DPDTHETA_MASK 0x00080000 -#define UCOND_MASK 0x00100000 -#define UMECH_MASK 0x00200000 -#define UCHEM_MASK 0x00400000 -#define UCG_MASK 0x00800000 
-#define UCGNEW_MASK 0x01000000 -#define DUCHEM_MASK 0x02000000 -#define DVECTOR_MASK 0x04000000 +#define DPDRHO_MASK 0x0000000000200000 +#define DPDTHETA_MASK 0x0000000000400000 +#define UCOND_MASK 0x0000000000800000 +#define UMECH_MASK 0x0000000001000000 +#define UCHEM_MASK 0x0000000002000000 +#define UCG_MASK 0x0000000004000000 +#define UCGNEW_MASK 0x0000000008000000 +#define DUCHEM_MASK 0x0000000010000000 +#define DVECTOR_MASK 0x0000000020000000 // granular -#define RADIUS_MASK 0x00100000 -#define DENSITY_MASK 0x00200000 -#define OMEGA_MASK 0x00400000 -#define TORQUE_MASK 0x00800000 -#define ANGMOM_MASK 0x01000000 -#define GRANULAR_MASK 0x01f00000 - -// peridynamics - -#define VFRAC_MASK 0x00000001 -#define S0_MASK 0x00000002 -#define X0_MASK 0x00000004 -#define PERI_MASK 0x00000007 - -#define ELLIPSOID_MASK 0x00000008 -#define LINE_MASK 0x00000010 -#define TRI_MASK 0x00000020 - -// electron - -#define SPIN_MASK 0x00000100 -#define ERADIUS_MASK 0x00000200 -#define ERVEL_MASK 0x00000400 -#define ERFORCE_MASK 0x00000800 -#define ERVELFORCE_MASK 0x00001000 - -#define CS_MASK 0x00002000 -#define CSFORCE_MASK 0x00004000 -#define VFORCE_MASK 0x00008000 - -#define ELECTRON_MASK 0x0000ff00 - -// SPH - -#define ETAG_MASK 0x00010000 -#define RHO_MASK 0x00020000 -#define DRHO_MASK 0x00040000 -#define E_MASK 0x00080000 -#define DE_MASK 0x00100000 -#define VEST_MASK 0x00200000 -#define CV_MASK 0x00400000 +#define RADIUS_MASK 0x0000000040000000 +#define OMEGA_MASK 0x0000000080000000 +#define TORQUE_MASK 0x0000000100000000 +#define ANGMOM_MASK 0x0000000200000000 #endif diff --git a/src/bond.h b/src/bond.h index 3b1740b2d42..c844dc9acb0 100644 --- a/src/bond.h +++ b/src/bond.h @@ -48,7 +48,7 @@ class Bond : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Bond(class LAMMPS *); diff --git a/src/compute.h 
b/src/compute.h index aee4a676ecd..8b0d50946cf 100644 --- a/src/compute.h +++ b/src/compute.h @@ -106,7 +106,7 @@ class Compute : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; diff --git a/src/dihedral.h b/src/dihedral.h index 7ca3379e44c..72895f0b11f 100644 --- a/src/dihedral.h +++ b/src/dihedral.h @@ -43,7 +43,7 @@ class Dihedral : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Dihedral(class LAMMPS *); diff --git a/src/fix.h b/src/fix.h index ebf52241711..950cb378704 100644 --- a/src/fix.h +++ b/src/fix.h @@ -135,7 +135,7 @@ class Fix : protected Pointers { int fuse_integrate_flag; // 1 if can fuse initial integrate with final integrate int sort_device; // 1 if sort on Device ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; Fix(class LAMMPS *, int, char **); ~Fix() override; diff --git a/src/improper.h b/src/improper.h index 0f539bdecee..da0ae1276f8 100644 --- a/src/improper.h +++ b/src/improper.h @@ -48,7 +48,7 @@ class Improper : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode, kokkosable; Improper(class LAMMPS *); diff --git a/src/kspace.h b/src/kspace.h index 0a3db05ee7a..f083b99a0fd 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -125,7 +125,7 @@ class KSpace : protected Pointers { // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int copymode; int compute_flag; // 0 if skip compute() diff --git 
a/src/pair.h b/src/pair.h index 0a5d2bc3b38..8d50a1760d1 100644 --- a/src/pair.h +++ b/src/pair.h @@ -124,7 +124,7 @@ class Pair : protected Pointers { // KOKKOS flags and variables ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + uint64_t datamask_read, datamask_modify; int kokkosable; // 1 if Kokkos pair int reverse_comm_device; // 1 if reverse comm on Device int fuse_force_clear_flag; // 1 if can fuse force clear with force compute From 0ef299144c88f333d1de9bcf091adeb4a0f74cc9 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Tue, 11 Nov 2025 18:30:07 -0500 Subject: [PATCH 241/604] improve scalar/vector print output only include indices when compute output is a vector --- src/compute_reduce.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 42d36c657e2..0d6aa14177b 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -697,11 +697,17 @@ std::string ComputeReduce::get_thermo_colname(int m) { if (replace && replace[m] >= 0) { auto &val1 = values[m]; auto &val2 = values[replace[m]]; - return fmt::format("c_{}:c_{}[{}]<-{}(c_{})", id, val1.id, val1.argindex, modestr, val2.id); + std::string val1string = fmt::format("c_{}", val1.id); + if (val1.argindex) val1string += fmt::format("[{}]", val1.argindex); + std::string val2string = fmt::format("c_{}", val2.id); + if (val2.argindex) val2string += fmt::format("[{}]", val2.argindex); + return fmt::format("c_{}:{}<-{}({}})", id, val1string, modestr, val2string); } else { if (m == -1) m = 0; // scalar auto &val = values[m]; - return fmt::format("c_{}:{}(c_{}[{}])", id, modestr, val.id, val.argindex); + std::string valstring = fmt::format("c_{}", val.id); + if (val.argindex) valstring += fmt::format("[{}]", val.argindex); + return fmt::format("c_{}:{}({})", id, modestr, valstring); } return "none"; } From 1cac5b2ae53e6afce2eb71081e79e0861350fc1f Mon Sep 17 00:00:00 2001 From: Jacob Gissinger 
Date: Tue, 11 Nov 2025 20:00:11 -0500 Subject: [PATCH 242/604] typo --- src/compute_reduce.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 0d6aa14177b..430175de6de 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -701,7 +701,7 @@ std::string ComputeReduce::get_thermo_colname(int m) { if (val1.argindex) val1string += fmt::format("[{}]", val1.argindex); std::string val2string = fmt::format("c_{}", val2.id); if (val2.argindex) val2string += fmt::format("[{}]", val2.argindex); - return fmt::format("c_{}:{}<-{}({}})", id, val1string, modestr, val2string); + return fmt::format("c_{}:{}<-{}({})", id, val1string, modestr, val2string); } else { if (m == -1) m = 0; // scalar auto &val = values[m]; From 869df1874743ff62d54825a8a2f449ae3da53cef Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 20:44:28 -0500 Subject: [PATCH 243/604] enable kokkos_omp test for pair style hybrid/scaled --- unittest/force-styles/tests/mol-pair-hybrid-scaled.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/force-styles/tests/mol-pair-hybrid-scaled.yaml b/unittest/force-styles/tests/mol-pair-hybrid-scaled.yaml index f45c322e4ef..1b394008fac 100644 --- a/unittest/force-styles/tests/mol-pair-hybrid-scaled.yaml +++ b/unittest/force-styles/tests/mol-pair-hybrid-scaled.yaml @@ -2,7 +2,7 @@ lammps_version: 17 Feb 2022 date_generated: Fri Mar 18 22:17:30 2022 epsilon: 5e-14 -skip_tests: gpu intel kokkos_omp omp extract_omp +skip_tests: gpu intel omp extract_omp prerequisites: ! 
| atom full pair lj/cut From 695ceef20361aff260a8b807edd9c8d25c55ad56 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 22:32:32 -0500 Subject: [PATCH 244/604] add pair style lj96/cut/kk to improve code coverage for hybrid pair style in KOKKOS --- doc/src/Commands_pair.rst | 2 +- doc/src/pair_lj96.rst | 3 +- src/EXTRA-PAIR/pair_lj96_cut.cpp | 2 + src/EXTRA-PAIR/pair_lj96_cut.h | 2 +- src/KOKKOS/pair_lj96_cut_kokkos.cpp | 242 ++++++++++++++++++++++++++++ src/KOKKOS/pair_lj96_cut_kokkos.h | 114 +++++++++++++ 6 files changed, 362 insertions(+), 3 deletions(-) create mode 100644 src/KOKKOS/pair_lj96_cut_kokkos.cpp create mode 100644 src/KOKKOS/pair_lj96_cut_kokkos.h diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 0e2eb490039..4b0f2e32411 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -192,7 +192,7 @@ OPT. * :doc:`lj/smooth (go) ` * :doc:`lj/smooth/linear (o) ` * :doc:`lj/switch3/coulgauss/long ` - * :doc:`lj96/cut (go) ` + * :doc:`lj96/cut (gko) ` * :doc:`local/density ` * :doc:`lubricate (o) ` * :doc:`lubricate/poly (o) ` diff --git a/doc/src/pair_lj96.rst b/doc/src/pair_lj96.rst index 47000fe70aa..06390e6de2d 100644 --- a/doc/src/pair_lj96.rst +++ b/doc/src/pair_lj96.rst @@ -1,11 +1,12 @@ .. index:: pair_style lj96/cut .. index:: pair_style lj96/cut/gpu +.. index:: pair_style lj96/cut/kk .. 
index:: pair_style lj96/cut/omp pair_style lj96/cut command =========================== -Accelerator Variants: *lj96/cut/gpu*, *lj96/cut/omp* +Accelerator Variants: *lj96/cut/gpu*, *lj96/cut/kk*, *lj96/cut/omp* Syntax """""" diff --git a/src/EXTRA-PAIR/pair_lj96_cut.cpp b/src/EXTRA-PAIR/pair_lj96_cut.cpp index 72470f97db6..fdd14fc75e9 100644 --- a/src/EXTRA-PAIR/pair_lj96_cut.cpp +++ b/src/EXTRA-PAIR/pair_lj96_cut.cpp @@ -49,6 +49,8 @@ PairLJ96Cut::PairLJ96Cut(LAMMPS *lmp) : Pair(lmp) PairLJ96Cut::~PairLJ96Cut() { + if (copymode) return; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/EXTRA-PAIR/pair_lj96_cut.h b/src/EXTRA-PAIR/pair_lj96_cut.h index 32ea8653a04..cab5e39f108 100644 --- a/src/EXTRA-PAIR/pair_lj96_cut.h +++ b/src/EXTRA-PAIR/pair_lj96_cut.h @@ -55,7 +55,7 @@ class PairLJ96Cut : public Pair { double **lj1, **lj2, **lj3, **lj4, **offset; double *cut_respa; - void allocate(); + virtual void allocate(); }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/pair_lj96_cut_kokkos.cpp b/src/KOKKOS/pair_lj96_cut_kokkos.cpp new file mode 100644 index 00000000000..c9c77e3ef28 --- /dev/null +++ b/src/KOKKOS/pair_lj96_cut_kokkos.cpp @@ -0,0 +1,242 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "pair_lj96_cut_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +PairLJ96CutKokkos::PairLJ96CutKokkos(LAMMPS *lmp) : PairLJ96Cut(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairLJ96CutKokkos::~PairLJ96CutKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJ96CutKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x 
= atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = static_cast(force->special_lj[0]); + special_lj[1] = static_cast(force->special_lj[1]); + special_lj[2] = static_cast(force->special_lj[2]); + special_lj[3] = static_cast(force->special_lj[3]); + + // loop over neighbors of my atoms + + copymode = 1; + + EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); + + if (eflag_global) eng_vdwl += static_cast(ev.evdwl); + if (vflag_global) { + virial[0] += static_cast(ev.v[0]); + virial[1] += static_cast(ev.v[1]); + virial[2] += static_cast(ev.v[2]); + virial[3] += static_cast(ev.v[3]); + virial[4] += static_cast(ev.v[4]); + virial[5] += static_cast(ev.v[5]); + } + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.sync_host(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.sync_host(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +KK_FLOAT PairLJ96CutKokkos:: +compute_fpair(const KK_FLOAT &rsq, const int &, const int &, const int &itype, const int &jtype) const { + const KK_FLOAT r2inv = static_cast(1.0) / rsq; + const KK_FLOAT r6inv = r2inv*r2inv*r2inv; + const KK_FLOAT r3inv = sqrt(r6inv); + + const KK_FLOAT forcelj = r6inv * + ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r3inv - + (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2)); + + return forcelj*r2inv; +} + +template +template +KOKKOS_INLINE_FUNCTION +KK_FLOAT PairLJ96CutKokkos:: +compute_evdwl(const KK_FLOAT &rsq, const int &, const int &, const int &itype, const int &jtype) const { + const KK_FLOAT r2inv = static_cast(1.0) / rsq; + const KK_FLOAT r6inv = r2inv*r2inv*r2inv; + const KK_FLOAT r3inv = sqrt(r6inv); + + return 
r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r3inv - + (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) - + (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairLJ96CutKokkos::allocate() +{ + PairLJ96Cut::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + k_params = Kokkos::DualView("PairLJ96Cut::params",n+1,n+1); + params = k_params.template view(); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairLJ96CutKokkos::init_style() +{ + PairLJ96Cut::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairLJ96CutKokkos::init_one(int i, int j) +{ + double cutone = PairLJ96Cut::init_one(i,j); + + 
k_params.view_host()(i,j).lj1 = static_cast(lj1[i][j]); + k_params.view_host()(i,j).lj2 = static_cast(lj2[i][j]); + k_params.view_host()(i,j).lj3 = static_cast(lj3[i][j]); + k_params.view_host()(i,j).lj4 = static_cast(lj4[i][j]); + k_params.view_host()(i,j).offset = static_cast(offset[i][j]); + k_params.view_host()(i,j).cutsq = static_cast(cutone*cutone); + k_params.view_host()(j,i) = k_params.view_host()(i,j); + if (i(cutone*cutone); + } + + k_cutsq.view_host()(i,j) = k_cutsq.view_host()(j,i) = cutone*cutone; + k_cutsq.modify_host(); + k_params.modify_host(); + + return cutone; +} + +namespace LAMMPS_NS { +template class PairLJ96CutKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairLJ96CutKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_lj96_cut_kokkos.h b/src/KOKKOS/pair_lj96_cut_kokkos.h new file mode 100644 index 00000000000..a23536d6fd2 --- /dev/null +++ b/src/KOKKOS/pair_lj96_cut_kokkos.h @@ -0,0 +1,114 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(lj96/cut/kk,PairLJ96CutKokkos); +PairStyle(lj96/cut/kk/device,PairLJ96CutKokkos); +PairStyle(lj96/cut/kk/host,PairLJ96CutKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_LJ96_CUT_KOKKOS_H +#define LMP_PAIR_LJ96_CUT_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_lj96_cut.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairLJ96CutKokkos : public PairLJ96Cut { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + PairLJ96CutKokkos(class LAMMPS *); + ~PairLJ96CutKokkos() override; + + void compute(int, int) override; + + void init_style() override; + double init_one(int, int) override; + + struct params_lj{ + KOKKOS_INLINE_FUNCTION + params_lj() {cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + KOKKOS_INLINE_FUNCTION + params_lj(int /*i*/) {cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;}; + KK_FLOAT cutsq,lj1,lj2,lj3,lj4,offset; + }; + + protected: + template + KOKKOS_INLINE_FUNCTION + KK_FLOAT compute_fpair(const KK_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + KK_FLOAT compute_evdwl(const KK_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + KK_FLOAT compute_ecoul(const KK_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/, + const int& /*itype*/, const int& /*jtype*/) const { return 0; } + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + params_lj m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired to space for 12 atom types + KK_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_kkfloat_1d_3_lr_randomread x; + typename AT::t_kkfloat_1d_3_lr c_x; + typename 
AT::t_kkacc_1d_3 f; + typename AT::t_int_1d_randomread type; + + DAT::ttransform_kkacc_1d k_eatom; + DAT::ttransform_kkacc_1d_6 k_vatom; + typename AT::t_kkacc_1d d_eatom; + typename AT::t_kkacc_1d_6 d_vatom; + + int newton_pair; + KK_FLOAT special_lj[4]; + + DAT::ttransform_kkfloat_2d k_cutsq; + typename AT::t_kkfloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate() override; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend EV_FLOAT pair_compute_neighlist(PairLJ96CutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJ96CutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJ96CutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJ96CutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairLJ96CutKokkos*,NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairLJ96CutKokkos*); +}; + +} + +#endif +#endif + From 6cedae410197f4faa62d623b3facd03b134714fe Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 22:50:57 -0500 Subject: [PATCH 245/604] improve/correct description of running force style tests. update output. --- doc/src/Build_development.rst | 116 ++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 40 deletions(-) diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 6845079f8fe..833e702576e 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -4,6 +4,8 @@ Development build options The build procedures in LAMMPS offers a few extra options which are useful during development, testing or debugging. +.. contents:: + ---------- .. 
_compilation: @@ -273,21 +275,22 @@ Unit tests for force styles A large part of LAMMPS are different "styles" for computing non-bonded and bonded interactions selected through the :doc:`pair_style`, :doc:`bond_style`, :doc:`angle_style`, :doc:`dihedral_style`, -:doc:`improper_style`, and :doc:`kspace_style`. Since these all share -common interfaces, it is possible to write generic test programs that -will call those common interfaces for small test systems with less than -100 atoms and compare the results with pre-recorded reference results. -A test run is then a a collection multiple individual test runs each -with many comparisons to reference results based on template input -files, individual command settings, relative error margins, and -reference data stored in a YAML format file with ``.yaml`` -suffix. Currently the programs ``test_pair_style``, ``test_bond_style``, +:doc:`improper_style`, and :doc:`kspace_style` commands. Since these +styles all share common interfaces, it is possible to write generic test +programs that will assemble LAMMPS inputs from templates with different +settings and call those common interfaces for small test systems with +less than 100 atoms and compare the results with pre-recorded reference +results. A test run is then a collection of multiple individual test +runs, each with many comparisons to reference results based on template +input files, individual command settings, relative error margins, and +reference data stored in a YAML format file with ``.yaml`` suffix. +Currently the programs ``test_pair_style``, ``test_bond_style``, ``test_angle_style``, ``test_dihedral_style``, and ``test_improper_style`` are implemented. They will compare forces, energies and (global) stress for all atoms after a ``run 0`` calculation and after a few steps of MD with :doc:`fix nve `, each in multiple variants with different settings and also for multiple -accelerated styles. If a prerequisite style or package is missing, the +accelerated styles. 
If a prerequisite style or package is missing, the individual tests are skipped. All force style tests will be executed on a single MPI process, so using the CMake option ``-D BUILD_MPI=off`` can significantly speed up testing, since this will skip the MPI @@ -296,36 +299,69 @@ output: .. code-block:: console - $ test_pair_style mol-pair-lj_cut.yaml - [==========] Running 6 tests from 1 test suite. - [----------] Global test environment set-up. - [----------] 6 tests from PairStyle - [ RUN ] PairStyle.plain - [ OK ] PairStyle.plain (24 ms) - [ RUN ] PairStyle.omp - [ OK ] PairStyle.omp (18 ms) - [ RUN ] PairStyle.intel - [ OK ] PairStyle.intel (6 ms) - [ RUN ] PairStyle.opt - [ SKIPPED ] PairStyle.opt (0 ms) - [ RUN ] PairStyle.single - [ OK ] PairStyle.single (7 ms) - [ RUN ] PairStyle.extract - [ OK ] PairStyle.extract (6 ms) - [----------] 6 tests from PairStyle (62 ms total) - - [----------] Global test environment tear-down - [==========] 6 tests from 1 test suite ran. (63 ms total) - [ PASSED ] 5 tests. - [ SKIPPED ] 1 test, listed below: - [ SKIPPED ] PairStyle.opt - -In this particular case, 5 out of 6 sets of tests were conducted, the -tests for the ``lj/cut/opt`` pair style was skipped, since the tests -executable did not include it. To learn what individual tests are performed, -you (currently) need to read the source code. You can use code coverage -recording (see next section) to confirm how well the tests cover the code -paths in the individual source files. + $ ctest -R MolPairStyle:lj_cut$ -V + + [...] 
+ + Start 199: MolPairStyle:lj_cut + + 199: Test command: /home/akohlmey/compile/lammps/build-test/test_pair_style "/home/akohlmey/compile/lammps/unittest/force-styles/tests/mol-pair-lj_cut.yaml" + 199: Working Directory: /home/akohlmey/compile/lammps/build-test/unittest/force-styles + 199: Environment variables: + 199: PYTHONPATH=/home/akohlmey/compile/lammps/unittest/force-styles/tests:/home/akohlmey/compile/lammps/python: + 199: PYTHONUNBUFFERED=1 + 199: PYTHONDONTWRITEBYTECODE=1 + 199: OMP_PROC_BIND=false + 199: OMP_NUM_THREADS=4 + 199: LAMMPS_POTENTIALS=/home/akohlmey/compile/lammps/potentials + 199: LD_LIBRARY_PATH=/home/akohlmey/compile/lammps/build-test:/usr/lib64/mpich/lib:/home/akohlmey/.local/lib:: + 199: Test timeout computed to be: 1500 + 199: [==========] Running 9 tests from 1 test suite. + 199: [----------] Global test environment set-up. + 199: [----------] 9 tests from PairStyle + 199: [ RUN ] PairStyle.plain + 199: [ OK ] PairStyle.plain (17 ms) + 199: [ RUN ] PairStyle.omp + 199: [ OK ] PairStyle.omp (3 ms) + 199: [ RUN ] PairStyle.kokkos_omp + 199: [ OK ] PairStyle.kokkos_omp (6 ms) + 199: [ RUN ] PairStyle.gpu + 199: /home/akohlmey/compile/lammps/unittest/force-styles/test_pair_style.cpp:793: Skipped + 199: + 199: + 199: [ SKIPPED ] PairStyle.gpu (0 ms) + 199: [ RUN ] PairStyle.intel + 199: [ OK ] PairStyle.intel (2 ms) + 199: [ RUN ] PairStyle.opt + 199: [ OK ] PairStyle.opt (2 ms) + 199: [ RUN ] PairStyle.single + 199: [ OK ] PairStyle.single (2 ms) + 199: [ RUN ] PairStyle.extract + 199: [ OK ] PairStyle.extract (1 ms) + 199: [ RUN ] PairStyle.extract_omp + 199: [ OK ] PairStyle.extract_omp (1 ms) + 199: [----------] 9 tests from PairStyle (37 ms total) + 199: + 199: [----------] Global test environment tear-down + 199: [==========] 9 tests from 1 test suite ran. (37 ms total) + 199: [ PASSED ] 8 tests. + 199: [ SKIPPED ] 1 test, listed below: + 199: [ SKIPPED ] PairStyle.gpu + 1/1 Test #199: MolPairStyle:lj_cut .............. 
Passed 0.75 sec + + The following tests passed: + MolPairStyle:lj_cut + + 100% tests passed, 0 tests failed out of 1 + + Total Test time (real) = 0.76 sec + +In this particular case, 8 out of 9 sets of tests were conducted, the +tests for the ``lj/cut/gpu`` pair style was skipped, since the LAMMPS +library linked to the test executable did not include it. To learn what +individual tests are performed, you (currently) need to read the source +code. You can use code coverage recording (see next section) to confirm +how well the tests cover the code paths in the individual source files. The force style test programs have a common set of options: From 4ef98b41612dc3b8f7a310d1e262a35133b44932 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Nov 2025 22:51:54 -0500 Subject: [PATCH 246/604] whitespace --- doc/src/Build_development.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 833e702576e..45f5a62de02 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -307,7 +307,7 @@ output: 199: Test command: /home/akohlmey/compile/lammps/build-test/test_pair_style "/home/akohlmey/compile/lammps/unittest/force-styles/tests/mol-pair-lj_cut.yaml" 199: Working Directory: /home/akohlmey/compile/lammps/build-test/unittest/force-styles - 199: Environment variables: + 199: Environment variables: 199: PYTHONPATH=/home/akohlmey/compile/lammps/unittest/force-styles/tests:/home/akohlmey/compile/lammps/python: 199: PYTHONUNBUFFERED=1 199: PYTHONDONTWRITEBYTECODE=1 @@ -327,8 +327,8 @@ output: 199: [ OK ] PairStyle.kokkos_omp (6 ms) 199: [ RUN ] PairStyle.gpu 199: /home/akohlmey/compile/lammps/unittest/force-styles/test_pair_style.cpp:793: Skipped - 199: - 199: + 199: + 199: 199: [ SKIPPED ] PairStyle.gpu (0 ms) 199: [ RUN ] PairStyle.intel 199: [ OK ] PairStyle.intel (2 ms) @@ -341,7 +341,7 @@ output: 199: [ RUN ] PairStyle.extract_omp 199: [ OK ] 
PairStyle.extract_omp (1 ms) 199: [----------] 9 tests from PairStyle (37 ms total) - 199: + 199: 199: [----------] Global test environment tear-down 199: [==========] 9 tests from 1 test suite ran. (37 ms total) 199: [ PASSED ] 8 tests. From ad85f815d233ee8d54d7103888bc40b6b9b8d11f Mon Sep 17 00:00:00 2001 From: Meg McCarthy Date: Tue, 11 Nov 2025 23:12:44 -0700 Subject: [PATCH 247/604] describe ctest cmd in detail --- doc/src/Build_development.rst | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 45f5a62de02..2e1b1cb6f8a 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -294,8 +294,11 @@ accelerated styles. If a prerequisite style or package is missing, the individual tests are skipped. All force style tests will be executed on a single MPI process, so using the CMake option ``-D BUILD_MPI=off`` can significantly speed up testing, since this will skip the MPI -initialization for each test run. Below is an example command and -output: +initialization for each test run. + +Below is an example command and output for running tests that contain +the string ``MolPairStyle:lj_cut`` (argument to the ``-R`` option +flag) and printing with verbose output (the ``-V`` flag): .. code-block:: console @@ -358,10 +361,11 @@ output: In this particular case, 8 out of 9 sets of tests were conducted, the tests for the ``lj/cut/gpu`` pair style was skipped, since the LAMMPS -library linked to the test executable did not include it. To learn what -individual tests are performed, you (currently) need to read the source -code. You can use code coverage recording (see next section) to confirm -how well the tests cover the code paths in the individual source files. +library linked to the test executable did not include the GPU package. +To learn what individual tests are performed, you (currently) need to +read the source code. 
You can use code coverage recording (see next +section) to confirm how well the tests cover the code paths in the +individual source files. The force style test programs have a common set of options: From 15a42d72c1d2fe49af0f2872c5da6b8410c5105d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 06:38:29 -0500 Subject: [PATCH 248/604] further updates to the description of building and running LAMMPS integrated tests --- doc/src/Build_development.rst | 111 ++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 2e1b1cb6f8a..70701f3e83d 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -149,27 +149,19 @@ version of that library will be downloaded and compiled along with LAMMPS and the test programs) and will download and compile a specific version of the `GoogleTest `_ C++ test framework that is used to implement the tests. Those unit tests -may be combined with memory access and leak checking with valgrind -(see below for how to enable it). In that case, running so-called -death tests will create a lot of false positives and thus they can be -disabled by configuring compilation with the additional setting -``-D SKIP_DEATH_TESTS=on``. - -.. admonition:: Software version and LAMMPS configuration requirements - :class: note - - The compiler and library version requirements for the testing - framework are more strict than for the main part of LAMMPS. For - example the default GNU C++ and Fortran compilers of RHEL/CentOS 7.x - (version 4.8.x) are not sufficient. The CMake configuration will try - to detect incompatible versions and either skip incompatible tests or - stop with an error. Also the number of available tests will depend on - installed LAMMPS packages, development environment, operating system, - and configuration settings. 
+may be combined with memory access and leak checking with valgrind (see +below for how to enable it). In that case, running so-called death +tests will create a lot of false positives and thus they can be disabled +by configuring compilation with the additional setting ``-D +SKIP_DEATH_TESTS=on``. After compilation is complete, the unit testing is started in the build folder using the ``ctest`` command, which is part of the CMake software. -The output of this command will be looking something like this: +The number of available tests will depend on the LAMMPS versions, +installed LAMMPS packages, configuration settings, development +environment, and operating system. + +The output of the plain ``ctest`` command looks something like the following: .. code-block:: console @@ -216,14 +208,16 @@ The ``ctest`` command has many options, the most important ones are: - verbose output: display output of individual test runs * - ``-j `` - parallel run: run tests in parallel + * - ``--test-dir `` + - provide path to the CMake build folder. By default ``ctest`` uses ``.`` * - ``-R `` - - run subset of tests matching the regular expression + - run subset of tests matching the regular expression ```` * - ``-E `` - - exclude subset of tests matching the regular expression + - exclude subset of tests matching the regular expression ```` * - ``-L `` - - run subset of tests with a label matching the regular expression + - run subset of tests with a label matching the regular expression ```` * - ``-LE `` - - exclude subset of tests with a label matching the regular expression + - exclude subset of tests with a label matching the regular expression ```` * - ``-N`` - dry-run: display list of tests without running them * - ``-T memcheck`` @@ -240,8 +234,8 @@ will be skipped if prerequisite features are not available in LAMMPS. The unit test framework was added in spring 2020 and is under active development. The coverage is not complete and will be expanded over - time. 
Preference is given to parts of the code base that are easy to - test or commonly used. + time. Preference was given to test parts of the code base that are + easy to test or commonly used. Tests as shown by the ``ctest`` program are commands defined in the ``CMakeLists.txt`` files in the ``unittest`` directory tree. A few @@ -258,16 +252,16 @@ These special test programs are structured to perform multiple individual tests internally and each of those contains several checks (aka assertions) for internal data being changed as expected. -Tests for force computing or modifying styles (e.g. styles for non-bonded -and bonded interactions and selected fixes) are run by using a more generic -test program that reads its input from files in YAML format. The YAML file -provides the information on how to customized the test program to test -a specific style and - if needed - with specific settings. -To add a test for another, similar style (e.g. a new pair style) it is -usually sufficient to add a suitable YAML file. :doc:`Detailed -instructions for adding tests ` are provided in the -Programmer Guide part of the manual. A description of what happens -during the tests is given below. +Tests for force computing or modifying styles (e.g. styles for +non-bonded and bonded interactions and selected fixes) are run by using +a more generic test program that reads its input from files in YAML +format. The YAML file provides the information on how to customized the +test program to test a specific style and - if needed - with specific +settings. To add a test for another, similar style (e.g. a new pair +style) it is usually sufficient to add a suitable YAML file. +:doc:`Detailed instructions for adding tests ` are +provided in the Programmer Guide part of the manual. A description of +what happens during these tests is given below. Unit tests for force styles ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -294,11 +288,11 @@ accelerated styles. 
If a prerequisite style or package is missing, the individual tests are skipped. All force style tests will be executed on a single MPI process, so using the CMake option ``-D BUILD_MPI=off`` can significantly speed up testing, since this will skip the MPI -initialization for each test run. +initialization for each test run. -Below is an example command and output for running tests that contain -the string ``MolPairStyle:lj_cut`` (argument to the ``-R`` option -flag) and printing with verbose output (the ``-V`` flag): +Below is an example command and output for running a single test named +``MolPairStyle:lj_cut`` (argument to the ``-R`` option which selects +tests by regular expression) and printing detailed output (the ``-V`` flag): .. code-block:: console @@ -360,11 +354,11 @@ flag) and printing with verbose output (the ``-V`` flag): Total Test time (real) = 0.76 sec In this particular case, 8 out of 9 sets of tests were conducted, the -tests for the ``lj/cut/gpu`` pair style was skipped, since the LAMMPS -library linked to the test executable did not include the GPU package. -To learn what individual tests are performed, you (currently) need to -read the source code. You can use code coverage recording (see next -section) to confirm how well the tests cover the code paths in the +tests for the ``lj/cut/gpu`` pair style were skipped, since the LAMMPS +library linked to the test executable did not include the GPU package. +To learn what individual tests are performed, you (currently) need to +read the source code. You can use code coverage recording (see next +section) to confirm how well the tests cover the code paths in the individual source files. 
The force style test programs have a common set of options: @@ -383,14 +377,17 @@ The force style test programs have a common set of options: * - ``-v`` - verbose output: also print the executed LAMMPS commands -The ``ctest`` tool has no mechanism to directly pass flags to the individual -test programs, but a workaround has been implemented where these flags can be -set in an environment variable ``TEST_ARGS``. Example: +Since the ``ctest`` tool has no mechanism to directly pass flags to the +individual test programs, a workaround has been implemented where these +flags can be set in an environment variable ``TEST_ARGS``. Example: .. code-block:: bash env TEST_ARGS=-s ctest -V -R BondStyle +This adds output with statistics for the computed error of the various +tests relative to the reference (e.g. the per-atom force components). + To add a test for a style that is not yet covered, it is usually best to copy a YAML file for a similar style to a new file, edit the details of the style (how to call it, how to set its coefficients) and then @@ -398,13 +395,17 @@ run test command with either the ``-g`` and the replace the initial test file with the regenerated one or the ``-u`` option. The ``-u`` option will destroy the original file, if the generation run does not complete, so using ``-g`` is recommended unless the YAML file is fully tested -and working. +and working. To have the new test file recognized by ``ctest``, you +need to re-run cmake. You can verify that the new test is available +by checking the output of ``ctest -N``. Some of the force style tests are rather slow to run and some are very sensitive to small differences like CPU architecture, compiler -toolchain, compiler optimization. Those tests are flagged with a "slow" +toolchain, compiler optimization. Those tests are flagged with a "slow" and/or "unstable" label, and thus those tests can be selectively -excluded with the ``-LE`` flag or selected with the ``-L`` flag. 
+excluded with the ``-LE`` flag to ``ctest`` (see description of the most +commonly used ``ctest`` flags) or specifically selected using the ``-L`` +flag. .. admonition:: Recommendations and notes for YAML files :class: note @@ -481,7 +482,7 @@ YAML format test inputs. Use custom linker for faster link times when ENABLE_TESTING is active ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When compiling LAMMPS with enabled tests, most test executables will +When compiling LAMMPS with testing enabled, most test executables will need to be linked against the LAMMPS library. Since this can be a very large library with many C++ objects when many packages are enabled, link times can become very long on machines that use the GNU BFD linker (e.g. @@ -496,6 +497,12 @@ option will use the system default linker otherwise, the linker is chosen explicitly. This option is only available for the GNU or Clang C++ compilers. +A small additional improvement can be obtained by building LAMMPS as a +shared library with ``-D BUILD_SHARED_LIBS=on``. But this is a small +improvement due to reducing file I/O. Using an alternate linker has an +algorithmic improvement through using symbol resolution algorithms with +lower algorithmic complexity. + Tests for other components and utility functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -545,7 +552,9 @@ to do this to install it via pip: After post-processing with ``gen_coverage_html`` the results are in a folder ``coverage_html`` and can be viewed with a web browser. -The images below illustrate how the data is presented. +The images below illustrate how the data is presented. The coverage +data for testing the current ``develop`` branch is generated nightly +and currently available at: https://download.lammps.org/coverage/ .. 
only:: not latex From 2b962fa5218c37d787007022a5b3bf5ffdc46289 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 12 Nov 2025 08:50:11 -0600 Subject: [PATCH 249/604] throw error for device neigh builds on AMD shared memory GPUs, documenting the error codes in the GPU lib for consistency with src/GPU/gpu_extra.h --- lib/gpu/lal_device.cpp | 5 +++++ lib/gpu/lal_device.h | 45 +++++++++++++++++++++--------------------- src/GPU/gpu_extra.h | 8 ++++++++ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 30f291dfb8b..a5963613c3b 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -623,6 +623,11 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal, gpu_nbor=2; #endif + #ifdef USE_OPENCL + if (_ocl_config_name == "AMD_GPU" && gpu->shared_memory(_first_device) && gpu_nbor > 0) + return -17; + #endif + if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial, *gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d, _block_cell_id, _block_nbor_build, threads_per_atom, diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index 04d81e406fa..b4c8a06190d 100644 --- a/lib/gpu/lal_device.h +++ b/lib/gpu/lal_device.h @@ -40,13 +40,26 @@ class Device { /// Initialize the device for use by this process /** Sets up a per-device MPI communicator for load balancing and initializes * the device (ngpu starting at first_gpu_id) that this proc will be using - * Returns: + * Returns: (consistent with src/GPU/gpu_extra.h GPU_EXTRA::check_flag()) * - 0 if successful - * - -2 if GPU not found - * - -4 if GPU library not compiled for GPU + * - -1 The package gpu command is required for gpu styles + * - -2 Could not find/initialize a specified accelerator device + * - -3 Insufficient memory on accelerator + * - -4 GPU library not compiled for this accelerator + * - -5 Double precision is not supported on card * - -6 if GPU could not be initialized for use * - -7 if accelerator sharing is not 
currently allowed on system - * - -11 if config_string has the wrong number of parameters **/ + * - -8 GPU particle split must be set to 1 for this pair style + * - -9 CPU neighbor lists must be used for ellipsoid/sphere mix + * - -10 Invalid threads_per_atom specified + * - -11 if config_string has the wrong number of parameters + * - -12 Invalid OpenCL platform ID + * - -13 Invalid device type flags + * - -15 PPPM was compiled for double precision floating point but GPU device supports single precision only + * - -16 GPU library was compiled for double or mixed precision floating point but GPU device supports single precision only + * - -17 Cannot use device neighbor list builds with AMD shared memory GPUs with OpenCL 2.0 + * **/ + int init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, const int first_gpu_id, const int gpu_mode, const double particle_split, const int t_per_atom, @@ -63,12 +76,8 @@ class Device { * \param vel True if velocities need to be stored * \param extra_fields Nonzero if extra fields need to be stored * - * Returns: - * - 0 if successful - * - -1 if fix gpu not found - * - -3 if there is an out of memory error - * - -4 if the GPU library was not compiled for GPU - * - -5 Double precision is not supported on card **/ + * Returns: error code the same as init_device() + **/ int init(Answer &ans, const bool charge, const bool rot, const int nlocal, const int nall, const int maxspecial, const bool vel=false, const int extra_fields=0); @@ -77,12 +86,8 @@ class Device { /** \param nlocal Total number of local particles to allocate memory for * \param nall Total number of local+ghost particles * - * Returns: - * - 0 if successful - * - -1 if fix gpu not found - * - -3 if there is an out of memory error - * - -4 if the GPU library was not compiled for GPU - * - -5 Double precision is not supported on card **/ + * Returns: error code the same as init_device() + **/ int init(Answer &ans, const int nlocal, const int nall); /// Initialize the 
neighbor list storage @@ -102,12 +107,8 @@ class Device { * \param threads_per_atom value to be used by the neighbor list only * \param ilist_map true if ilist mapping data structures used (3-body) * - * Returns: - * - 0 if successful - * - -1 if fix gpu not found - * - -3 if there is an out of memory error - * - -4 if the GPU library was not compiled for GPU - * - -5 Double precision is not supported on card **/ + * Returns: error code the same as init_device() + **/ int init_nbor(Neighbor *nbor, const int nlocal, const int host_nlocal, const int nall, const int maxspecial, const int gpu_host, diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h index 662bfb7b84e..ae2738f9ab4 100644 --- a/src/GPU/gpu_extra.h +++ b/src/GPU/gpu_extra.h @@ -50,6 +50,10 @@ inline void check_flag(int error_flag, LAMMPS_NS::Error *error, MPI_Comm &world) { int all_success; MPI_Allreduce(&error_flag, &all_success, 1, MPI_INT, MPI_MIN, world); + + // error messages are consistent with the returned values + // from init_device() in lib/gpu/lal_device.h + if (all_success != 0) { if (all_success == -1) error->all(FLERR, "The package gpu command is required for gpu styles"); @@ -85,6 +89,10 @@ inline void check_flag(int error_flag, LAMMPS_NS::Error *error, MPI_Comm &world) error->all(FLERR, "GPU library was compiled for double or mixed precision " "floating point but GPU device supports single precision only."); + else if (all_success == -17) + error->all(FLERR, + "Cannot use device neighbor list builds " + "with AMD shared memory GPUs with OpenCL API."); else error->all(FLERR, "Unknown error in GPU library"); } From 96a0be9e3e7531b9b8dabf83f60acd182bb99c7e Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 12 Nov 2025 10:03:49 -0600 Subject: [PATCH 250/604] add a function in the GPU library to indicate that host neighbor list is required --- lib/gpu/lal_device.cpp | 5 +++++ lib/gpu/lal_device.h | 9 +++++++++ src/GPU/fix_gpu.cpp | 2 ++ 3 files changed, 16 insertions(+) diff --git 
a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index a5963613c3b..139ddb8d543 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -1250,3 +1250,8 @@ bool lmp_gpu_config(const std::string &category, const std::string &setting) } return false; } + +bool lmp_gpu_requires_host_neighbor() +{ + return global_device.requires_host_neighbor(); +} \ No newline at end of file diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index b4c8a06190d..651d9ef7e6d 100644 --- a/lib/gpu/lal_device.h +++ b/lib/gpu/lal_device.h @@ -282,6 +282,15 @@ class Device { inline double ptx_arch() const { return _ptx_arch; } inline void set_simd_size(int simd_sz) { _simd_size = simd_sz; } + /// Return true if host neighbor list is required (e.g. AMD shared memory GPUs with OpenCL builds) + inline bool requires_host_neighbor() { + #ifdef USE_OPENCL + if (_ocl_config_name == "AMD_GPU" && gpu->shared_memory(_first_device)) + return true; + #endif + return false; + } + // -------------------------- DEVICE DATA ------------------------- /// Geryon Device diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 6478e1db853..f59d09a70ee 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -39,6 +39,7 @@ using namespace FixConst; enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH}; +// functions provided by the GPU library extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, const int first_gpu_id, const int gpu_mode, const double particle_split, const int t_per_atom, @@ -50,6 +51,7 @@ extern double lmp_gpu_forces(double **f, double **tor, double *eatom, double **v double *virial, double &ecoul, int &err_flag); extern double lmp_gpu_update_bin_size(const double subx, const double suby, const double subz, const int nlocal, const double cut); +extern bool lmp_gpu_requires_host_neighbor(); static const char cite_gpu_package[] = "GPU package (short-range, long-range and three-body potentials): doi:10.1016/j.cpc.2010.12.021, doi:10.1016/j.cpc.2011.10.012, 
doi:10.1016/j.cpc.2013.08.002, doi:10.1016/j.commatsci.2014.10.068, doi:10.1016/j.cpc.2016.10.020, doi:10.3233/APC200086\n\n" From bca889ba06aecbc1345410e9099c99e19924114c Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 12 Nov 2025 10:05:50 -0600 Subject: [PATCH 251/604] fix whitespace, added a newline at EOF --- lib/gpu/lal_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 139ddb8d543..0c9b5c5c32a 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -1254,4 +1254,4 @@ bool lmp_gpu_config(const std::string &category, const std::string &setting) bool lmp_gpu_requires_host_neighbor() { return global_device.requires_host_neighbor(); -} \ No newline at end of file +} From 50ae53715d10a4d8aa13a45eeb5926ce5b52444c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 12:27:37 -0500 Subject: [PATCH 252/604] improve generic GPU error messages --- src/GPU/gpu_extra.h | 57 +++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h index ae2738f9ab4..e54195b6e76 100644 --- a/src/GPU/gpu_extra.h +++ b/src/GPU/gpu_extra.h @@ -46,7 +46,9 @@ namespace GPU_EXTRA { -inline void check_flag(int error_flag, LAMMPS_NS::Error *error, MPI_Comm &world) +using Error = LAMMPS_NS::Error; + +inline void check_flag(int error_flag, Error *error, MPI_Comm &world) { int all_success; MPI_Allreduce(&error_flag, &all_success, 1, MPI_INT, MPI_MIN, world); @@ -56,52 +58,57 @@ inline void check_flag(int error_flag, LAMMPS_NS::Error *error, MPI_Comm &world) if (all_success != 0) { if (all_success == -1) - error->all(FLERR, "The package gpu command is required for gpu styles"); + error->all(FLERR, Error::NOLASTLINE, "The package gpu command is required for gpu styles"); else if (all_success == -2) - error->all(FLERR, "Could not find/initialize a specified accelerator device"); + error->all(FLERR, 
Error::NOLASTLINE, + "Could not find/initialize a specified accelerator device"); else if (all_success == -3) - error->all(FLERR, "Insufficient memory on accelerator"); + error->all(FLERR, Error::NOLASTLINE, "Insufficient memory on accelerator"); else if (all_success == -4) - error->all(FLERR, "GPU library not compiled for this accelerator"); + error->all(FLERR, Error::NOLASTLINE, "GPU library not compiled for this accelerator"); else if (all_success == -5) - error->all(FLERR, "Double precision is not supported on this accelerator"); + error->all(FLERR, Error::NOLASTLINE, "Double precision is not supported on this accelerator"); else if (all_success == -6) - error->all(FLERR, "Unable to initialize accelerator for use"); + error->all(FLERR, Error::NOLASTLINE, "Unable to initialize accelerator for use"); else if (all_success == -7) - error->all(FLERR, "Accelerator sharing is not currently supported on system"); + error->all(FLERR, Error::NOLASTLINE, + "Accelerator sharing is not currently supported on system"); else if (all_success == -8) - error->all(FLERR, "GPU particle split must be set to 1 for this pair style."); + error->all(FLERR, Error::NOLASTLINE, + "GPU particle split must be set to 1 for this pair style."); else if (all_success == -9) - error->all(FLERR, "CPU neighbor lists must be used for ellipsoid/sphere mix."); + error->all(FLERR, Error::NOLASTLINE, + "CPU neighbor lists must be used for ellipsoid/sphere mix."); else if (all_success == -10) - error->all(FLERR, "Invalid threads_per_atom specified."); + error->all(FLERR, Error::NOLASTLINE, "Invalid threads_per_atom specified."); else if (all_success == -11) - error->all(FLERR, "Invalid custom OpenCL parameter string."); + error->all(FLERR, Error::NOLASTLINE, "Invalid custom OpenCL parameter string."); else if (all_success == -12) - error->all(FLERR, "Invalid OpenCL platform ID."); + error->all(FLERR, Error::NOLASTLINE, "Invalid OpenCL platform ID."); else if (all_success == -13) - error->all(FLERR, 
"Invalid device configuration."); + error->all(FLERR, Error::NOLASTLINE, "Invalid device configuration."); else if (all_success == -15) - error->all(FLERR, - "PPPM was compiled for double precision floating point " - "but GPU device supports single precision only."); + error->all(FLERR, Error::NOLASTLINE, + "PPPM was compiled for double precision floating point but GPU device supports " + "single precision only."); else if (all_success == -16) - error->all(FLERR, - "GPU library was compiled for double or mixed precision " - "floating point but GPU device supports single precision only."); + error->all(FLERR, Error::NOLASTLINE, + "GPU library was compiled for double or mixed precision floating point but GPU " + "device supports single precision only."); else if (all_success == -17) - error->all(FLERR, - "Cannot use device neighbor list builds " - "with AMD shared memory GPUs with OpenCL API."); + error->all( + FLERR, Error::NOLASTLINE, + "Cannot use device neighbor list builds with AMD shared memory GPUs with OpenCL API."); else - error->all(FLERR, "Unknown error in GPU library"); + error->all(FLERR, Error::NOLASTLINE, "Unknown error {} in GPU library", all_success); } } inline void gpu_ready(LAMMPS_NS::Modify *modify, LAMMPS_NS::Error *error) { int ifix = modify->find_fix("package_gpu"); - if (ifix < 0) error->all(FLERR, "The package gpu command is required for gpu styles"); + if (ifix < 0) + error->all(FLERR, Error::NOLASTLINE, "The package gpu command is required for gpu styles"); } } // namespace GPU_EXTRA From 5d992cb3b03a29abd538f99030899b2e282e27ca Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 15:23:45 -0500 Subject: [PATCH 253/604] do not call MPI in gpu library after MPI is already finalized --- lib/gpu/lal_device.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 0c9b5c5c32a..84550850281 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -877,6 +877,7 @@ 
void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, single[7]=ans.cpu_idle_time(); single[8]=nbor.bin_time(); + // we cannot use MPI calls after MPI is already finalized which may happen on errors. MPI_Finalized(&post_final); if (post_final) return; @@ -962,6 +963,7 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in, const double cpu_time, const double idle_time, FILE *screen) { double single[9], times[9]; + int post_final = 0; single[0]=time_out.total_seconds(); single[1]=time_in.total_seconds()+atom.transfer_time()+atom.cast_time(); @@ -973,6 +975,10 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in, single[7]=idle_time; single[8]=ans.cast_time(); + // we cannot use MPI calls after MPI is already finalized which may happen on errors. + MPI_Finalized(&post_final); + if (post_final) return; + MPI_Reduce(single,times,9,MPI_DOUBLE,MPI_SUM,0,_comm_replica); double my_max_bytes=max_bytes+atom.max_gpu_bytes(); From a31c19b31806fdd315a0eac07f2f8b200dd6dde1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 15:28:27 -0500 Subject: [PATCH 254/604] re-implement lmp_gpu_requires_host_neighbor() so we can call it before lmp_init_device() --- lib/gpu/lal_device.cpp | 20 +++++++++++++++----- lib/gpu/lal_device.h | 9 +-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 84550850281..8e64e0dcfeb 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -1184,6 +1184,21 @@ bool lmp_has_compatible_gpu_device() return compatible_gpu; } +// check if a GPU requires neighbor lists on the host. 
+bool lmp_gpu_requires_host_neighbor() +{ + UCL_Device gpu; + +#ifdef USE_OPENCL + if (gpu.num_platforms() > 0) { + auto name = gpu.platform_name(); + if ((name.find("AMD") != std::string::npos) && gpu.shared_memory(0)) return true; + } +#endif + + return false; +} + std::string lmp_gpu_device_info() { std::ostringstream out; @@ -1256,8 +1271,3 @@ bool lmp_gpu_config(const std::string &category, const std::string &setting) } return false; } - -bool lmp_gpu_requires_host_neighbor() -{ - return global_device.requires_host_neighbor(); -} diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index 651d9ef7e6d..a7e5b5b78ff 100644 --- a/lib/gpu/lal_device.h +++ b/lib/gpu/lal_device.h @@ -283,19 +283,12 @@ class Device { inline void set_simd_size(int simd_sz) { _simd_size = simd_sz; } /// Return true if host neighbor list is required (e.g. AMD shared memory GPUs with OpenCL builds) - inline bool requires_host_neighbor() { - #ifdef USE_OPENCL - if (_ocl_config_name == "AMD_GPU" && gpu->shared_memory(_first_device)) - return true; - #endif - return false; - } - // -------------------------- DEVICE DATA ------------------------- /// Geryon Device UCL_Device *gpu; + // must match definition in src/GPU/fix_gpu.cpp enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH}; // --------------------------- ATOM DATA -------------------------- From 7fa22f587229350875538657bebc2ceeab61e4e9 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 15:30:08 -0500 Subject: [PATCH 255/604] change GPU neighbor list handling to automatically switch to host if needed.
--- src/GPU/fix_gpu.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index f59d09a70ee..58381382753 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -37,7 +37,8 @@ using namespace LAMMPS_NS; using namespace FixConst; -enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH}; +// must match definition in lib/gpu/lal_device.h +enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH, GPU_DEFAULT}; // functions provided by the GPU library extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, @@ -129,7 +130,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : // options - _gpu_mode = GPU_NEIGH; + _gpu_mode = GPU_DEFAULT; _particle_split = 1.0; int nthreads = 0; int newtonflag = force->newton_pair; @@ -145,12 +146,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : if (strcmp(arg[iarg],"neigh") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); const std::string modearg = arg[iarg+1]; - if ((modearg == "yes") || (modearg == "on") || (modearg == "true")) + if (modearg == "hybrid") + _gpu_mode = GPU_HYB_NEIGH; + else if (utils::logical(FLERR, modearg, false, lmp)) _gpu_mode = GPU_NEIGH; - else if ((modearg == "no") || (modearg == "off") || (modearg == "false")) + else _gpu_mode = GPU_FORCE; - else if (modearg == "hybrid") _gpu_mode = GPU_HYB_NEIGH; - else error->all(FLERR,"Illegal package gpu command"); iarg += 2; } else if (strcmp(arg[iarg],"newton") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); @@ -213,6 +214,17 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : } #endif + // change default setting for neighbor lists if GPU requires host neighbor lists + if (_gpu_mode == GPU_DEFAULT) { + if (lmp_gpu_requires_host_neighbor()) { + if (comm->me == 0) + error->warning(FLERR, "GPU does not support neighbor lists on device, switching to host"); + _gpu_mode = GPU_FORCE; + } else { + _gpu_mode = GPU_NEIGH; + } + } + 
// set newton pair flag force->newton_pair = newtonflag; From 4478781c194e04d3f8136d57ff381d04c6f5fde0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 15:30:34 -0500 Subject: [PATCH 256/604] improve error handling in package gpu command --- src/GPU/fix_gpu.cpp | 47 ++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 58381382753..be825cd5db8 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -118,12 +118,12 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : { if (lmp->citeme) lmp->citeme->add(cite_gpu_package); - if (narg < 4) error->all(FLERR,"Illegal package gpu command"); + if (narg < 4) utils::missing_cmd_args(FLERR,"package gpu", error); // If ngpu is 0, autoset ngpu to the number of devices per node matching // best device int ngpu = utils::inumeric(FLERR, arg[3], false, lmp); - if (ngpu < 0) error->all(FLERR,"Illegal number of GPUs ({}) in package gpu command", ngpu); + if (ngpu < 0) error->all(FLERR, 1, "Illegal number of GPUs ({}) in package gpu command", ngpu); // Negative value indicate GPU package should find the best device ID int first_gpu_id = -1; @@ -142,9 +142,10 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : char *device_type_flags = nullptr; int iarg = 4; + int ioffs = -2; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu neigh", error); const std::string modearg = arg[iarg+1]; if (modearg == "hybrid") _gpu_mode = GPU_HYB_NEIGH; @@ -154,65 +155,67 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : _gpu_mode = GPU_FORCE; iarg += 2; } else if (strcmp(arg[iarg],"newton") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu newton", error); newtonflag = 
utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"binsize") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu binsize", error); binsize = utils::numeric(FLERR,arg[iarg+1],false,lmp); - if (binsize <= 0.0) error->all(FLERR,"Illegal fix GPU command"); + if (binsize <= 0.0) + error->all(FLERR,iarg+1+ioffs,"Illegal package gpu binsize value {}", binsize); iarg += 2; } else if (strcmp(arg[iarg],"split") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu split", error); _particle_split = utils::numeric(FLERR,arg[iarg+1],false,lmp); if (_particle_split == 0.0 || _particle_split > 1.0) - error->all(FLERR,"Illegal package GPU command"); + error->all(FLERR,iarg+1+ioffs,"Illegal package gpu split value {}", _particle_split); iarg += 2; } else if (strcmp(arg[iarg],"gpuID") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu gpuID", error); first_gpu_id = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"tpa") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu tpa", error); threads_per_atom = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"omp") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu omp", error); nthreads = utils::inumeric(FLERR,arg[iarg+1],false,lmp); - if (nthreads < 0) error->all(FLERR,"Illegal fix GPU command"); + if (nthreads < 0) + error->all(FLERR,iarg+1+ioffs,"Illegal package gpu omp value {}", nthreads); iarg += 2; } else if (strcmp(arg[iarg],"platform") == 0) { - if (iarg+2 > narg) 
error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu platform", error); ocl_platform = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"device_type") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu device_type", error); device_type_flags = arg[iarg+1]; iarg += 2; } else if (strcmp(arg[iarg],"blocksize") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu blocksize", error); block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"pair/only") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu pair/only", error); lmp->pair_only_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"ocl_args") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); + if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"package gpu ocl_args", error); opencl_args = arg[iarg+1]; iarg += 2; - } else error->all(FLERR,"Illegal package gpu command"); + } else error->all(FLERR,iarg+ioffs,"Unknown package gpu keyword {}", arg[iarg]); } - #if (LAL_USE_OMP == 0) +#if (LAL_USE_OMP == 0) if (nthreads > 1) - error->all(FLERR,"No OpenMP support compiled into the GPU package"); - #else + error->all(FLERR, Error::NOPOINTER, "No OpenMP support compiled into the GPU package"); +#else if (nthreads > 0) { omp_set_num_threads(nthreads); comm->nthreads = nthreads; } - #endif +#endif // change default setting for neighbor lists if GPU requires host neighbor lists if (_gpu_mode == GPU_DEFAULT) { From 9dc83d11fa72b47743dab805acda650353aa5dee Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 15:45:47 -0500 Subject: [PATCH 257/604] 
improve comment --- src/error.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/error.cpp b/src/error.cpp index bb3ed8488d3..f22264bd7c7 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -136,7 +136,7 @@ void Error::all(const std::string &file, int line, int failed, const std::string if (showerror && (me == 0)) utils::logmesg(lmp,mesg); utils::flush_buffers(lmp); - // allow commands if an exception was caught in a run + // allow commands again if an exception was caught in a run // update may be a null pointer when catching command-line errors if (update) update->whichflag = 0; From 43c39d8256d4c19f364d07157a8fe547ddd3bae2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 16:50:42 -0500 Subject: [PATCH 258/604] improve a few more error messages --- src/GPU/fix_gpu.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index be825cd5db8..f918291f3c8 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -276,13 +276,13 @@ void FixGPU::init() // GPU package cannot be used with atom_style template if (atom->molecular == Atom::TEMPLATE) - error->all(FLERR,"GPU package does not (yet) work with " - "atom_style template"); + error->all(FLERR, Error::NOLASTLINE, + "GPU package does not (yet) work with atom_style template"); // give a warning if no pair style is defined if (!force->pair && (comm->me == 0)) - error->warning(FLERR,"Using package gpu without any pair style defined"); + error->warning(FLERR, "Using package gpu without any pair style defined"); // make sure fdotr virial is not accumulated multiple times // also disallow GPU neighbor lists for hybrid styles @@ -293,7 +293,8 @@ void FixGPU::init() if (!utils::strmatch(hybrid->keywords[i],"/gpu$")) force->pair->no_virial_fdotr_compute = 1; if (_gpu_mode != GPU_FORCE) - error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style"); + error->all(FLERR, Error::NOLASTLINE, + "Must not 
use GPU neighbor lists with hybrid pair style"); } // rRESPA support @@ -321,7 +322,8 @@ void FixGPU::setup(int vflag) if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) if (neighbor->exclude_setting() != 0) - error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds"); + error->all(FLERR, Error::NOLASTLINE, + "Cannot use neigh_modify exclude with GPU neighbor builds"); if (utils::strmatch(update->integrate_style,"^verlet")) { if (overlap_topo) neighbor->set_overlap_topo(1); @@ -354,7 +356,8 @@ void FixGPU::post_force(int /* vflag */) double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom, force->pair->vatom, lvirial, force->pair->eng_coul, err_flag); if (err_flag==1) - error->one(FLERR,"Neighbor list problem on the GPU. Try increasing the value of 'neigh_modify one' " + error->one(FLERR, Error::NOLASTLINE, + "Neighbor list problem on the GPU. Try increasing the value of 'neigh_modify one' " "or the GPU neighbor list 'binsize'."); force->pair->eng_vdwl += my_eng; From 9be03a34f6cab4b6bf0f5318523f3fde6f835c7e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 17:14:58 -0500 Subject: [PATCH 259/604] update package gpu docs --- doc/src/package.rst | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/doc/src/package.rst b/doc/src/package.rst index 02138f73260..0d98ffcec96 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -19,9 +19,10 @@ Syntax Ngpu = # of GPUs per node zero or more keyword/value pairs may be appended keywords = *neigh* or *newton* or *pair/only* or *binsize* or *split* or *gpuID* or *tpa* or *blocksize* or *omp* or *platform* or *device_type* or *ocl_args* - *neigh* value = *yes* or *no* + *neigh* value = *yes* or *no* or *hybrid* *yes* = neighbor list build on GPU (default) *no* = neighbor list build on CPU + *hybrid* = perform binning on the CPU but build neighbor list on the GPU *newton* = *off* or *on* *off* = set Newton pairwise 
flag off (default and required) *on* = set Newton pairwise flag on (currently not allowed) @@ -197,8 +198,8 @@ simulations. ---------- -The *gpu* style invokes settings associated with the use of the GPU -package. +The *gpu* style invokes settings associated with the use of the +:ref:`GPU package <PKG-GPU>`. The *Ngpu* argument sets the number of GPUs per node. If *Ngpu* is 0 and no other keywords are specified, GPU or accelerator devices are @@ -216,15 +217,25 @@ tasks (per node) than GPUs, multiple MPI tasks will share each GPU. Optional keyword/value pairs can also be specified. Each has a default value as listed below. +.. versionchanged:: TBD + + Updated description to the current state of the GPU package + The *neigh* keyword specifies where neighbor lists for pair style computation will be built. If *neigh* is *yes*, which is the default, neighbor list building is performed on the GPU. If *neigh* is *no*, -neighbor list building is performed on the CPU. GPU neighbor list -building currently cannot be used with a triclinic box. GPU neighbor -lists are not compatible with commands that are not GPU-enabled. When -a non-GPU enabled command requires a neighbor list, it will also be -built on the CPU. In these cases, it will typically be more efficient -to only use CPU neighbor list builds. +neighbor list building is instead performed on the CPU. If *neigh* is +*hybrid*, the binning step of the neighbor list build is performed on the +CPU and the lists themselves on the GPU. GPU neighbor list building +currently is not fully compatible with a triclinic box; if the behavior +is significantly different from the CPU case, use the *neigh no* +setting. GPU neighbor lists are not accessible for commands that are +not GPU-enabled. When a non-GPU enabled command requires a neighbor +list, it will be built on the CPU. In these cases, it can be more +efficient to only use CPU neighbor list builds, particularly if the CPU +neighbor list is perpetual, i.e. used in every step.
If a GPU +environment does not support building neighbor lists on the GPU, the +default setting will automatically change to *neigh no*. The *newton* keyword sets the Newton flags for pairwise (not bonded) interactions to *off* or *on*, the same as the :doc:`newton <newton>` From e0cf81baa5356fa0907c31bca631a12f08f536d0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 Nov 2025 17:28:10 -0500 Subject: [PATCH 260/604] add section headers for individual package command and table of contents --- doc/src/package.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/src/package.rst b/doc/src/package.rst index 0d98ffcec96..52177e8759b 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -3,6 +3,8 @@ package command =============== +.. contents:: + Syntax """""" @@ -196,7 +198,8 @@ See the :doc:`Accelerator packages <Speed_packages>` page for more details about using the various accelerator packages for speeding up LAMMPS simulations. ----------- +GPU package settings +^^^^^^^^^^^^^^^^^^^^ The *gpu* style invokes settings associated with the use of the :ref:`GPU package <PKG-GPU>`. @@ -366,7 +369,8 @@ For OpenCL, the routines are compiled at runtime for the specified GPU or accelerator architecture. The *ocl\_args* keyword can be used to specify additional flags for the runtime build. ----------- +INTEL package settings +^^^^^^^^^^^^^^^^^^^^^^ The *intel* style invokes settings associated with the use of the INTEL package. The keywords *balance*, *ghost*, *tpc*, and *tptask* are @@ -469,7 +473,8 @@ to prevent MPI tasks and OpenMP threads from being on separate NUMA domains and to prevent offload threads from interfering with other processes/threads used for LAMMPS. ----------- +KOKKOS package settings +^^^^^^^^^^^^^^^^^^^^^^^ The *kokkos* style invokes settings associated with the use of the KOKKOS package.
@@ -660,7 +665,8 @@ The *bond/block/size* keyword sets the number of GPU threads per block used for launching the bond force kernel on the GPU. The default value of this parameter is determined based on the GPU architecture at runtime. ----------- +OPENMP package settings +^^^^^^^^^^^^^^^^^^^^^^^ The *omp* style invokes settings associated with the use of the OPENMP package. From 7a49bac177b57a73b4b52cf590b16bd9ccdab209 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 13 Nov 2025 20:57:51 -0500 Subject: [PATCH 261/604] add custom target "tools" and make "tgz" and "dmg" targets depend on it --- cmake/CMakeLists.txt | 4 ++-- cmake/Modules/Tools.cmake | 25 +++++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c721487ea65..5c3e89584ba 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -515,11 +515,11 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA endif() if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS) - if (NOT USE_INTERNAL_LINALG) + if(NOT USE_INTERNAL_LINALG) find_package(LAPACK) find_package(BLAS) endif() - if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG) + if((NOT LAPACK_FOUND) OR (NOT BLAS_FOUND) OR USE_INTERNAL_LINALG) file(GLOB LINALG_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp) add_library(linalg STATIC ${LINALG_SOURCES}) set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE}) diff --git a/cmake/Modules/Tools.cmake b/cmake/Modules/Tools.cmake index 4dfa09c6f5b..65b47d58bbd 100644 --- a/cmake/Modules/Tools.cmake +++ b/cmake/Modules/Tools.cmake @@ -10,6 +10,9 @@ if(BUILD_TOOLS) target_include_directories(reformat-json PRIVATE ${LAMMPS_SOURCE_DIR}) install(TARGETS reformat-json DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_custom_target(tools ALL COMMENT "Building tools") + add_dependencies(tools binary2txt stl_bin2txt reformat-json) + 
include(CheckGeneratorSupport) if(CMAKE_GENERATOR_SUPPORT_FORTRAN) include(CheckLanguage) @@ -21,6 +24,7 @@ if(BUILD_TOOLS) add_executable(micelle2d.x ${LAMMPS_TOOLS_DIR}/micelle2d.f90) target_link_libraries(micelle2d.x PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) install(TARGETS chain.x micelle2d.x DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_dependencies(tools chain.x micelle2d.x) else() message(WARNING "No suitable Fortran compiler found, skipping build of 'chain.x' and 'micelle2d.x'") endif() @@ -39,6 +43,7 @@ if(BUILD_TOOLS) install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) add_subdirectory(${LAMMPS_TOOLS_DIR}/phonon ${CMAKE_BINARY_DIR}/phana_build) + add_dependencies(tools msi2lmp phana) endif() if(BUILD_LAMMPS_GUI) @@ -170,7 +175,7 @@ if(BUILD_LAMMPS_GUI) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_DIR}/doc/lammps.1 ${APP_CONTENTS}/share/lammps/man/man1/ COMMAND ${CMAKE_COMMAND} -E create_symlink lammps.1 ${APP_CONTENTS}/share/lammps/man/man1/lmp.1 COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_DIR}/doc/msi2lmp.1 ${APP_CONTENTS}/share/lammps/man/man1 - DEPENDS lammps lmp binary2txt stl_bin2txt msi2lmp phana lammps-gui_build + DEPENDS lammps lmp tools lammps-gui_build COMMENT "Copying additional files into macOS app bundle tree" WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) @@ -265,12 +270,16 @@ if(BUILD_LAMMPS_GUI) endif() ]] ) - add_custom_target(tgz - COMMAND ${LAMMPS_DIR}/cmake/packaging/build_linux_tgz.sh ${LAMMPS_RELEASE} - DEPENDS lmp lammps-gui_build ${WHAM_EXE} - COMMENT "Create compressed tar file of LAMMPS-GUI with dependent libraries and wrapper" - BYPRODUCT LAMMPS-Linux-x86_64-GUI-${LAMMPS_RELEASE}.tar.gz - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) + if (USE_INTERNAL_LINALG) + add_custom_target(tgz + COMMAND ${LAMMPS_DIR}/cmake/packaging/build_linux_tgz.sh ${LAMMPS_RELEASE} + DEPENDS lmp tools lammps-gui_build ${WHAM_EXE} + COMMENT "Create compressed tar file of LAMMPS-GUI with dependent libraries 
and wrapper" + BYPRODUCT LAMMPS-Linux-x86_64-GUI-${LAMMPS_RELEASE}.tar.gz + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + else() + message(FATAL_ERROR "Must use -D USE_INTERNAL_LINALG=ON for building Linux tgz package") + endif() endif() endif() From b818e9392793dd42d0a6f232d910c3c3d4a57f92 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 13 Nov 2025 21:02:32 -0500 Subject: [PATCH 262/604] make USE_INTERNAL_LINALG setting an option, so it is cached --- cmake/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 5c3e89584ba..76c04ca0d37 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -514,12 +514,14 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA PROPERTIES COMPILE_OPTIONS "-std=c++14") endif() +option(USE_INTERNAL_LINALG "Prefer internal library with BLAS/LAPACK subset over system BLAS/LAPACK" OFF) if(PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS) if(NOT USE_INTERNAL_LINALG) find_package(LAPACK) find_package(BLAS) endif() if((NOT LAPACK_FOUND) OR (NOT BLAS_FOUND) OR USE_INTERNAL_LINALG) + set(USE_INTERNAL_LINALG ON) file(GLOB LINALG_SOURCES CONFIGURE_DEPENDS ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp) add_library(linalg STATIC ${LINALG_SOURCES}) set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE}) From 8fa3e790200fc538d201cd30f0a97878551caf21 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 13 Nov 2025 21:19:53 -0500 Subject: [PATCH 263/604] done download and bundle oversize potential files --- cmake/Modules/Tools.cmake | 8 ++++++-- cmake/packaging/build_linux_tgz.sh | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/Tools.cmake b/cmake/Modules/Tools.cmake index 65b47d58bbd..5064f41b640 100644 --- a/cmake/Modules/Tools.cmake +++ b/cmake/Modules/Tools.cmake @@ -270,7 +270,7 @@ if(BUILD_LAMMPS_GUI) endif() ]] ) - if (USE_INTERNAL_LINALG) + 
if(USE_INTERNAL_LINALG AND (NOT DOWNLOAD_POTENTIALS)) add_custom_target(tgz COMMAND ${LAMMPS_DIR}/cmake/packaging/build_linux_tgz.sh ${LAMMPS_RELEASE} DEPENDS lmp tools lammps-gui_build ${WHAM_EXE} @@ -279,7 +279,11 @@ if(BUILD_LAMMPS_GUI) WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) else() - message(FATAL_ERROR "Must use -D USE_INTERNAL_LINALG=ON for building Linux tgz package") + if(DOWNLOAD_POTENTIALS) + message(FATAL_ERROR "Must use -D DOWNLOAD_POTENTIALS=OFF for building Linux tgz package") + else() + message(FATAL_ERROR "Must use -D USE_INTERNAL_LINALG=ON for building Linux tgz package") + endif() endif() endif() endif() diff --git a/cmake/packaging/build_linux_tgz.sh b/cmake/packaging/build_linux_tgz.sh index 97320fe51f9..b9a78bdf25d 100755 --- a/cmake/packaging/build_linux_tgz.sh +++ b/cmake/packaging/build_linux_tgz.sh @@ -24,6 +24,8 @@ rm -f ${DESTDIR}/lib/lib{c,dl,rt,m,pthread}-[0-9].[0-9]*.so rm -f ${DESTDIR}/lib/libX* ${DESTDIR}/lib/libxcb* rm -f ${DESTDIR}/lib/libgcc_s* rm -f ${DESTDIR}/lib/libstdc++* +echo "Remove oversize potential files" +rm -f ${DESTDIR}/share/lammps/potentials/C_10_10.mesocnt # get qt dir QTDIR=$(ldd ${DESTDIR}/bin/lammps-gui | grep libQt.Core | sed -e 's/^.*=> *//' -e 's/libQt\(.\)Core.so.*$/qt\1/') From 16e1ef727f74985b4a94e39da6761f7831d714e0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 13 Nov 2025 21:37:04 -0500 Subject: [PATCH 264/604] correct docs for kspace style zero and add force style test for it --- doc/src/kspace_style.rst | 8 +- unittest/force-styles/tests/kspace-zero.yaml | 90 ++++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 unittest/force-styles/tests/kspace-zero.yaml diff --git a/doc/src/kspace_style.rst b/doc/src/kspace_style.rst index 2ff1f9ed56a..94ced5f03a2 100644 --- a/doc/src/kspace_style.rst +++ b/doc/src/kspace_style.rst @@ -122,7 +122,8 @@ Syntax *scafacos* values = method accuracy method = fmm or p2nfft or p3m or ewald or direct accuracy = desired relative 
error in forces - *zero* value = none + *zero* value = accuracy + accuracy = desired relative error in forces Examples """""""" @@ -134,7 +135,7 @@ Examples kspace_style msm 1.0e-4 kspace_style scafacos fmm 1.0e-4 kspace_style none - kspace_style zero + kspace_style zero 1.0e-6 Used in input scripts: @@ -382,6 +383,9 @@ other ScaFaCoS options currently exposed to LAMMPS. The *zero* style does not do any calculations, but is compatible with all pair styles that require some version of a kspace style. +The accuracy argument is required for some internal calculations +but has no impact on forces or energy, since those will always +be zero. ---------- diff --git a/unittest/force-styles/tests/kspace-zero.yaml b/unittest/force-styles/tests/kspace-zero.yaml new file mode 100644 index 00000000000..3112851ca1c --- /dev/null +++ b/unittest/force-styles/tests/kspace-zero.yaml @@ -0,0 +1,90 @@ +--- +lammps_version: 10 Sep 2025 +date_generated: Thu Nov 13 21:32:05 2025 +epsilon: 7.5e-14 +skip_tests: +prerequisites: ! | + atom full + pair coul/long + kspace zero +pre_commands: ! "" +post_commands: ! | + pair_modify mix arithmetic + pair_modify table 0 + pair_modify compute no + kspace_style zero 1.0e-4 +input_file: in.fourmol +pair_style: coul/long 8.0 +pair_coeff: ! | + * * +extract: ! "" +natoms: 29 +init_vdwl: 0 +init_coul: 0 +init_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +init_forces: ! 
|2 + 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +run_vdwl: 0 +run_coul: 0 +run_stress: ! |2- + 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +run_forces: ! |2 + 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 23 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 + 24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 + 29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +... From 859eb45146fd3d83c7a51f8bbfb393149507940a Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 14 Nov 2025 08:10:27 -0600 Subject: [PATCH 265/604] updated other coreshell styles --- lib/gpu/geryon/ocl_kernel.h | 13 +++++++++++++ lib/gpu/lal_born_coul_wolf_cs_ext.cpp | 4 ++-- lib/gpu/lal_coul_long_cs_ext.cpp | 4 ++-- src/GPU/pair_born_coul_wolf_cs_gpu.cpp | 4 ++-- src/GPU/pair_coul_long_cs_gpu.cpp | 4 ++-- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lib/gpu/geryon/ocl_kernel.h b/lib/gpu/geryon/ocl_kernel.h index 7b7fca9dfcd..a0e39a6e85a 100644 --- a/lib/gpu/geryon/ocl_kernel.h +++ b/lib/gpu/geryon/ocl_kernel.h @@ -67,9 +67,22 @@ class UCL_Program { /** \note Must call init() after each clear **/ inline void clear() { if (_init_done) { + #if CL_VERSION_2_0 + cl_context ctx_from_queue = nullptr; + cl_int err = clGetCommandQueueInfo(_cq, + CL_QUEUE_CONTEXT, + sizeof(ctx_from_queue), + &ctx_from_queue, + nullptr); + if (err == CL_SUCCESS) + CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq)); + CL_DESTRUCT_CALL(clReleaseProgram(_program)); + CL_DESTRUCT_CALL(clReleaseContext(_context)); + #else CL_DESTRUCT_CALL(clReleaseProgram(_program)); CL_DESTRUCT_CALL(clReleaseContext(_context)); CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq)); + #endif _init_done=false; } } diff --git a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp index be075047276..a553fc6681f 100644 --- a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp +++ 
b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp @@ -109,11 +109,11 @@ int** borncwcs_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + double *prd, int* periodicity) { return BornCWCST.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/lib/gpu/lal_coul_long_cs_ext.cpp b/lib/gpu/lal_coul_long_cs_ext.cpp index be4630516c2..66df48046be 100644 --- a/lib/gpu/lal_coul_long_cs_ext.cpp +++ b/lib/gpu/lal_coul_long_cs_ext.cpp @@ -120,11 +120,11 @@ int** clcs_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + double *prd, int* periodicity) { return CLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void clcs_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp index 128863527c2..cc0b7a5d4a5 100644 --- a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp +++ b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp @@ -49,7 +49,7 @@ int **borncwcs_gpu_compute_n(const int ago, const int inum_full, const int nall, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, - double *boxlo, double *prd); + double *boxlo, 
double *prd, int* periodicity); void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -105,7 +105,7 @@ void PairBornCoulWolfCSGPU::compute(int eflag, int vflag) firstneigh = borncwcs_gpu_compute_n( neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->q, domain->boxlo, domain->prd); + cpu_time, success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; diff --git a/src/GPU/pair_coul_long_cs_gpu.cpp b/src/GPU/pair_coul_long_cs_gpu.cpp index 1b8c3351fd9..b77d886bf1a 100644 --- a/src/GPU/pair_coul_long_cs_gpu.cpp +++ b/src/GPU/pair_coul_long_cs_gpu.cpp @@ -57,7 +57,7 @@ int **clcs_gpu_compute_n(const int ago, const int inum, const int nall, double * tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd); + double *prd, int* periodicity); void clcs_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -111,7 +111,7 @@ void PairCoulLongCSGPU::compute(int eflag, int vflag) firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, domain->prd); + success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; From 
6716cfb18e31bb0224419894ba323e8c37580918 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 14 Nov 2025 11:39:53 -0500 Subject: [PATCH 266/604] don't fail on tgz target when not using it --- cmake/Modules/Tools.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/Tools.cmake b/cmake/Modules/Tools.cmake index 5064f41b640..dd497895c52 100644 --- a/cmake/Modules/Tools.cmake +++ b/cmake/Modules/Tools.cmake @@ -280,9 +280,11 @@ if(BUILD_LAMMPS_GUI) ) else() if(DOWNLOAD_POTENTIALS) - message(FATAL_ERROR "Must use -D DOWNLOAD_POTENTIALS=OFF for building Linux tgz package") + add_custom_target(tgz + COMMAND ${CMAKE_COMMAND} -E echo "Must use -D DOWLOAD_POTENTIALS=OFF for building Linux tgz package") else() - message(FATAL_ERROR "Must use -D USE_INTERNAL_LINALG=ON for building Linux tgz package") + add_custom_target(tgz + COMMAND ${CMAKE_COMMAND} -E echo "Must use -D USE_INTERNAL_LINALG=ON for building Linux tgz package") endif() endif() endif() From fbf9ece5a0df43e74375a300b02d6ae0c7588ad4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 14 Nov 2025 12:35:57 -0500 Subject: [PATCH 267/604] improve binary packaging on Linux --- cmake/packaging/build_linux_tgz.sh | 16 ++++++++++++++-- cmake/packaging/linux_wrapper.sh | 4 +++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cmake/packaging/build_linux_tgz.sh b/cmake/packaging/build_linux_tgz.sh index b9a78bdf25d..bf2a8aceba5 100755 --- a/cmake/packaging/build_linux_tgz.sh +++ b/cmake/packaging/build_linux_tgz.sh @@ -14,9 +14,13 @@ cp lammps-gui_build-prefix/bin/lammps-gui ${DESTDIR}/bin/ echo "Remove debug info" for s in ${DESTDIR}/bin/* ${DESTDIR}/lib/liblammps* do \ - test -f $s && strip --strip-debug $s + test -f $s && strip --strip-debug $s done +echo "Move LAMMPS shared library to its own folder" +mkdir -p ${DESTDIR}/libexec/lammps +mv -v ${DESTDIR}/lib/liblammps* ${DESTDIR}/libexec/lammps/ + echo "Remove libc, gcc, and X11 related shared libs" rm -f 
${DESTDIR}/lib/ld*.so ${DESTDIR}/lib/ld*.so.[0-9] rm -f ${DESTDIR}/lib/lib{c,dl,rt,m,pthread}.so.? @@ -27,13 +31,21 @@ rm -f ${DESTDIR}/lib/libstdc++* echo "Remove oversize potential files" rm -f ${DESTDIR}/share/lammps/potentials/C_10_10.mesocnt -# get qt dir +# get Qt dir QTDIR=$(ldd ${DESTDIR}/bin/lammps-gui | grep libQt.Core | sed -e 's/^.*=> *//' -e 's/libQt\(.\)Core.so.*$/qt\1/') + +# configure some settings files for Qt cat > ${DESTDIR}/bin/qt.conf < ${DESTDIR}/bin/qtlogging.ini < Date: Fri, 14 Nov 2025 15:41:03 -0500 Subject: [PATCH 268/604] stop coverity scan from complaining about using uninitialized variables --- src/region_cone.cpp | 6 ++---- src/region_cylinder.cpp | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/region_cone.cpp b/src/region_cone.cpp index 56d7330cef9..0f343c4869c 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -826,16 +826,14 @@ void RegCone::bbox_update() ymax = c1 + maxradius; zmin = c2 - maxradius; zmax = c2 + maxradius; - } - if (axis == 'y') { + } else if (axis == 'y') { xmin = c1 - maxradius; xmax = c1 + maxradius; ymin = lo; ymax = hi; zmin = c2 - maxradius; zmax = c2 + maxradius; - } - if (axis == 'z') { + } else { // (axis == 'z') { xmin = c1 - maxradius; xmax = c1 + maxradius; ymin = c2 - maxradius; diff --git a/src/region_cylinder.cpp b/src/region_cylinder.cpp index 37b6a30444d..580b1908594 100644 --- a/src/region_cylinder.cpp +++ b/src/region_cylinder.cpp @@ -809,16 +809,14 @@ void RegCylinder::bbox_update() ymax = c1 + radius; zmin = c2 - radius; zmax = c2 + radius; - } - if (axis == 'y') { + } else if (axis == 'y') { xmin = c1 - radius; xmax = c1 + radius; ymin = lo; ymax = hi; zmin = c2 - radius; zmax = c2 + radius; - } - if (axis == 'z') { + } else { // (axis == 'z') xmin = c1 - radius; xmax = c1 + radius; ymin = c2 - radius; From cda824ee225763f8ab8e46da29142e4ac8cc28f8 Mon Sep 17 00:00:00 2001 From: jtclemm Date: Fri, 14 Nov 2025 14:10:06 -0700 Subject: [PATCH 269/604] 
EdLimiting cv to be > 0 --- src/RHEO/fix_rheo_thermal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RHEO/fix_rheo_thermal.cpp b/src/RHEO/fix_rheo_thermal.cpp index 61f4ade90ab..64b93b7f73e 100644 --- a/src/RHEO/fix_rheo_thermal.cpp +++ b/src/RHEO/fix_rheo_thermal.cpp @@ -128,7 +128,7 @@ FixRHEOThermal::FixRHEOThermal(LAMMPS *lmp, int narg, char **arg) : utils::missing_cmd_args(FLERR, "fix rheo/thermal specific/heat constant", error); double cv_one = utils::numeric(FLERR, arg[iarg + 3], false, lmp); - if (cv_one < 0.0) error->all(FLERR, "The specific heat must be positive"); + if (cv_one <= 0.0) error->all(FLERR, "The specific heat must be greater than zero"); iarg += 2; for (i = nlo; i <= nhi; i++) { From 3f7c7348b5bd0c223855e1b36282c623f8553f38 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 14 Nov 2025 17:23:38 -0600 Subject: [PATCH 270/604] fixed typo, updated lj/cut/coul/long --- lib/gpu/geryon/ocl_kernel.h | 2 +- lib/gpu/lal_lj_coul_long_ext.cpp | 4 ++-- src/GPU/pair_lj_cut_coul_long_gpu.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/gpu/geryon/ocl_kernel.h b/lib/gpu/geryon/ocl_kernel.h index a0e39a6e85a..22547291fa0 100644 --- a/lib/gpu/geryon/ocl_kernel.h +++ b/lib/gpu/geryon/ocl_kernel.h @@ -67,7 +67,7 @@ class UCL_Program { /** \note Must call init() after each clear **/ inline void clear() { if (_init_done) { - #if CL_VERSION_2_0 + #ifdef CL_VERSION_2_0 cl_context ctx_from_queue = nullptr; cl_int err = clGetCommandQueueInfo(_cq, CL_QUEUE_CONTEXT, diff --git a/lib/gpu/lal_lj_coul_long_ext.cpp b/lib/gpu/lal_lj_coul_long_ext.cpp index 578e38e4a05..ec5c4e3e6db 100644 --- a/lib/gpu/lal_lj_coul_long_ext.cpp +++ b/lib/gpu/lal_lj_coul_long_ext.cpp @@ -127,11 +127,11 @@ int** ljcl_gpu_compute_n(const int ago, const int inum_full, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd) { + 
double *prd, int* periodicity) { return LJCLMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd); + host_q, boxlo, prd, periodicity); } void ljcl_gpu_compute(const int ago, const int inum_full, const int nall, diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp index d121f7f9b02..d0072c54929 100644 --- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp @@ -49,7 +49,7 @@ int **ljcl_gpu_compute_n(const int ago, const int inum, const int nall, double * tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, - double *prd); + double *prd, int* periodicity); void ljcl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -104,7 +104,7 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag) firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, domain->prd); + success, atom->q, domain->boxlo, domain->prd, domain->periodicity); } else { inum = list->inum; ilist = list->ilist; From d7b16bd3cfa3c547321baf208808c30a05f97333 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sat, 15 Nov 2025 18:26:39 -0500 Subject: [PATCH 271/604] bond/react output file --- src/REACTION/fix_bond_react.cpp | 81 ++++++++++++++++++++++++++++++++- src/REACTION/fix_bond_react.h | 4 +- src/output.cpp | 22 ++++++++- src/output.h | 21 ++++++--- 4 files changed, 118 insertions(+), 10 
deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index b3c2d40f60d..c0edad4c91e 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -35,6 +35,7 @@ Contributing Author: Jacob Gissinger (jgissing@stevens.edu) #include "molecule.h" #include "neigh_list.h" #include "neighbor.h" +#include "output.h" #include "pair.h" #include "random_mars.h" #include "reset_atoms_mol.h" @@ -97,6 +98,9 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : fix2 = nullptr; fix3 = nullptr; reset_mol_ids = nullptr; + fpout = nullptr; + json_init = 0; + outflag = 0; if (narg < 8) utils::missing_cmd_args(FLERR,"fix bond/react", error); @@ -229,6 +233,26 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"fix bond/react seed", error); shuffle_seed = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; + } else if (strcmp(arg[iarg], "file") == 0) { + if (iarg + 2 > narg) + utils::missing_cmd_args(FLERR, std::string("Fix bond/react ") + arg[iarg], error); + outflag = 1; + if (comm->me == 0) { + fpout = fopen(arg[iarg + 1], "w"); + if (fpout == nullptr) + error->one(FLERR, "Cannot open fix bond/react output file {}: {}", arg[iarg + 1], + utils::getsyserror()); + // header for 'delete' keyword JSON output + fprintf(fpout, "{\n"); + fprintf(fpout, " \"application\": \"LAMMPS\",\n"); + fprintf(fpout, " \"format\": \"dump\",\n"); + fprintf(fpout, " \"style\": \"molecules\",\n"); + fprintf(fpout, " \"title\": \"fix bond/react\",\n"); + fprintf(fpout, " \"revision\": 1,\n"); + fprintf(fpout, " \"timesteps\": [\n"); + fflush(fpout); + } + iarg += 2; } else if (strcmp(arg[iarg],"react") == 0) { break; } else error->all(FLERR, iarg, "Unknown fix bond/react command keyword {}", arg[iarg]); @@ -612,6 +636,11 @@ FixBondReact::~FixBondReact() delete[] set; + if (comm->me == 0) { + if (outflag == 1) fprintf(fpout, " }\n ]\n}"); + if (fpout) fclose(fpout); 
+ } + if (group) { group->assign(master_group + " delete"); if (stabilization_flag == 1) group->assign(exclude_group + " delete"); @@ -634,6 +663,9 @@ let's add an internal nve/limit fix for relaxation of reaction sites also let's add our per-atom property fix here! this per-atom property will state the timestep an atom was 'limited' it will have the name 'i_limit_tags' and will be intitialized to 0 (not in group) +'i_react_tags' holds reaction ID for reacting atoms +'i_rxn_instance' is unique tag for each ongoing reaction. use first initiator atom ID! +'i_statted_tags' is 1 for non-reacting atoms ------------------------------------------------------------------------- */ void FixBondReact::post_constructor() @@ -641,7 +673,7 @@ void FixBondReact::post_constructor() // let's add the limit_tags per-atom property fix id_fix2 = "bond_react_props_internal"; if (!modify->get_fix_by_id(id_fix2)) - fix2 = modify->add_fix(id_fix2 + " all property/atom i_limit_tags i_react_tags ghost yes"); + fix2 = modify->add_fix(id_fix2 + " all property/atom i_limit_tags i_react_tags i_rxn_instance ghost yes"); // create master_group if not already existing // NOTE: limit_tags and react_tags automaticaly intitialized to zero (unless read from restart) @@ -2723,6 +2755,9 @@ void FixBondReact::unlimit_bond() int index3 = atom->find_custom("react_tags",flag,cols); int *i_react_tags = atom->ivector[index3]; + int index4 = atom->find_custom("rxn_instance",flag,cols); + int *i_rxn_instance = atom->ivector[index4]; + int unlimitflag = 0; for (int i = 0; i < atom->nlocal; i++) { // unlimit atoms for next step! 
this resolves # of procs disparity, mostly @@ -2732,6 +2767,7 @@ void FixBondReact::unlimit_bond() i_limit_tags[i] = 0; if (stabilization_flag == 1) i_statted_tags[i] = 1; i_react_tags[i] = 0; + i_rxn_instance[i] = 0; } } @@ -2926,6 +2962,9 @@ void FixBondReact::update_everything() int index3 = atom->find_custom("react_tags",flag,cols); int *i_react_tags = atom->ivector[index3]; + int index4 = atom->find_custom("rxn_instance",flag,cols); + int *i_rxn_instance = atom->ivector[index4]; + // pass through twice // redefining 'update_num_mega' and 'update_mega_glove' each time // first pass: when glove is all local atoms @@ -3137,6 +3176,7 @@ void FixBondReact::update_everything() i_limit_tags[ilocal] = update->ntimestep + 1; if (stabilization_flag == 1) i_statted_tags[ilocal] = 0; i_react_tags[ilocal] = rxn.ID; + i_rxn_instance[ilocal] = update_mega_glove[rxn.ibonding+1][i]; if (rxn.atoms[j].landlocked == 1) type[ilocal] = rxn.product->type[j]; @@ -3560,6 +3600,45 @@ void FixBondReact::update_everything() } + //need to take out of update_everything to print for all stabilization steps + //also should probably give choice to print just on step that reaction starts + if (outflag == 1) { + std::string indent; + int json_level = 2, tab = 4; + if (comm->me == 0) { + indent.resize(json_level*tab, ' '); + if (json_init > 0) { + fprintf(fpout, "%s},\n%s{\n", indent.c_str(), indent.c_str()); + } else { + fprintf(fpout, "%s{\n", indent.c_str()); + json_init = 1; + } + indent.resize(++json_level*tab, ' '); + utils::print(fpout, "{}\"timestep\": {},\n", indent, update->ntimestep); + utils::print(fpout, "{}\"molecules\": [\n", indent); + indent.resize(++json_level*tab, ' '); + } + + // add Metadata struct to print out react-ID, using 'reaction' JSON key + Output::JSON_Metadata rxn_metadata; + rxn_metadata.metaflag = true; + rxn_metadata.key = "reaction"; + std::vector rxn_names; + rxn_names.reserve(rxns.size()); + for (auto const& rxn : rxns) + rxn_names.push_back(rxn.name); + 
rxn_metadata.values = rxn_names; + rxn_metadata.ivec = i_react_tags; + + output->write_molecule_json(fpout, json_level, json_init, i_rxn_instance, rxn_metadata); + if (json_init == 1) json_init++; + if (comm->me == 0) { + indent.resize(--json_level*tab, ' '); + fprintf(fpout, "%s]\n", indent.c_str()); + fflush(fpout); + } + } + memory->destroy(update_mega_glove); if (rescale_charges_anyflag) memory->destroy(sim_total_charges); diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index 2d1e617857f..c6f4d71b6b2 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -67,6 +67,8 @@ class FixBondReact : public Fix { int newton_bond; FILE *fp; tagint lastcheck; + FILE *fpout; + int outflag, json_init; int stabilization_flag; Reset_Mol_IDs molid_mode; int custom_exclude_flag; @@ -78,7 +80,7 @@ class FixBondReact : public Fix { Status status; struct Reaction { - int ID; + int ID; // indexed from 0 class Molecule *reactant; // pre-reacted molecule template class Molecule *product; // post-reacted molecule template std::string name, constraintstr; diff --git a/src/output.cpp b/src/output.cpp index 756682e9c7f..70e28d5cdc0 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -635,7 +635,7 @@ void Output::write_restart(bigint ntimestep) atoms with integer array value of 0 assumed to not belong to a molecule ------------------------------------------------------------------------- */ -void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *ivec) +void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *ivec, JSON_Metadata metadata) { std::string indent; int tab = 4; @@ -666,6 +666,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i for (int ival = 0; ival < nvals; ival++) { int thisval = loop_ivals[ival]; + std::string metadata_val; Particle myatom; int n2send = 0, n2recv = 0; @@ -673,6 +674,8 @@ void Output::write_molecule_json(FILE *fp, int 
json_level, int printflag, int *i if (ivec[i] == thisval) { myatom.type = atom->type[i]; myatom.tag = (int) atom->tag[i]; + if (metadata.metaflag && metadata_val.empty() && comm->me == sendr) + metadata_val = metadata.values[metadata.ivec[atom->map(myatom.tag)]]; for (int k = 0; k < 3; k++) myatom.x[k] = atom->x[i][k]; atoms_local.push_back(myatom); @@ -681,6 +684,11 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i } #if !defined(MPI_STUBS) if (comm->me != 0) { + if (metadata.metaflag && comm->me == sendr) { + int len = metadata_val.size(); + MPI_Send(&len, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); + MPI_Send(metadata_val.data(), len, MPI_CHAR, 0, 0, MPI_COMM_WORLD); + } MPI_Send(&n2send, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); MPI_Send(atoms_local.data(), n2send, ParticleStructType, 0, 0, MPI_COMM_WORLD); } @@ -689,6 +697,12 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i if (comm->me == 0) { #if !defined(MPI_STUBS) for (int i = 1; i < comm->nprocs; i++) { + if (metadata.metaflag && i == sendr) { + int len; + MPI_Recv(&len, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + metadata_val.resize(len); + MPI_Recv(&metadata_val[0], len, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } MPI_Recv(&n2recv, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); std::vector atoms_recv(n2recv); MPI_Recv(atoms_recv.data(), n2recv, ParticleStructType, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); @@ -705,6 +719,12 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i json_init = 1; } + if (metadata.metaflag) { + indent.resize(++json_level*tab, ' '); + utils::print(fp, "{}\"{}\": \"{}\",\n", indent.c_str(), metadata.key, metadata_val); + indent.resize(--json_level*tab, ' '); + } + indent.resize(++json_level*tab, ' '); fprintf(fp, "%s\"types\": {\n", indent.c_str()); indent.resize(++json_level*tab, ' '); diff --git a/src/output.h b/src/output.h index 574c2566c9f..645d1625926 100644 
--- a/src/output.h +++ b/src/output.h @@ -77,16 +77,23 @@ class Output : protected Pointers { } Particle; MPI_Datatype createParticleStructType(); + struct JSON_Metadata { + bool metaflag = false; // indicates if there is metadata + std::string key; // JSON key + std::vector values; // JSON value + int *ivec; // per-atom vector, indices for 'values' + }; + Output(class LAMMPS *); ~Output() override; void init(); - void setup(int memflag = 1); // initial output before run/min - void write(bigint); // output for current timestep - void write_dump(bigint); // force output of dump snapshots - void write_restart(bigint); // force output of a restart file - void write_molecule_json(FILE *, int, int, int *); // output molecule JSON objects to file - void reset_timestep(bigint); // reset output which depends on timestep - void reset_dt(); // reset output which depends on timestep size + void setup(int memflag = 1); // initial output before run/min + void write(bigint); // output for current timestep + void write_dump(bigint); // force output of dump snapshots + void write_restart(bigint); // force output of a restart file + void write_molecule_json(FILE *, int, int, int *, JSON_Metadata); // output molecule JSON objects to file + void reset_timestep(bigint); // reset output which depends on timestep + void reset_dt(); // reset output which depends on timestep size Dump *add_dump(int, char **); // add a Dump to Dump list void modify_dump(int, char **); // modify a Dump From 287f53c702dd93044e8811d6e52bb7e69c9ae92b Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 00:37:33 -0500 Subject: [PATCH 272/604] overloading write_molecule_json compatible with reaxff_species --- src/output.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/output.h b/src/output.h index 9bda32b1f9b..bcb891d58c6 100644 --- a/src/output.h +++ b/src/output.h @@ -73,10 +73,11 @@ class Output : protected Pointers { MPI_Datatype createParticleStructType(); struct 
JSON_Metadata { - bool metaflag = false; // indicates if there is metadata + bool metaflag; // indicates if there is metadata std::string key; // JSON key std::vector values; // JSON value int *ivec; // per-atom vector, indices for 'values' + JSON_Metadata() : metaflag(false) {} }; Output(class LAMMPS *); @@ -86,7 +87,7 @@ class Output : protected Pointers { void write(bigint); // output for current timestep void write_dump(bigint); // force output of dump snapshots void write_restart(bigint); // force output of a restart file - void write_molecule_json(FILE *, int, int, int *, JSON_Metadata); // output molecule JSON objects to file + void write_molecule_json(FILE *, int, int, int *, JSON_Metadata = JSON_Metadata{}); // JSON dump molecules void reset_timestep(bigint); // reset output which depends on timestep void reset_dt(); // reset output which depends on timestep size From 78639297a3ba46ef8126ed4170b30359ce2c39fb Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 13:05:51 -0500 Subject: [PATCH 273/604] slight speedup --- src/REACTION/fix_bond_react.cpp | 33 +++++++++++++++++---------------- src/REACTION/fix_bond_react.h | 5 ++++- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index dae04009dcc..fd9d8f9aaf9 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -100,7 +100,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : reset_mol_ids = nullptr; fpout = nullptr; json_init = 0; - outflag = 0; + outflag = false; if (narg < 8) utils::missing_cmd_args(FLERR,"fix bond/react", error); @@ -236,7 +236,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : } else if (strcmp(arg[iarg], "file") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, std::string("Fix bond/react ") + arg[iarg], error); - outflag = 1; + outflag = true; if (comm->me == 0) { fpout = fopen(arg[iarg + 1], "w"); if (fpout == 
nullptr) @@ -439,6 +439,18 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : } } + if (outflag) { + // add Metadata struct to print out react-ID to JSON molecules dump + // adds 'reaction' JSON key to each molecule + rxn_metadata.metaflag = true; + rxn_metadata.key = "reaction"; + std::vector rxn_names; + rxn_names.reserve(rxns.size()); + for (auto const& rxn : rxns) + rxn_names.push_back(rxn.name); + rxn_metadata.values = rxn_names; + } + for (auto &rlm : rate_limits) { for (int i = 0; i < rlm.Nrxns; i++) { int existflag = 0; @@ -637,7 +649,7 @@ FixBondReact::~FixBondReact() delete[] set; if (comm->me == 0) { - if (outflag == 1) fprintf(fpout, " }\n ]\n}"); + if (outflag) fprintf(fpout, " }\n ]\n}"); if (fpout) fclose(fpout); } @@ -3600,9 +3612,8 @@ void FixBondReact::update_everything() } - //need to take out of update_everything to print for all stabilization steps - //also should probably give choice to print just on step that reaction starts - if (outflag == 1) { + // currently dumping each reaction once, on step that reaction occurs + if (outflag) { std::string indent; int json_level = 2, tab = 4; if (comm->me == 0) { @@ -3619,17 +3630,7 @@ void FixBondReact::update_everything() indent.resize(++json_level*tab, ' '); } - // add Metadata struct to print out react-ID, using 'reaction' JSON key - Output::JSON_Metadata rxn_metadata; - rxn_metadata.metaflag = true; - rxn_metadata.key = "reaction"; - std::vector rxn_names; - rxn_names.reserve(rxns.size()); - for (auto const& rxn : rxns) - rxn_names.push_back(rxn.name); - rxn_metadata.values = rxn_names; rxn_metadata.ivec = i_react_tags; - output->write_molecule_json(fpout, json_level, json_init, i_rxn_instance, rxn_metadata); if (json_init == 1) json_init++; if (comm->me == 0) { diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index c6f4d71b6b2..d8a785ca77b 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -25,6 +25,7 @@ 
FixStyle(bond/react,FixBondReact); #define LMP_FIX_BOND_REACT_H #include "fix.h" +#include "output.h" #include #include @@ -68,7 +69,9 @@ class FixBondReact : public Fix { FILE *fp; tagint lastcheck; FILE *fpout; - int outflag, json_init; + bool outflag; + int json_init; + Output::JSON_Metadata rxn_metadata; int stabilization_flag; Reset_Mol_IDs molid_mode; int custom_exclude_flag; From a2fe4f9c4c59beabf97bbac47fe7e5f49aee7366 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 14:04:40 -0500 Subject: [PATCH 274/604] JSON dump molecules docs add generic JSON dump molecules description --- doc/src/Run_formats.rst | 69 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/doc/src/Run_formats.rst b/doc/src/Run_formats.rst index d03227f091a..145c6837b1b 100644 --- a/doc/src/Run_formats.rst +++ b/doc/src/Run_formats.rst @@ -78,7 +78,7 @@ For floating point numbers in scientific notation, the Fortran double precision notation "1.1d3" is not accepted; you have to use "1100", "1100.0" or "1.1e3". -Input file +Input File ^^^^^^^^^^ A LAMMPS input file is a text file with commands. It is read @@ -161,7 +161,7 @@ recommended to always have an empty line at the end of an input file. The specific details describing how LAMMPS input is processed and parsed are explained in :doc:`Commands_parse`. -Data file +Data File ^^^^^^^^^ A LAMMPS data file contains a description of a system suitable for @@ -302,7 +302,7 @@ Molecule-ID', one for each atom in the system. For adding charges to atom style molecular with fix property/atom, the "Atoms" section is now formatted according to the atom style and a "Charges" section is added. -Molecule file +Molecule File ^^^^^^^^^^^^^ Molecule files for use with the :doc:`molecule command ` look @@ -376,7 +376,7 @@ if the molecule command is issued *before* the simulation box is defined. Otherwise, the molecule command can derive the required settings internally. 
-Restart file +Restart File ^^^^^^^^^^^^ LAMMPS restart files are binary files and not available in text format. @@ -409,6 +409,67 @@ are in the ``lmprestart.h`` header file. LAMMPS restart files are not expected to be portable between platforms or LAMMPS versions, but changes to the file format are rare. +JSON Dump Files +^^^^^^^^^^^^^^^ + +LAMMPS can print information about molecules and other sets of atoms during +a run, in JSON format. Several fixes currently support dumping JSON files +in the 'molecules' style, including :doc:`fix bond/react ` +and the *delete* keyword of :doc:`fix reaxff/species `. +The JSON 'dump molecules' format lists sets of atoms in the style of the +:doc:`JSON molecule file `, where more discussion of JSON schema +can be found. Here is a generic example of a JSON output file that dumped +one water molecule on the first timestep: + +.. code-block:: json + + { + "application": "LAMMPS", + "units": "real", + "format": "dump", + "style": "molecules", + "revision": 1, + "timesteps": [ + { + "timestep": 1, + "molecules": [ + { + "types": { + "format": ["atom-id", "type"], + "data": [ + [1368, "H"], + [1366, "O"], + [1367, "H"] + ] + }, + "coords": { + "format": ["atom-id", "x", "y", "z"], + "data": [ + [1368, 26.787767440427466, 29.785528640296768, 25.85197353660144], + [1366, 26.641801222582824, 29.868106247702887, 24.91285138212243], + [1367, 25.69611192416744, 30.093425787807448, 24.914380215672846] + ] + } + } + ] + } + ] + } + +The required first-level keys of the JSON format output are "application", +"format", "style", "revision", and "timesteps", and optional keys are +"units" and "title". The value of the "timesteps" key is an array of +objects that contain data for each timestep on which a molecule was dumped, +and the other first-level keys identify this JSON schema. The objects in +"timesteps" contain two mandatory keys, "timestep" and "molecules". 
The +"molecules" key is an array of :doc:`LAMMPS molecule JSON ` +objects, and may contain other keys that contain metadata for each +molecule. The "format" keys within molecule JSON objects are only printed +once per output file, for brevity. The "atom-id" values are atom IDs from +the simulation, and the "type" values are atom types. In the above +example, the types were reported as strings corresponding to elements using +:doc:`type labels `. + .. Native Dump file .. ^^^^^^^^^^^^^^^^ .. From d0badba82d180e0e03cda51bbbbc1dd59ab2b664 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 14:29:22 -0500 Subject: [PATCH 275/604] add docs --- doc/src/Run_formats.rst | 2 ++ doc/src/fix_bond_react.rst | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/doc/src/Run_formats.rst b/doc/src/Run_formats.rst index 145c6837b1b..e2bd86f37d4 100644 --- a/doc/src/Run_formats.rst +++ b/doc/src/Run_formats.rst @@ -409,6 +409,8 @@ are in the ``lmprestart.h`` header file. LAMMPS restart files are not expected to be portable between platforms or LAMMPS versions, but changes to the file format are rare. +.. _json-dump-files: + JSON Dump Files ^^^^^^^^^^^^^^^ diff --git a/doc/src/fix_bond_react.rst b/doc/src/fix_bond_react.rst index 2df98621784..961717aa3f0 100644 --- a/doc/src/fix_bond_react.rst +++ b/doc/src/fix_bond_react.rst @@ -41,6 +41,8 @@ Syntax Nlimit = maximum total number of reactions allowed to occur *shuffle_seed* value = seed seed = random # seed (positive integer) for choosing between eligible reactions + *file* value = filename + filename = name of the JSON file that records reaction occurrences * react = mandatory argument indicating new reaction specification * react-ID = user-assigned name for the reaction @@ -244,6 +246,52 @@ if available; reactions are chosen deterministically if a positive integer is specified for the 'shuffle_seed' keyword. Multiple *max_rxn* keywords can be specified. 
+The *file* keyword can be used to dump information about each reaction that +occurs during the simulation. The atom IDs, types, and coordinates of all +atoms in the reaction site are printed out on the timestep that the +reaction is initiated. The output file follows the :ref:`JSON dump +molecules format `, with one extra key added to each +molecule object to identify the reaction. The added key is "reaction" and +its value is the reaction name (react-ID). Here is an example output for a +hypothetical reaction involving one water molecule: + +.. code-block:: json + + { + "application": "LAMMPS", + "units": "real", + "format": "dump", + "style": "molecules", + "revision": 1, + "title": "fix bond/react", + "timesteps": [ + { + "timestep": 1, + "molecules": [ + { + "reaction": "water_dissociation", + "types": { + "format": ["atom-id", "type"], + "data": [ + [1368, "H"], + [1366, "O"], + [1367, "H"] + ] + }, + "coords": { + "format": ["atom-id", "x", "y", "z"], + "data": [ + [1368, 26.787767440427466, 29.785528640296768, 25.85197353660144], + [1366, 26.641801222582824, 29.868106247702887, 24.91285138212243], + [1367, 25.69611192416744, 30.093425787807448, 24.914380215672846] + ] + } + } + ] + } + ] + } + The following comments pertain to each *react* argument (in other words, they can be customized for each reaction, or reaction step): From 2fc9d7ba864e8c4e32e7781e2fdf0b200488d2b9 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 16:16:01 -0500 Subject: [PATCH 276/604] update installed header list and directory for using the LAMMPS CMake target --- cmake/CMakeLists.txt | 19 ++++++++++++++----- cmake/LAMMPSConfig.cmake.in | 3 +++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 76c04ca0d37..3ef3b3df459 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -779,16 +779,17 @@ if(_index GREATER -1) target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) 
endif() set(LAMMPS_CXX_HEADERS angle.h atom.h bond.h citeme.h comm.h command.h compute.h dihedral.h domain.h - error.h exceptions.h fix.h force.h group.h improper.h input.h info.h kspace.h lammps.h lattice.h - library.h lmppython.h lmptype.h memory.h modify.h neighbor.h neigh_list.h output.h pair.h - platform.h pointers.h region.h timer.h universe.h update.h utils.h variable.h) + error.h exceptions.h fix.h force.h group.h improper.h input.h info.h json.h json_fwd.h kspace.h + lammps.h lattice.h library.h lmppython.h lmptype.h memory.h modify.h neighbor.h neigh_list.h + output.h pair.h platform.h pointers.h region.h timer.h universe.h update.h utils.h variable.h) set(LAMMPS_FMT_HEADERS core.h format.h) +set(LAMMPS_JSON_HEADERS json_fwd.hpp json.hpp) set_target_properties(lammps PROPERTIES OUTPUT_NAME lammps${LAMMPS_MACHINE}) set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION}) set_target_properties(lammps PROPERTIES PREFIX "lib") -target_include_directories(lammps PUBLIC $) -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/fmt) +target_include_directories(lammps PUBLIC $) +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/fmt ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/nlohmann) foreach(_HEADER ${LAMMPS_CXX_HEADERS}) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/${_HEADER} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_SOURCE_DIR}/${_HEADER} ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/${_HEADER} DEPENDS ${LAMMPS_SOURCE_DIR}/${_HEADER}) add_custom_target(${_HEADER} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/${_HEADER}) @@ -805,6 +806,14 @@ foreach(_HEADER ${LAMMPS_FMT_HEADERS}) install(FILES ${LAMMPS_SOURCE_DIR}/fmt/${_HEADER} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lammps/fmt) endif() endforeach() +foreach(_HEADER ${LAMMPS_JSON_HEADERS}) + add_custom_command(OUTPUT 
${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/nlohmann/${_HEADER} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_SOURCE_DIR}/nlohmann/${_HEADER} ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/nlohmann/${_HEADER} DEPENDS ${LAMMPS_SOURCE_DIR}/nlohmann/${_HEADER}) + add_custom_target(json_${_HEADER} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/includes/lammps/nlohmann/${_HEADER}) + add_dependencies(lammps json_${_HEADER}) + if(BUILD_SHARED_LIBS) + install(FILES ${LAMMPS_SOURCE_DIR}/nlohmann/${_HEADER} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lammps/nlohmann) + endif() +endforeach() target_include_directories(lammps INTERFACE $) add_library(LAMMPS::lammps ALIAS lammps) get_target_property(LAMMPS_DEFINES lammps INTERFACE_COMPILE_DEFINITIONS) diff --git a/cmake/LAMMPSConfig.cmake.in b/cmake/LAMMPSConfig.cmake.in index 0dacfc20896..f87b1083736 100644 --- a/cmake/LAMMPSConfig.cmake.in +++ b/cmake/LAMMPSConfig.cmake.in @@ -2,4 +2,7 @@ include(CMakeFindDependencyMacro) if(@BUILD_MPI@) find_dependency(MPI REQUIRED CXX) endif() +if(@BUILD_OMP@) + find_dependency(OpenMP REQUIRED CXX) +endif() include("${CMAKE_CURRENT_LIST_DIR}/LAMMPS_Targets.cmake") From de742b2f01a0fbe6513172a1035790013b9da3c2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 17:03:44 -0500 Subject: [PATCH 277/604] don't try to include nonexisting packages --- cmake/packaging/org.lammps.lammps-gui.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmake/packaging/org.lammps.lammps-gui.yml b/cmake/packaging/org.lammps.lammps-gui.yml index 01aedc416b7..b63b2403873 100644 --- a/cmake/packaging/org.lammps.lammps-gui.yml +++ b/cmake/packaging/org.lammps.lammps-gui.yml @@ -33,7 +33,6 @@ modules: - -D DOWNLOAD_POTENTIALS=no - -D PKG_AMOEBA=yes - -D PKG_ASPHERE=yes - - -D PKG_AWPMD=yes - -D PKG_BOCS=yes - -D PKG_BODY=yes - -D PKG_BPM=yes @@ -90,7 +89,6 @@ modules: - -D PKG_PERI=yes - -D PKG_PHONON=yes - -D PKG_PLUGIN=yes - - -D PKG_POEMS=yes - -D PKG_PTM=yes - -D PKG_PYTHON=yes - -D PKG_QEQ=yes From 
26e1abd8d53f4e1f1242772aca73697a8a4c3fd6 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 18:41:27 -0500 Subject: [PATCH 278/604] forward declare json_metadata --- src/REACTION/fix_bond_react.cpp | 11 ++++++----- src/REACTION/fix_bond_react.h | 5 +++-- src/REAXFF/fix_reaxff_species.cpp | 2 +- src/json_metadata.h | 26 ++++++++++++++++++++++++++ src/output.cpp | 15 ++++++++------- src/output.h | 11 ++--------- 6 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 src/json_metadata.h diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index fd9d8f9aaf9..a1ea67f57c0 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -28,6 +28,7 @@ Contributing Author: Jacob Gissinger (jgissing@stevens.edu) #include "force.h" #include "group.h" #include "input.h" +#include "json_metadata.h" #include "math_const.h" #include "math_extra.h" #include "memory.h" @@ -442,13 +443,13 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) : if (outflag) { // add Metadata struct to print out react-ID to JSON molecules dump // adds 'reaction' JSON key to each molecule - rxn_metadata.metaflag = true; - rxn_metadata.key = "reaction"; + rxn_metadata = std::make_unique(); + rxn_metadata->key = "reaction"; std::vector rxn_names; rxn_names.reserve(rxns.size()); for (auto const& rxn : rxns) rxn_names.push_back(rxn.name); - rxn_metadata.values = rxn_names; + rxn_metadata->values = rxn_names; } for (auto &rlm : rate_limits) { @@ -3630,8 +3631,8 @@ void FixBondReact::update_everything() indent.resize(++json_level*tab, ' '); } - rxn_metadata.ivec = i_react_tags; - output->write_molecule_json(fpout, json_level, json_init, i_rxn_instance, rxn_metadata); + rxn_metadata->ivec = i_react_tags; + output->write_molecule_json(fpout, json_level, json_init, i_rxn_instance, rxn_metadata.get()); if (json_init == 1) json_init++; if (comm->me == 0) { indent.resize(--json_level*tab, ' '); diff --git 
a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h index d8a785ca77b..663af6fd7de 100644 --- a/src/REACTION/fix_bond_react.h +++ b/src/REACTION/fix_bond_react.h @@ -25,7 +25,6 @@ FixStyle(bond/react,FixBondReact); #define LMP_FIX_BOND_REACT_H #include "fix.h" -#include "output.h" #include #include @@ -34,6 +33,8 @@ FixStyle(bond/react,FixBondReact); namespace LAMMPS_NS { +struct json_metadata; // forward declaration. full declaration in json_metadata.h + class FixBondReact : public Fix { public: FixBondReact(class LAMMPS *, int, char **); @@ -71,7 +72,7 @@ class FixBondReact : public Fix { FILE *fpout; bool outflag; int json_init; - Output::JSON_Metadata rxn_metadata; + std::unique_ptr rxn_metadata; int stabilization_flag; Reset_Mol_IDs molid_mode; int custom_exclude_flag; diff --git a/src/REAXFF/fix_reaxff_species.cpp b/src/REAXFF/fix_reaxff_species.cpp index a0351b2fabc..ec6f5d610cd 100644 --- a/src/REAXFF/fix_reaxff_species.cpp +++ b/src/REAXFF/fix_reaxff_species.cpp @@ -1151,7 +1151,7 @@ void FixReaxFFSpecies::DeleteSpecies(int Nmole, int Nspec) indent.resize(++json_level*tab, ' '); } - output->write_molecule_json(fdel, json_level, deljson_init, mark); + output->write_molecule_json(fdel, json_level, deljson_init, mark, nullptr); if (deljson_init == 1) deljson_init++; if (comm->me == 0) { diff --git a/src/json_metadata.h b/src/json_metadata.h new file mode 100644 index 00000000000..548350dad4d --- /dev/null +++ b/src/json_metadata.h @@ -0,0 +1,26 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_JSON_METADATA_H +#define LMP_JSON_METADATA_H + +// full declaration of JSON metadata struct, used in JSON dump molecules format + +namespace LAMMPS_NS { + struct json_metadata { + std::string key; // JSON key + std::vector values; // JSON value + int *ivec; // per-atom vector, indices for 'values' + }; +} +#endif \ No newline at end of file diff --git a/src/output.cpp b/src/output.cpp index 9a19e7f0cec..1797421709c 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -27,6 +27,7 @@ #include "group.h" #include "info.h" #include "input.h" +#include "json_metadata.h" #include "label_map.h" #include "memory.h" #include "modify.h" @@ -643,7 +644,7 @@ void Output::write_restart(bigint ntimestep) atoms with integer array value of 0 assumed to not belong to a molecule ------------------------------------------------------------------------- */ -void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *ivec, JSON_Metadata metadata) +void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *ivec, json_metadata *metadata) { std::string indent; int tab = 4; @@ -682,8 +683,8 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i if (ivec[i] == thisval) { myatom.type = atom->type[i]; myatom.tag = (int) atom->tag[i]; - if (metadata.metaflag && metadata_val.empty() && comm->me == sendr) - metadata_val = metadata.values[metadata.ivec[atom->map(myatom.tag)]]; + if (metadata && metadata_val.empty() && comm->me == sendr) + metadata_val = metadata->values[metadata->ivec[atom->map(myatom.tag)]]; for (int k = 0; k < 3; k++) myatom.x[k] = atom->x[i][k]; atoms_local.push_back(myatom); @@ -692,7 +693,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i } #if !defined(MPI_STUBS) if (comm->me != 0) { - if (metadata.metaflag && comm->me == 
sendr) { + if (metadata && comm->me == sendr) { int len = metadata_val.size(); MPI_Send(&len, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); MPI_Send(metadata_val.data(), len, MPI_CHAR, 0, 0, MPI_COMM_WORLD); @@ -705,7 +706,7 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i if (comm->me == 0) { #if !defined(MPI_STUBS) for (int i = 1; i < comm->nprocs; i++) { - if (metadata.metaflag && i == sendr) { + if (metadata && i == sendr) { int len; MPI_Recv(&len, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); metadata_val.resize(len); @@ -727,9 +728,9 @@ void Output::write_molecule_json(FILE *fp, int json_level, int printflag, int *i json_init = 1; } - if (metadata.metaflag) { + if (metadata) { indent.resize(++json_level*tab, ' '); - utils::print(fp, "{}\"{}\": \"{}\",\n", indent.c_str(), metadata.key, metadata_val); + utils::print(fp, "{}\"{}\": \"{}\",\n", indent.c_str(), metadata->key, metadata_val); indent.resize(--json_level*tab, ' '); } diff --git a/src/output.h b/src/output.h index bcb891d58c6..847de833138 100644 --- a/src/output.h +++ b/src/output.h @@ -21,6 +21,7 @@ namespace LAMMPS_NS { class Dump; +struct json_metadata; class Output : protected Pointers { public: @@ -72,14 +73,6 @@ class Output : protected Pointers { MPI_Datatype createParticleStructType(); - struct JSON_Metadata { - bool metaflag; // indicates if there is metadata - std::string key; // JSON key - std::vector values; // JSON value - int *ivec; // per-atom vector, indices for 'values' - JSON_Metadata() : metaflag(false) {} - }; - Output(class LAMMPS *); ~Output() override; void init(); @@ -87,7 +80,7 @@ class Output : protected Pointers { void write(bigint); // output for current timestep void write_dump(bigint); // force output of dump snapshots void write_restart(bigint); // force output of a restart file - void write_molecule_json(FILE *, int, int, int *, JSON_Metadata = JSON_Metadata{}); // JSON dump molecules + void write_molecule_json(FILE *, int, int, int *, 
json_metadata *); // JSON dump molecules void reset_timestep(bigint); // reset output which depends on timestep void reset_dt(); // reset output which depends on timestep size From 4ce3f8d975b65d1c46e1d0394b972fb44d76971d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 20:16:10 -0500 Subject: [PATCH 279/604] generate and install a (minimal) FindLAMMPS.cmake file --- cmake/CMakeLists.txt | 5 +++-- cmake/FindLAMMPS.cmake.in | 17 +++++++++++++++++ cmake/LAMMPSConfig.cmake.in | 3 +++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 cmake/FindLAMMPS.cmake.in diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3ef3b3df459..345ac16ac2c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -836,9 +836,10 @@ if(BUILD_SHARED_LIBS) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblammps${LAMMPS_MACHINE}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) install(EXPORT LAMMPS_Targets FILE LAMMPS_Targets.cmake NAMESPACE LAMMPS:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) include(CMakePackageConfigHelpers) - configure_file(LAMMPSConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake @ONLY) + configure_file(LAMMPSConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" @ONLY) + configure_file(FindLAMMPS.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS.cmake" @ONLY) write_basic_package_version_file("LAMMPSConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY ExactVersion) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) endif() install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) diff --git 
a/cmake/FindLAMMPS.cmake.in b/cmake/FindLAMMPS.cmake.in new file mode 100644 index 00000000000..49955ada46b --- /dev/null +++ b/cmake/FindLAMMPS.cmake.in @@ -0,0 +1,17 @@ +# created by cmake version @CMAKE_VERSION@ for LAMMPS version @PROJECT_VERSION@ + +include("${CMAKE_CURRENT_LIST_DIR}/LAMMPSConfigVersion.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/LAMMPSConfig.cmake") + +get_target_property(LAMMPS_CONFIG LAMMPS::lammps IMPORTED_CONFIGURATIONS) +get_target_property(LAMMPS_LIBRARY LAMMPS::lammps IMPORTED_LOCATION_${LAMMPS_CONFIG}) +get_target_property(LAMMPS_INCLUDE_DIR LAMMPS::lammps INTERFACE_INCLUDE_DIRECTORIES) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LAMMPS + REQUIRED_VARS LAMMPS_LIBRARY LAMMPS_INCLUDE_DIR + VERSION_VAR PACKAGE_VERSION + HANDLE_VERSION_RANGE +) + + diff --git a/cmake/LAMMPSConfig.cmake.in b/cmake/LAMMPSConfig.cmake.in index f87b1083736..b3613cd8aa7 100644 --- a/cmake/LAMMPSConfig.cmake.in +++ b/cmake/LAMMPSConfig.cmake.in @@ -1,3 +1,5 @@ +# created by cmake version @CMAKE_VERSION@ for LAMMPS version @PROJECT_VERSION@ + include(CMakeFindDependencyMacro) if(@BUILD_MPI@) find_dependency(MPI REQUIRED CXX) @@ -6,3 +8,4 @@ if(@BUILD_OMP@) find_dependency(OpenMP REQUIRED CXX) endif() include("${CMAKE_CURRENT_LIST_DIR}/LAMMPS_Targets.cmake") +add_library(LAMMPS::LAMMPS ALIAS LAMMPS::lammps) From 7e08655f9ff9425d174d0313e600dd18d99ef473 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 21:50:14 -0500 Subject: [PATCH 280/604] merge FindLAMMPS.cmake.in into LAMMPSConfig.cmake.in. 
we need only one --- cmake/CMakeLists.txt | 3 +-- cmake/FindLAMMPS.cmake.in | 17 ----------------- cmake/LAMMPSConfig.cmake.in | 11 +++++++++++ unittest/CMakeLists.txt | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 19 deletions(-) delete mode 100644 cmake/FindLAMMPS.cmake.in diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 345ac16ac2c..6a658470efb 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -837,9 +837,8 @@ if(BUILD_SHARED_LIBS) install(EXPORT LAMMPS_Targets FILE LAMMPS_Targets.cmake NAMESPACE LAMMPS:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) include(CMakePackageConfigHelpers) configure_file(LAMMPSConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" @ONLY) - configure_file(FindLAMMPS.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS.cmake" @ONLY) write_basic_package_version_file("LAMMPSConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY ExactVersion) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/FindLAMMPS.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/LAMMPSConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LAMMPS) endif() install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) diff --git a/cmake/FindLAMMPS.cmake.in b/cmake/FindLAMMPS.cmake.in deleted file mode 100644 index 49955ada46b..00000000000 --- a/cmake/FindLAMMPS.cmake.in +++ /dev/null @@ -1,17 +0,0 @@ -# created by cmake version @CMAKE_VERSION@ for LAMMPS version @PROJECT_VERSION@ - -include("${CMAKE_CURRENT_LIST_DIR}/LAMMPSConfigVersion.cmake") -include("${CMAKE_CURRENT_LIST_DIR}/LAMMPSConfig.cmake") - -get_target_property(LAMMPS_CONFIG LAMMPS::lammps IMPORTED_CONFIGURATIONS) -get_target_property(LAMMPS_LIBRARY LAMMPS::lammps IMPORTED_LOCATION_${LAMMPS_CONFIG}) 
-get_target_property(LAMMPS_INCLUDE_DIR LAMMPS::lammps INTERFACE_INCLUDE_DIRECTORIES) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(LAMMPS - REQUIRED_VARS LAMMPS_LIBRARY LAMMPS_INCLUDE_DIR - VERSION_VAR PACKAGE_VERSION - HANDLE_VERSION_RANGE -) - - diff --git a/cmake/LAMMPSConfig.cmake.in b/cmake/LAMMPSConfig.cmake.in index b3613cd8aa7..70e2233deca 100644 --- a/cmake/LAMMPSConfig.cmake.in +++ b/cmake/LAMMPSConfig.cmake.in @@ -9,3 +9,14 @@ if(@BUILD_OMP@) endif() include("${CMAKE_CURRENT_LIST_DIR}/LAMMPS_Targets.cmake") add_library(LAMMPS::LAMMPS ALIAS LAMMPS::lammps) + +get_target_property(LAMMPS_CONFIG LAMMPS::lammps IMPORTED_CONFIGURATIONS) +get_target_property(LAMMPS_LIBRARY LAMMPS::lammps IMPORTED_LOCATION_${LAMMPS_CONFIG}) +get_target_property(LAMMPS_INCLUDE_DIR LAMMPS::lammps INTERFACE_INCLUDE_DIRECTORIES) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LAMMPS + REQUIRED_VARS LAMMPS_LIBRARY LAMMPS_INCLUDE_DIR + VERSION_VAR PACKAGE_VERSION + HANDLE_VERSION_RANGE +) diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 499a748a434..b1bee767188 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -66,6 +66,21 @@ set_tests_properties(InvalidFlag PROPERTIES ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" PASS_REGULAR_EXPRESSION "ERROR: Invalid command-line argument.*") +# create an executable using the CMake configuration +add_custom_target(use_cmake_files + ${CMAKE_COMMAND} -E rm -rf ${CMAKE_BINARY_DIR}/use-cmake-files + COMMAND ${CMAKE_COMMAND} -S ${CMAKE_CURRENT_SOURCE_DIR}/use-cmake-files + -B ${CMAKE_BINARY_DIR}/use-cmake-files + -D LAMMPS_DIR=${CMAKE_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/use-cmake-files + DEPENDS lammps lmp + BYPRODUCTS ${CMAKE_BINARY_DIR}/use-cmake-files/testlmp + COMMENT "LAMMPS test generated CMake configuration with compilation" +) + +add_test(NAME FindLAMMPS + COMMAND ${CMAKE_COMMAND} 
--build ${CMAKE_BINARY_DIR} --target use_cmake_files) + # convenience function for adding tests requiring to be run in parallel with MPI if(BUILD_MPI) function(add_mpi_test) From 50cb20d9969616cf168c38a2c314eefcfb66d4f2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 21:50:44 -0500 Subject: [PATCH 281/604] add test for generated CMake files --- unittest/CMakeLists.txt | 8 ++++---- unittest/use-cmake-files/CMakeLists.txt | 6 ++++++ unittest/use-cmake-files/main.cpp | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 unittest/use-cmake-files/CMakeLists.txt create mode 100644 unittest/use-cmake-files/main.cpp diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index b1bee767188..b3ee0da94ab 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -49,21 +49,21 @@ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/in.empty "") add_test(NAME RunLammps COMMAND $ -log none -echo none -in in.empty) set_tests_properties(RunLammps PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" - PASS_REGULAR_EXPRESSION "LAMMPS \\([0-9]+ [A-Za-z]+ 2[0-9][0-9][0-9]( - Update [0-9]+)?( - Development.*)?( - Maintenance.*)?\\)") + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" + PASS_REGULAR_EXPRESSION "LAMMPS \\([0-9]+ [A-Za-z]+ 2[0-9][0-9][0-9]( - Update [0-9]+)?( - Development.*)?( - Maintenance.*)?\\)") # check if the compiled executable will print the help message add_test(NAME HelpMessage COMMAND $ -h) set_tests_properties(HelpMessage PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" PASS_REGULAR_EXPRESSION ".*Large-scale Atomic/Molecular Massively Parallel Simulator -.*Usage example:.*") # check if the compiled executable will error out on an invalid command line flag add_test(NAME InvalidFlag COMMAND $ -xxx) 
set_tests_properties(InvalidFlag PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" PASS_REGULAR_EXPRESSION "ERROR: Invalid command-line argument.*") # create an executable using the CMake configuration diff --git a/unittest/use-cmake-files/CMakeLists.txt b/unittest/use-cmake-files/CMakeLists.txt new file mode 100644 index 00000000000..98904f4e5d5 --- /dev/null +++ b/unittest/use-cmake-files/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 3.20) +project(testlmp CXX) + +find_package(LAMMPS REQUIRED) +add_executable(testlmp main.cpp) +target_link_libraries(testlmp PRIVATE LAMMPS::LAMMPS) diff --git a/unittest/use-cmake-files/main.cpp b/unittest/use-cmake-files/main.cpp new file mode 100644 index 00000000000..ded0e9cf899 --- /dev/null +++ b/unittest/use-cmake-files/main.cpp @@ -0,0 +1,20 @@ + +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + try { + auto *lmp = new LAMMPS_NS::LAMMPS(argc, argv, MPI_COMM_WORLD); + lmp->input->file(); + delete lmp; + } catch (std::exception &) { + } + return 0; +} + From d523c90dac3232261f655a3d7a3e3bb6c923120c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 22:07:53 -0500 Subject: [PATCH 282/604] small tweak to try passing automated tests --- unittest/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index b3ee0da94ab..8eab94f4130 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -66,20 +66,24 @@ set_tests_properties(InvalidFlag PROPERTIES ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" PASS_REGULAR_EXPRESSION "ERROR: Invalid command-line argument.*") +# the custom target below is incompatible with building LAMMPS-GUI at the same time +if(NOT BUILD_LAMMPS_GUI) # 
create an executable using the CMake configuration add_custom_target(use_cmake_files - ${CMAKE_COMMAND} -E rm -rf ${CMAKE_BINARY_DIR}/use-cmake-files + ${CMAKE_COMMAND} -E rm -rf ${CMAKE_BINARY_DIR}/use-cmake-files ${CMAKE_BINARY_DIR}/dummy-install + COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR} --prefix ${CMAKE_BINARY_DIR}/dummy-install COMMAND ${CMAKE_COMMAND} -S ${CMAKE_CURRENT_SOURCE_DIR}/use-cmake-files -B ${CMAKE_BINARY_DIR}/use-cmake-files -D LAMMPS_DIR=${CMAKE_BINARY_DIR} COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/use-cmake-files DEPENDS lammps lmp BYPRODUCTS ${CMAKE_BINARY_DIR}/use-cmake-files/testlmp - COMMENT "LAMMPS test generated CMake configuration with compilation" + COMMENT "Test generated LAMMPS CMake configuration with compilation" ) add_test(NAME FindLAMMPS COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target use_cmake_files) +endif() # convenience function for adding tests requiring to be run in parallel with MPI if(BUILD_MPI) From edfbbff18b6828f9fccfa8afaa95fe92892ef7ff Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 16 Nov 2025 22:23:16 -0500 Subject: [PATCH 283/604] another tweak --- unittest/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 8eab94f4130..84f3a46d4e4 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -74,7 +74,7 @@ add_custom_target(use_cmake_files COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR} --prefix ${CMAKE_BINARY_DIR}/dummy-install COMMAND ${CMAKE_COMMAND} -S ${CMAKE_CURRENT_SOURCE_DIR}/use-cmake-files -B ${CMAKE_BINARY_DIR}/use-cmake-files - -D LAMMPS_DIR=${CMAKE_BINARY_DIR} + -D LAMMPS_DIR=${CMAKE_BINARY_DIR}/dummy-install/lib/cmake/LAMMPS COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/use-cmake-files DEPENDS lammps lmp BYPRODUCTS ${CMAKE_BINARY_DIR}/use-cmake-files/testlmp From c4a5a24e0c826a67a7f5a6c6ddec1cdfd7b8c95f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer 
Date: Sun, 16 Nov 2025 22:38:35 -0500 Subject: [PATCH 284/604] make CMake config test compatible with non-MPI compilations --- unittest/CMakeLists.txt | 1 + unittest/use-cmake-files/CMakeLists.txt | 3 +++ unittest/use-cmake-files/main.cpp | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 84f3a46d4e4..9a981f8adfe 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -75,6 +75,7 @@ add_custom_target(use_cmake_files COMMAND ${CMAKE_COMMAND} -S ${CMAKE_CURRENT_SOURCE_DIR}/use-cmake-files -B ${CMAKE_BINARY_DIR}/use-cmake-files -D LAMMPS_DIR=${CMAKE_BINARY_DIR}/dummy-install/lib/cmake/LAMMPS + -D USE_MPI=${USE_MPI} COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/use-cmake-files DEPENDS lammps lmp BYPRODUCTS ${CMAKE_BINARY_DIR}/use-cmake-files/testlmp diff --git a/unittest/use-cmake-files/CMakeLists.txt b/unittest/use-cmake-files/CMakeLists.txt index 98904f4e5d5..597894479a5 100644 --- a/unittest/use-cmake-files/CMakeLists.txt +++ b/unittest/use-cmake-files/CMakeLists.txt @@ -3,4 +3,7 @@ project(testlmp CXX) find_package(LAMMPS REQUIRED) add_executable(testlmp main.cpp) +if(USE_MPI) + target_compile_definitions(testlmp PRIVATE USE_MPI) +endif() target_link_libraries(testlmp PRIVATE LAMMPS::LAMMPS) diff --git a/unittest/use-cmake-files/main.cpp b/unittest/use-cmake-files/main.cpp index ded0e9cf899..7671beea624 100644 --- a/unittest/use-cmake-files/main.cpp +++ b/unittest/use-cmake-files/main.cpp @@ -1,5 +1,9 @@ +#ifdef USE_MPI #include +#else +#define MPI_COMM_WORLD 0 +#endif #include #include #include From b76b8fde46a69fef62b187c7794c09ab5ba651c9 Mon Sep 17 00:00:00 2001 From: Jacob Gissinger Date: Sun, 16 Nov 2025 22:57:25 -0500 Subject: [PATCH 285/604] Update json_metadata.h --- src/json_metadata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json_metadata.h b/src/json_metadata.h index 548350dad4d..b197a479bad 100644 --- a/src/json_metadata.h +++ 
b/src/json_metadata.h @@ -23,4 +23,4 @@ namespace LAMMPS_NS { int *ivec; // per-atom vector, indices for 'values' }; } -#endif \ No newline at end of file +#endif From 66fd0067cbddb7a5221ff53493caea495c7ec678 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Nov 2025 09:15:00 -0500 Subject: [PATCH 286/604] remove some error messages that no longer exist --- doc/src/Errors_messages.rst | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/doc/src/Errors_messages.rst b/doc/src/Errors_messages.rst index 008ee26ae02..526c74fa66e 100644 --- a/doc/src/Errors_messages.rst +++ b/doc/src/Errors_messages.rst @@ -1123,12 +1123,6 @@ Please also see the page with :doc:`Warning messages `. *Cannot yet use fix bond/create with this improper style* This is a current restriction in LAMMPS. -*Cannot yet use minimize with Kokkos* - This feature is not yet supported. - -*Cannot yet use pair hybrid with Kokkos* - This feature is not yet supported. - *Cannot zero Langevin force of 0 atoms* The group has zero atoms, so you cannot request its force be zeroed. @@ -2092,9 +2086,6 @@ Please also see the page with :doc:`Warning messages `. *Fix langevin gjf cannot have period equal to dt/2* If the period is equal to dt/2 then division by zero will happen. -*Fix langevin gjf with tbias is not yet implemented with kokkos* - This option is not yet available. - *Fix langevin omega is not yet implemented with kokkos* This option is not yet available. @@ -2224,10 +2215,6 @@ Please also see the page with :doc:`Warning messages `. The principal moments of inertia computed for a rigid body are not within the required tolerances. -*Fix shake cannot be used with minimization* - Cannot use fix shake while doing an energy minimization since - it turns off bonds that should contribute to the energy. - *Fix shake molecule template must have shake info* The defined molecule does not specify SHAKE information. 
From c3844395006afae172cf695878ef83f3e645e4d2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Nov 2025 09:45:36 -0500 Subject: [PATCH 287/604] remove test for checking CMake configuration as it causes too much problems --- unittest/CMakeLists.txt | 20 -------------------- unittest/use-cmake-files/CMakeLists.txt | 9 --------- unittest/use-cmake-files/main.cpp | 24 ------------------------ 3 files changed, 53 deletions(-) delete mode 100644 unittest/use-cmake-files/CMakeLists.txt delete mode 100644 unittest/use-cmake-files/main.cpp diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index 9a981f8adfe..fe84bda74f7 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -66,26 +66,6 @@ set_tests_properties(InvalidFlag PROPERTIES ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=2" PASS_REGULAR_EXPRESSION "ERROR: Invalid command-line argument.*") -# the custom target below is incompatible with building LAMMPS-GUI at the same time -if(NOT BUILD_LAMMPS_GUI) -# create an executable using the CMake configuration -add_custom_target(use_cmake_files - ${CMAKE_COMMAND} -E rm -rf ${CMAKE_BINARY_DIR}/use-cmake-files ${CMAKE_BINARY_DIR}/dummy-install - COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR} --prefix ${CMAKE_BINARY_DIR}/dummy-install - COMMAND ${CMAKE_COMMAND} -S ${CMAKE_CURRENT_SOURCE_DIR}/use-cmake-files - -B ${CMAKE_BINARY_DIR}/use-cmake-files - -D LAMMPS_DIR=${CMAKE_BINARY_DIR}/dummy-install/lib/cmake/LAMMPS - -D USE_MPI=${USE_MPI} - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/use-cmake-files - DEPENDS lammps lmp - BYPRODUCTS ${CMAKE_BINARY_DIR}/use-cmake-files/testlmp - COMMENT "Test generated LAMMPS CMake configuration with compilation" -) - -add_test(NAME FindLAMMPS - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target use_cmake_files) -endif() - # convenience function for adding tests requiring to be run in parallel with MPI if(BUILD_MPI) function(add_mpi_test) diff --git 
a/unittest/use-cmake-files/CMakeLists.txt b/unittest/use-cmake-files/CMakeLists.txt deleted file mode 100644 index 597894479a5..00000000000 --- a/unittest/use-cmake-files/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -cmake_minimum_required(VERSION 3.20) -project(testlmp CXX) - -find_package(LAMMPS REQUIRED) -add_executable(testlmp main.cpp) -if(USE_MPI) - target_compile_definitions(testlmp PRIVATE USE_MPI) -endif() -target_link_libraries(testlmp PRIVATE LAMMPS::LAMMPS) diff --git a/unittest/use-cmake-files/main.cpp b/unittest/use-cmake-files/main.cpp deleted file mode 100644 index 7671beea624..00000000000 --- a/unittest/use-cmake-files/main.cpp +++ /dev/null @@ -1,24 +0,0 @@ - -#ifdef USE_MPI -#include -#else -#define MPI_COMM_WORLD 0 -#endif -#include -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - MPI_Init(&argc, &argv); - try { - auto *lmp = new LAMMPS_NS::LAMMPS(argc, argv, MPI_COMM_WORLD); - lmp->input->file(); - delete lmp; - } catch (std::exception &) { - } - return 0; -} - From 03662387ed2c4bb0149d2bc7c4ad7a6b2e1ae5cc Mon Sep 17 00:00:00 2001 From: Jeremy Fersula Date: Mon, 17 Nov 2025 16:20:57 +0100 Subject: [PATCH 288/604] Add propel/selfalign fix to BROWNIAN package Signed-off-by: Jeremy Fersula --- src/BROWNIAN/fix_propel_selfalign.cpp | 174 ++++++++++++++++++++++++++ src/BROWNIAN/fix_propel_selfalign.h | 46 +++++++ 2 files changed, 220 insertions(+) create mode 100644 src/BROWNIAN/fix_propel_selfalign.cpp create mode 100644 src/BROWNIAN/fix_propel_selfalign.h diff --git a/src/BROWNIAN/fix_propel_selfalign.cpp b/src/BROWNIAN/fix_propel_selfalign.cpp new file mode 100644 index 00000000000..81047461629 --- /dev/null +++ b/src/BROWNIAN/fix_propel_selfalign.cpp @@ -0,0 +1,174 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/ Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + 
Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------- + Contributed by: Jeremy Fersula @ Sorbonne University +----------------------------------------------------------------------- */ + +#include "fix_propel_selfalign.h" + +#include "atom.h" +#include "atom_vec_ellipsoid.h" +#include "domain.h" +#include "error.h" +#include "math_extra.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum { DIPOLE, QUAT }; + +/* ---------------------------------------------------------------------- */ + +FixPropelSelfAlign::FixPropelSelfAlign(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), avec(nullptr) +{ + + if (narg != 5 && narg != 9) error->all(FLERR, "Illegal fix propel/selfalign command"); + + if (strcmp(arg[3], "dipole") == 0) { + mode = DIPOLE; + } else if (strcmp(arg[3], "quat") == 0) { + mode = QUAT; + } else { + error->all(FLERR, "Illegal fix propel/selfalign command"); + } + + magnitude = utils::numeric(FLERR, arg[4], false, lmp); + + // check for keyword + + if (narg == 9) { + if (mode != QUAT) { error->all(FLERR, "Illegal fix propel/selfalign command"); } + if (strcmp(arg[5], "qvector") == 0) { + sx = utils::numeric(FLERR, arg[6], false, lmp); + sy = utils::numeric(FLERR, arg[7], false, lmp); + sz = utils::numeric(FLERR, arg[8], false, lmp); + double snorm = sqrt(sx * sx + sy * sy + sz * sz); + sx = sx / snorm; + sy = sy / snorm; + sz = sz / snorm; + } else { + error->all(FLERR, "Illegal fix propel/selfalign command"); + } + } else { + sx = 1.0; + sy = 0.0; + sz = 0.0; + } +} + +/* 
---------------------------------------------------------------------- */ + +int FixPropelSelfAlign::setmask() +{ + int mask = 0; + mask |= POST_FORCE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixPropelSelfAlign::init() +{ + if (mode == DIPOLE && !atom->mu_flag && !atom->torque_flag) + error->all(FLERR, "Fix propel/selfalign requires atom attributes mu + torque with option dipole"); + + if (mode == QUAT) { + avec = dynamic_cast(atom->style_match("ellipsoid")); + if (!avec) error->all(FLERR, "Fix propel/selfalign requires atom style ellipsoid with option quat"); + + // check that all particles are finite-size ellipsoids + // no point particles allowed, spherical is OK + + int *ellipsoid = atom->ellipsoid; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + if (ellipsoid[i] < 0) + error->one(FLERR, "Fix propel/selfalign requires extended particles with option quat"); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixPropelSelfAlign::post_force(int vflag) +{ + if (mode == DIPOLE) + post_force_dipole(vflag); + else if (mode == QUAT) + post_force_quaternion(vflag); +} + +/* ---------------------------------------------------------------------- */ + +void FixPropelSelfAlign::post_force_dipole(int vflag) +{ + double **torque = atom->torque; + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double **mu = atom->mu; + double selfTorque[3]; + + double a[3], b[3], c[3]; + + // Add the active torque to the atom torques: + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + MathExtra::cross3(mu[i], v[i], selfTorque); + + torque[i][0] += selfTorque[0] * magnitude; + torque[i][1] += selfTorque[1] * magnitude; + torque[i][2] += selfTorque[2] * magnitude; + } +} + +/* ---------------------------------------------------------------------- */ + +void 
FixPropelSelfAlign::post_force_quaternion(int vflag) +{ + double **torque = atom->torque; + double **v = atom->v; + int *mask = atom->mask; + int nlocal = atom->nlocal; + int *ellipsoid = atom->ellipsoid; + + // ellipsoidal properties + AtomVecEllipsoid::Bonus *bonus = avec->bonus; + double f_act[3] = {sx, sy, sz}; + double f_rot[3]; + double *quat; + double Q[3][3]; + double selfTorque[3]; + + // Add the active torque to the atom torques: + for (int i = 0; i < nlocal; ++i) { + if (mask[i] & groupbit) { + + quat = bonus[ellipsoid[i]].quat; + MathExtra::quat_to_mat(quat, Q); + MathExtra::matvec(Q, f_act, f_rot); + + MathExtra::cross3(f_rot, v[i], selfTorque); + + torque[i][0] += selfTorque[0] * magnitude; + torque[i][1] += selfTorque[1] * magnitude; + torque[i][2] += selfTorque[2] * magnitude; + } + } +} diff --git a/src/BROWNIAN/fix_propel_selfalign.h b/src/BROWNIAN/fix_propel_selfalign.h new file mode 100644 index 00000000000..7d268871f68 --- /dev/null +++ b/src/BROWNIAN/fix_propel_selfalign.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(propel/selfalign,FixPropelSelfAlign); +// clang-format on +#else + +#ifndef LMP_FIX_PROPEL_SELFALIGN_H +#define LMP_FIX_PROPEL_SELFALIGN_H + +#include "fix.h" +namespace LAMMPS_NS { + +class FixPropelSelfAlign : public Fix { + public: + FixPropelSelfAlign(class LAMMPS *, int, char **); + + void init() override; + void post_force(int) override; + int setmask() override; + + private: + double magnitude; + double sx, sy, sz; + int mode; + + void post_force_dipole(int); + void post_force_quaternion(int); + + class AtomVecEllipsoid *avec; +}; +} // namespace LAMMPS_NS +#endif +#endif From 0a032435af774efeb7c47ea931d39c2d950cbc40 Mon Sep 17 00:00:00 2001 From: Thomas Friedrich Date: Sun, 16 Nov 2025 02:29:24 +0100 Subject: [PATCH 289/604] Fix formatting in pair_srp File was missing the Description underline --- doc/src/pair_srp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/pair_srp.rst b/doc/src/pair_srp.rst index b1c3c140d35..b18336dd04a 100644 --- a/doc/src/pair_srp.rst +++ b/doc/src/pair_srp.rst @@ -57,7 +57,7 @@ Examples Description - +""""""""""" Style *srp* computes a soft segmental repulsive potential (SRP) that acts between pairs of bonds. This potential is useful for preventing From 2a157acfc27f5825f98361e024ca69fce8b03c14 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Nov 2025 15:10:38 -0500 Subject: [PATCH 290/604] add some documentation for how to import the installed CMake config --- doc/src/Build_cmake.rst | 72 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/doc/src/Build_cmake.rst b/doc/src/Build_cmake.rst index 05fe976e80b..6f283223399 100644 --- a/doc/src/Build_cmake.rst +++ b/doc/src/Build_cmake.rst @@ -126,6 +126,78 @@ defaults to ``${HOME}/.local``. 
root) the `ldconfig` program to update the cache file for fast lookup of system shared libraries. +.. admonition:: Using the installed library + :class: Hint + + The CMake installation functionality is an experimental work in + progress and thus not without problems, especially when writing your + own program that is trying to use the LAMMPS C++ classes directly. + + While there is a well-defined :ref:`C-language interface + ` with the ``library.h`` header file, there is no + equivalent for the C++ interface yet. When installing LAMMPS, + only the core header files are copied into the installation folder + and thus only high-level access to C++ features is available. + + The following is a minimal CMake example file for using the installed + LAMMPS package which represents the current state of development: + + .. code-block:: cmake + + cmake_minimum_required(VERSION 3.20) + project(simpleCC CXX) + # set this to the LAMMPS installation location + if(NOT CMAKE_PREFIX_PATH) + set(CMAKE_PREFIX_PATH $ENV{HOME}/.local) + endif() + find_package(LAMMPS REQUIRED) + add_executable(simpleCC simple.cpp) + target_link_libraries(simpleCC PRIVATE LAMMPS::LAMMPS) + + The ``CMAKE_PREFIX_PATH`` setting tells CMake where to find the + generated CMake configuration files for the `find_package() + `_ + CMake command. You can also specify a required minimal version or + version range. For that a numeric representation in the "YYYY.MM.DD" + format has to be used: the 10 September 2025 release thus becomes + version 2025.09.10. The include statements in the ``simple.cpp`` + source file have to be prefixed with ``lammps/`` as follows: + + .. code-block:: C++ + + #include + #include + #include + #include + + Using the ``LAMMPS::LAMMPS`` target imported from the installed + LAMMPS CMake configuration files should set up the include and linker + flags and folders automatically. 
Below is the output for an example + session (note how it checks for and includes MPI and OpenMP support + since that specific LAMMPS library was set up this way): + + .. code-block:: console + + $ cmake -S . -B build -D CMAKE_PREFIX_PATH=$HOME/Downloads/test-install + -- The CXX compiler identification is GNU 15.2.1 + -- Detecting CXX compiler ABI info + -- Detecting CXX compiler ABI info - done + -- Check for working CXX compiler: /usr/lib64/ccache/c++ - skipped + -- Detecting CXX compile features + -- Detecting CXX compile features - done + -- Found MPI_CXX: /usr/lib64/mpich/lib/libmpicxx.so (found version "4.1") + -- Found MPI: TRUE (found version "4.1") found components: CXX + -- Found OpenMP_CXX: -fopenmp (found version "4.5") + -- Found OpenMP: TRUE (found version "4.5") found components: CXX + -- Found LAMMPS: /home/akohlmey/Downloads/test-install/lib64/liblammps.so.0 + -- Configuring done (0.9s) + -- Generating done (0.0s) + -- Build files have been written to: /home/akohlmey/Downloads/test-simple/build + $ cmake --build build + [ 50%] Building CXX object CMakeFiles/simpleCC.dir/simple.cpp.o + [100%] Linking CXX executable simpleCC + [100%] Built target simpleCC + .. _cmake_options: Configuration and build options From 468f3cc89a0de1dae99cbd946dfbf3709b5d18fb Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Nov 2025 15:25:21 -0500 Subject: [PATCH 291/604] modernize manual for using CMake --- doc/src/Build_cmake.rst | 70 +++++++++++++++++++---------------- doc/src/Build_development.rst | 2 +- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/doc/src/Build_cmake.rst b/doc/src/Build_cmake.rst index 05fe976e80b..03b99e33438 100644 --- a/doc/src/Build_cmake.rst +++ b/doc/src/Build_cmake.rst @@ -16,11 +16,11 @@ environments is on a :doc:`separate page `. .. note:: - LAMMPS currently requires that CMake version 3.20 or later is available. + LAMMPS currently requires CMake version 3.20 or later. .. 
warning:: - You must not mix the :doc:`traditional make based ` + You **must not** mix the :doc:`traditional make based ` LAMMPS build procedure with using CMake. No packages may be installed or a build been previously attempted in the LAMMPS source directory by using ``make ``. CMake will detect if this is @@ -73,46 +73,47 @@ with no add-on packages enabled and no customization: .. code-block:: bash - cd lammps # change to the LAMMPS distribution directory - mkdir build; cd build # create and use a build directory - cmake ../cmake # configuration reading CMake scripts from ../cmake - cmake --build . # compilation (or type "make") - -This will create and change into a folder called ``build``, then run the -configuration step to generate build files for the default build command -and then launch that build command to compile LAMMPS. During the -configuration step CMake will try to detect whether support for MPI, -OpenMP, FFTW, gzip, JPEG, PNG, and ffmpeg are available and enable the -corresponding configuration settings. The progress of this -configuration can be followed on the screen and a summary of selected -options and settings will be printed at the end. The ``cmake --build -.`` command will launch the compilation, which, if successful, will -ultimately produce a library ``liblammps.a`` and the LAMMPS executable -``lmp`` inside the ``build`` folder. + cd lammps # change to the LAMMPS source distribution directory + cmake -S cmake -B build # configure the "build" folder with CMake scripts from "cmake" + cmake --build build # compilation (or type "make -C build") + +This will create a folder called ``build``, then run the configuration +step to generate build files for the default build command and then +launch that build command to compile LAMMPS. During the configuration +step CMake will try to detect whether support for MPI, OpenMP, FFTW, +gzip, JPEG, PNG, and ffmpeg are available and enable the corresponding +configuration settings. 
The progress of this configuration can be +followed on the screen and a summary of selected options and settings +will be printed at the end. The ``cmake --build build`` command will +launch the compilation, which, if successful, will ultimately produce a +library ``liblammps.a`` and the LAMMPS executable ``lmp`` inside the +``build`` folder. Compilation can take a long time, since LAMMPS is a large project with many features. If your machine has multiple CPU cores (most do these days), you can speed this up by compiling sources in parallel with -``make -j N`` (with N being the maximum number of concurrently executed -tasks). Installation of the `ccache `_ (= Compiler -Cache) software may speed up repeated compilation even more, e.g. during -code development, especially when repeatedly switching between branches. +adding ``--parallel N`` to the ``cmake`` command line (with *N* being +the maximum number of concurrently executed tasks). Installation of the +`ccache `_ (= Compiler Cache) software may speed up +repeated compilation even more, e.g. during code development, especially +when repeatedly switching between branches. After the initial build, whenever you edit LAMMPS source files, enable or disable packages, change compiler flags or build options, you must -re-compile and relink the LAMMPS executable with ``cmake --build .`` (or -``make``). If the compilation fails for some reason, try running -``cmake .`` and then compile again. The included dependency tracking -should make certain that only the necessary subset of files is +re-compile and relink the LAMMPS executable with ``cmake --build build`` +(or ``make -C build``). If the compilation fails for some reason, try +running ``cmake build`` and then compile again. The included dependency +tracking should make certain that only the necessary subset of files is re-compiled. You can also delete compiled objects, libraries, and -executables with ``cmake --build . --target clean`` (or ``make clean``). 
+executables with ``cmake --build build --target clean`` (or ``make -C +build clean``). After compilation, you may optionally install the LAMMPS executable into your system with: .. code-block:: bash - make install # optional, copy compiled files into installation location + cmake --install build # optional, copy compiled files into installation location This will install the LAMMPS executable and library, some tools (if configured) and additional files like LAMMPS API headers, manpages, @@ -122,9 +123,9 @@ defaults to ``${HOME}/.local``. .. note:: If you have set `-D CMAKE_INSTALL_PREFIX` to install LAMMPS into a - system location on a Linux machine , you may also have to run (as - root) the `ldconfig` program to update the cache file for fast lookup - of system shared libraries. + system location on a Linux machine, you also have to run (as root) + the `ldconfig` program to update the cache file for fast lookup of + system shared libraries. .. _cmake_options: @@ -133,9 +134,14 @@ Configuration and build options The CMake commands have one mandatory argument: a folder containing a file called ``CMakeLists.txt`` (for LAMMPS it is located in the -``cmake`` folder) or a build folder containing a file called +``cmake`` folder, in that case the current working directory becomes +the build folder) or a build folder containing a file called ``CMakeCache.txt``, which is generated at the end of the CMake configuration step. The cache file contains all current CMake settings. +This is a "legacy mode" of running CMake and thus often found +when searching the web. We recommend using the ``-S`` and ``-B`` +flags to explicitly set the path to the folder containing the +``CMakeLists.txt`` file and the build folder, respectively. 
To modify settings, enable or disable features, you need to set *variables* with either the ``-D`` command-line flag (``-D diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 70701f3e83d..1447cd4d7d8 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -397,7 +397,7 @@ will destroy the original file, if the generation run does not complete, so using ``-g`` is recommended unless the YAML file is fully tested and working. To have the new test file recognized by ``ctest``, you need to re-run cmake. You can verify that the new test is available -by checking the output of ``ctest -N`. +by checking the output of ``ctest -N``. Some of the force style tests are rather slow to run and some are very sensitive to small differences like CPU architecture, compiler From 3203f7dbbfd4bdf9a7e68875b4e221c51246a364 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 17 Nov 2025 14:48:40 -0700 Subject: [PATCH 292/604] Update Kokkos library in LAMMPS to v5.0.0 --- lib/kokkos/CHANGELOG.md | 90 + lib/kokkos/CMakeLists.txt | 21 +- lib/kokkos/COPYRIGHT.md | 206 ++ lib/kokkos/Copyright.txt | 8 - lib/kokkos/LICENSE_FILE_HEADER | 15 - lib/kokkos/Makefile.kokkos | 1764 --------------- lib/kokkos/Makefile.targets | 129 -- lib/kokkos/README.md | 15 +- lib/kokkos/Spack.md | 268 --- .../algorithms/perf_test/CMakeLists.txt | 7 +- .../perf_test/test_inclusive_scan.cpp | 18 +- .../algorithms/perf_test/test_random.cpp | 62 + lib/kokkos/algorithms/src/CMakeLists.txt | 1 + .../algorithms/src/KokkosAlgorithms_dummy.cpp | 15 +- .../algorithms/src/Kokkos_NestedSort.hpp | 15 +- lib/kokkos/algorithms/src/Kokkos_Random.cppm | 15 +- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 96 +- lib/kokkos/algorithms/src/Kokkos_Sort.cppm | 15 +- lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 15 +- .../algorithms/src/Kokkos_StdAlgorithms.cppm | 15 +- .../algorithms/src/Kokkos_StdAlgorithms.hpp | 15 +- .../src/sorting/Kokkos_BinOpsPublicAPI.hpp | 15 +- 
.../src/sorting/Kokkos_BinSortPublicAPI.hpp | 20 +- .../sorting/Kokkos_NestedSortPublicAPI.hpp | 20 +- .../src/sorting/Kokkos_SortByKeyPublicAPI.hpp | 20 +- .../src/sorting/Kokkos_SortPublicAPI.hpp | 20 +- .../impl/Kokkos_CopyOpsForBinSortImpl.hpp | 15 +- .../sorting/impl/Kokkos_NestedSortImpl.hpp | 22 +- .../src/sorting/impl/Kokkos_SortByKeyImpl.hpp | 22 +- .../src/sorting/impl/Kokkos_SortImpl.hpp | 23 +- .../Kokkos_AdjacentDifference.hpp | 15 +- .../std_algorithms/Kokkos_AdjacentFind.hpp | 15 +- .../src/std_algorithms/Kokkos_AllOf.hpp | 15 +- .../src/std_algorithms/Kokkos_AnyOf.hpp | 15 +- .../src/std_algorithms/Kokkos_BeginEnd.hpp | 15 +- .../src/std_algorithms/Kokkos_Copy.hpp | 15 +- .../std_algorithms/Kokkos_CopyBackward.hpp | 15 +- .../src/std_algorithms/Kokkos_CopyIf.hpp | 15 +- .../src/std_algorithms/Kokkos_CopyN.hpp | 15 +- .../src/std_algorithms/Kokkos_Count.hpp | 15 +- .../src/std_algorithms/Kokkos_CountIf.hpp | 15 +- .../src/std_algorithms/Kokkos_Distance.hpp | 15 +- .../src/std_algorithms/Kokkos_Equal.hpp | 15 +- .../std_algorithms/Kokkos_ExclusiveScan.hpp | 15 +- .../src/std_algorithms/Kokkos_Fill.hpp | 15 +- .../src/std_algorithms/Kokkos_FillN.hpp | 15 +- .../src/std_algorithms/Kokkos_Find.hpp | 15 +- .../src/std_algorithms/Kokkos_FindEnd.hpp | 15 +- .../src/std_algorithms/Kokkos_FindFirstOf.hpp | 15 +- .../src/std_algorithms/Kokkos_FindIf.hpp | 15 +- .../src/std_algorithms/Kokkos_FindIfNot.hpp | 15 +- .../src/std_algorithms/Kokkos_ForEach.hpp | 15 +- .../src/std_algorithms/Kokkos_ForEachN.hpp | 15 +- .../src/std_algorithms/Kokkos_Generate.hpp | 15 +- .../src/std_algorithms/Kokkos_GenerateN.hpp | 15 +- .../std_algorithms/Kokkos_InclusiveScan.hpp | 15 +- .../std_algorithms/Kokkos_IsPartitioned.hpp | 15 +- .../src/std_algorithms/Kokkos_IsSorted.hpp | 15 +- .../std_algorithms/Kokkos_IsSortedUntil.hpp | 15 +- .../src/std_algorithms/Kokkos_IterSwap.hpp | 20 +- .../Kokkos_LexicographicalCompare.hpp | 15 +- .../src/std_algorithms/Kokkos_MaxElement.hpp | 15 
+- .../src/std_algorithms/Kokkos_MinElement.hpp | 15 +- .../std_algorithms/Kokkos_MinMaxElement.hpp | 15 +- .../src/std_algorithms/Kokkos_Mismatch.hpp | 15 +- .../src/std_algorithms/Kokkos_Move.hpp | 15 +- .../std_algorithms/Kokkos_MoveBackward.hpp | 15 +- .../src/std_algorithms/Kokkos_NoneOf.hpp | 15 +- .../std_algorithms/Kokkos_PartitionCopy.hpp | 15 +- .../std_algorithms/Kokkos_PartitionPoint.hpp | 15 +- .../src/std_algorithms/Kokkos_Reduce.hpp | 15 +- .../src/std_algorithms/Kokkos_Remove.hpp | 15 +- .../src/std_algorithms/Kokkos_RemoveCopy.hpp | 15 +- .../std_algorithms/Kokkos_RemoveCopyIf.hpp | 15 +- .../src/std_algorithms/Kokkos_RemoveIf.hpp | 15 +- .../src/std_algorithms/Kokkos_Replace.hpp | 15 +- .../src/std_algorithms/Kokkos_ReplaceCopy.hpp | 15 +- .../std_algorithms/Kokkos_ReplaceCopyIf.hpp | 15 +- .../src/std_algorithms/Kokkos_ReplaceIf.hpp | 15 +- .../src/std_algorithms/Kokkos_Reverse.hpp | 15 +- .../src/std_algorithms/Kokkos_ReverseCopy.hpp | 15 +- .../src/std_algorithms/Kokkos_Rotate.hpp | 15 +- .../src/std_algorithms/Kokkos_RotateCopy.hpp | 15 +- .../src/std_algorithms/Kokkos_Search.hpp | 15 +- .../src/std_algorithms/Kokkos_SearchN.hpp | 15 +- .../src/std_algorithms/Kokkos_ShiftLeft.hpp | 15 +- .../src/std_algorithms/Kokkos_ShiftRight.hpp | 15 +- .../src/std_algorithms/Kokkos_SwapRanges.hpp | 15 +- .../src/std_algorithms/Kokkos_Transform.hpp | 15 +- .../Kokkos_TransformExclusiveScan.hpp | 15 +- .../Kokkos_TransformInclusiveScan.hpp | 15 +- .../std_algorithms/Kokkos_TransformReduce.hpp | 15 +- .../src/std_algorithms/Kokkos_Unique.hpp | 15 +- .../src/std_algorithms/Kokkos_UniqueCopy.hpp | 15 +- .../impl/Kokkos_AdjacentDifference.hpp | 20 +- .../impl/Kokkos_AdjacentFind.hpp | 20 +- .../impl/Kokkos_AllOfAnyOfNoneOf.hpp | 15 +- .../impl/Kokkos_Constraints.hpp | 24 +- .../impl/Kokkos_CopyBackward.hpp | 20 +- .../std_algorithms/impl/Kokkos_CopyCopyN.hpp | 20 +- .../src/std_algorithms/impl/Kokkos_CopyIf.hpp | 20 +- .../impl/Kokkos_CountCountIf.hpp | 20 +- 
.../src/std_algorithms/impl/Kokkos_Equal.hpp | 20 +- .../impl/Kokkos_ExclusiveScan.hpp | 20 +- .../std_algorithms/impl/Kokkos_FillFillN.hpp | 20 +- .../std_algorithms/impl/Kokkos_FindEnd.hpp | 20 +- .../impl/Kokkos_FindFirstOf.hpp | 20 +- .../impl/Kokkos_FindIfOrNot.hpp | 20 +- .../impl/Kokkos_ForEachForEachN.hpp | 20 +- .../impl/Kokkos_FunctorsForExclusiveScan.hpp | 20 +- .../impl/Kokkos_GenerateGenerateN.hpp | 20 +- .../impl/Kokkos_HelperPredicates.hpp | 15 +- .../Kokkos_IdentityReferenceUnaryFunctor.hpp | 15 +- .../impl/Kokkos_InclusiveScan.hpp | 20 +- .../impl/Kokkos_IsPartitioned.hpp | 20 +- .../std_algorithms/impl/Kokkos_IsSorted.hpp | 20 +- .../impl/Kokkos_IsSortedUntil.hpp | 20 +- .../impl/Kokkos_LexicographicalCompare.hpp | 20 +- .../impl/Kokkos_MinMaxMinmaxElement.hpp | 20 +- .../std_algorithms/impl/Kokkos_Mismatch.hpp | 20 +- .../src/std_algorithms/impl/Kokkos_Move.hpp | 20 +- .../impl/Kokkos_MoveBackward.hpp | 20 +- .../impl/Kokkos_MustUseKokkosSingleInTeam.hpp | 20 +- .../impl/Kokkos_PartitionCopy.hpp | 20 +- .../impl/Kokkos_PartitionPoint.hpp | 20 +- .../impl/Kokkos_RandomAccessIterator.hpp | 77 +- .../src/std_algorithms/impl/Kokkos_Reduce.hpp | 24 +- ...cerWithArbitraryJoinerNoNeutralElement.hpp | 20 +- .../impl/Kokkos_RemoveAllVariants.hpp | 20 +- .../std_algorithms/impl/Kokkos_Replace.hpp | 20 +- .../impl/Kokkos_ReplaceCopy.hpp | 20 +- .../impl/Kokkos_ReplaceCopyIf.hpp | 20 +- .../std_algorithms/impl/Kokkos_ReplaceIf.hpp | 20 +- .../std_algorithms/impl/Kokkos_Reverse.hpp | 20 +- .../impl/Kokkos_ReverseCopy.hpp | 20 +- .../src/std_algorithms/impl/Kokkos_Rotate.hpp | 20 +- .../std_algorithms/impl/Kokkos_RotateCopy.hpp | 20 +- .../src/std_algorithms/impl/Kokkos_Search.hpp | 20 +- .../std_algorithms/impl/Kokkos_SearchN.hpp | 21 +- .../std_algorithms/impl/Kokkos_ShiftLeft.hpp | 20 +- .../std_algorithms/impl/Kokkos_ShiftRight.hpp | 20 +- .../std_algorithms/impl/Kokkos_SwapRanges.hpp | 20 +- .../std_algorithms/impl/Kokkos_Transform.hpp | 20 +- 
.../impl/Kokkos_TransformExclusiveScan.hpp | 20 +- .../impl/Kokkos_TransformInclusiveScan.hpp | 20 +- .../impl/Kokkos_TransformReduce.hpp | 20 +- .../src/std_algorithms/impl/Kokkos_Unique.hpp | 20 +- .../std_algorithms/impl/Kokkos_UniqueCopy.hpp | 20 +- ...Kokkos_ValueWrapperForNoNeutralElement.hpp | 15 +- lib/kokkos/algorithms/unit_tests/Makefile | 127 -- .../algorithms/unit_tests/TestBinSortA.hpp | 20 +- .../algorithms/unit_tests/TestBinSortB.hpp | 15 +- .../algorithms/unit_tests/TestNestedSort.hpp | 16 +- .../algorithms/unit_tests/TestRandom.hpp | 22 +- .../unit_tests/TestRandomAccessIterator.cpp | 15 +- lib/kokkos/algorithms/unit_tests/TestSort.hpp | 23 +- .../algorithms/unit_tests/TestSortByKey.hpp | 18 +- .../unit_tests/TestSortCustomComp.hpp | 18 +- .../TestStdAlgorithmsAdjacentDifference.cpp | 15 +- .../TestStdAlgorithmsAdjacentFind.cpp | 66 +- .../TestStdAlgorithmsAllAnyNoneOf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsCommon.cpp | 15 +- .../unit_tests/TestStdAlgorithmsCommon.hpp | 260 +-- .../TestStdAlgorithmsCompileOnly.cpp | 20 +- .../TestStdAlgorithmsConstraints.cpp | 20 +- .../unit_tests/TestStdAlgorithmsCopyIf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsCount.cpp | 15 +- .../unit_tests/TestStdAlgorithmsEqual.cpp | 15 +- .../TestStdAlgorithmsExclusiveScan.cpp | 34 +- .../unit_tests/TestStdAlgorithmsFind.cpp | 15 +- .../unit_tests/TestStdAlgorithmsFindEnd.cpp | 72 +- .../TestStdAlgorithmsFindFirstOf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsForEach.cpp | 15 +- .../TestStdAlgorithmsHelperFunctors.hpp | 20 +- .../TestStdAlgorithmsInclusiveScan.cpp | 45 +- .../unit_tests/TestStdAlgorithmsIsSorted.cpp | 15 +- .../TestStdAlgorithmsIsSortedUntil.cpp | 15 +- ...estStdAlgorithmsLexicographicalCompare.cpp | 15 +- .../TestStdAlgorithmsMinMaxElementOps.cpp | 18 +- .../unit_tests/TestStdAlgorithmsMismatch.cpp | 15 +- .../unit_tests/TestStdAlgorithmsModOps.cpp | 15 +- .../unit_tests/TestStdAlgorithmsModSeqOps.cpp | 15 +- .../TestStdAlgorithmsMoveBackward.cpp | 
15 +- .../unit_tests/TestStdAlgorithmsNumerics.cpp | 15 +- .../TestStdAlgorithmsPartitionCopy.cpp | 15 +- .../TestStdAlgorithmsPartitioningOps.cpp | 15 +- .../unit_tests/TestStdAlgorithmsRemove.cpp | 15 +- .../TestStdAlgorithmsRemoveCopy.cpp | 15 +- .../TestStdAlgorithmsRemoveCopyIf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsRemoveIf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsReplace.cpp | 15 +- .../TestStdAlgorithmsReplaceCopy.cpp | 15 +- .../TestStdAlgorithmsReplaceCopyIf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsReplaceIf.cpp | 30 +- .../unit_tests/TestStdAlgorithmsReverse.cpp | 15 +- .../unit_tests/TestStdAlgorithmsRotate.cpp | 15 +- .../TestStdAlgorithmsRotateCopy.cpp | 15 +- .../unit_tests/TestStdAlgorithmsSearch.cpp | 51 +- .../unit_tests/TestStdAlgorithmsSearch_n.cpp | 63 +- .../unit_tests/TestStdAlgorithmsShiftLeft.cpp | 40 +- .../TestStdAlgorithmsShiftRight.cpp | 38 +- ...estStdAlgorithmsTeamAdjacentDifference.cpp | 15 +- .../TestStdAlgorithmsTeamAdjacentFind.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamAllOf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamAnyOf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamCopy.cpp | 15 +- .../TestStdAlgorithmsTeamCopyBackward.cpp | 15 +- .../TestStdAlgorithmsTeamCopyIf.cpp | 15 +- .../TestStdAlgorithmsTeamCopy_n.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamCount.cpp | 15 +- .../TestStdAlgorithmsTeamCountIf.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamEqual.cpp | 15 +- .../TestStdAlgorithmsTeamExclusiveScan.cpp | 32 +- .../unit_tests/TestStdAlgorithmsTeamFill.cpp | 15 +- .../TestStdAlgorithmsTeamFill_n.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamFind.cpp | 15 +- .../TestStdAlgorithmsTeamFindEnd.cpp | 15 +- .../TestStdAlgorithmsTeamFindFirstOf.cpp | 15 +- .../TestStdAlgorithmsTeamFindIf.cpp | 15 +- .../TestStdAlgorithmsTeamFindIfNot.cpp | 15 +- .../TestStdAlgorithmsTeamForEach.cpp | 15 +- .../TestStdAlgorithmsTeamForEachN.cpp | 15 +- .../TestStdAlgorithmsTeamGenerate.cpp | 15 +- 
.../TestStdAlgorithmsTeamGenerate_n.cpp | 15 +- .../TestStdAlgorithmsTeamInclusiveScan.cpp | 32 +- .../TestStdAlgorithmsTeamIsPartitioned.cpp | 15 +- .../TestStdAlgorithmsTeamIsSorted.cpp | 15 +- .../TestStdAlgorithmsTeamIsSortedUntil.cpp | 16 +- ...tdAlgorithmsTeamLexicographicalCompare.cpp | 15 +- .../TestStdAlgorithmsTeamMaxElement.cpp | 15 +- .../TestStdAlgorithmsTeamMinElement.cpp | 15 +- .../TestStdAlgorithmsTeamMinMaxElement.cpp | 15 +- .../TestStdAlgorithmsTeamMismatch.cpp | 15 +- .../unit_tests/TestStdAlgorithmsTeamMove.cpp | 15 +- .../TestStdAlgorithmsTeamMoveBackward.cpp | 15 +- .../TestStdAlgorithmsTeamNoneOf.cpp | 15 +- .../TestStdAlgorithmsTeamPartitionCopy.cpp | 15 +- .../TestStdAlgorithmsTeamPartitionPoint.cpp | 15 +- .../TestStdAlgorithmsTeamReduce.cpp | 34 +- .../TestStdAlgorithmsTeamRemove.cpp | 15 +- .../TestStdAlgorithmsTeamRemoveCopy.cpp | 15 +- .../TestStdAlgorithmsTeamRemoveCopyIf.cpp | 15 +- .../TestStdAlgorithmsTeamRemoveIf.cpp | 15 +- .../TestStdAlgorithmsTeamReplace.cpp | 15 +- .../TestStdAlgorithmsTeamReplaceCopy.cpp | 15 +- .../TestStdAlgorithmsTeamReplaceCopyIf.cpp | 15 +- .../TestStdAlgorithmsTeamReplaceIf.cpp | 15 +- .../TestStdAlgorithmsTeamReverse.cpp | 15 +- .../TestStdAlgorithmsTeamReverseCopy.cpp | 15 +- .../TestStdAlgorithmsTeamRotate.cpp | 15 +- .../TestStdAlgorithmsTeamRotateCopy.cpp | 15 +- .../TestStdAlgorithmsTeamSearch.cpp | 15 +- .../TestStdAlgorithmsTeamSearchN.cpp | 15 +- .../TestStdAlgorithmsTeamShiftLeft.cpp | 39 +- .../TestStdAlgorithmsTeamShiftRight.cpp | 36 +- .../TestStdAlgorithmsTeamSwapRanges.cpp | 15 +- ...TestStdAlgorithmsTeamTransformBinaryOp.cpp | 15 +- ...tdAlgorithmsTeamTransformExclusiveScan.cpp | 27 +- ...tdAlgorithmsTeamTransformInclusiveScan.cpp | 32 +- .../TestStdAlgorithmsTeamTransformReduce.cpp | 37 +- .../TestStdAlgorithmsTeamTransformUnaryOp.cpp | 15 +- .../TestStdAlgorithmsTeamUnique.cpp | 15 +- .../TestStdAlgorithmsTeamUniqueCopy.cpp | 15 +- ...estStdAlgorithmsTransformExclusiveScan.cpp | 37 
+- ...estStdAlgorithmsTransformInclusiveScan.cpp | 48 +- .../TestStdAlgorithmsTransformUnaryOp.cpp | 15 +- .../unit_tests/TestStdAlgorithmsUnique.cpp | 15 +- .../TestStdAlgorithmsUniqueCopy.cpp | 48 +- .../algorithms/unit_tests/TestStdReducers.cpp | 20 +- .../algorithms/unit_tests/UnitTestMain.cpp | 20 +- lib/kokkos/benchmarks/atomic/Makefile | 52 - lib/kokkos/benchmarks/atomic/main.cpp | 20 +- .../benchmarks/bytes_and_flops/Makefile | 52 - .../benchmarks/bytes_and_flops/bench.hpp | 20 +- .../bytes_and_flops/bench_double.cpp | 15 +- .../bytes_and_flops/bench_float.cpp | 15 +- .../bytes_and_flops/bench_int32_t.cpp | 15 +- .../bytes_and_flops/bench_int64_t.cpp | 15 +- .../bytes_and_flops/bench_stride.hpp | 15 +- .../bytes_and_flops/bench_unroll_stride.hpp | 15 +- .../benchmarks/bytes_and_flops/main.cpp | 20 +- lib/kokkos/benchmarks/gather/Makefile | 52 - lib/kokkos/benchmarks/gather/gather.hpp | 15 +- .../benchmarks/gather/gather_unroll.hpp | 20 +- lib/kokkos/benchmarks/gather/main.cpp | 20 +- lib/kokkos/benchmarks/gups/gups.cpp | 15 +- .../launch_latency/launch_latency.cpp | 23 +- .../benchmarks/policy_performance/Makefile | 52 - .../benchmarks/policy_performance/main.cpp | 22 +- .../policy_performance/policy_perf_test.hpp | 22 +- lib/kokkos/benchmarks/stream/Makefile | 52 - .../benchmarks/stream/stream-kokkos.cpp | 17 +- .../benchmarks/view_copy_constructor/Makefile | 47 - .../view_copy_constructor.cpp | 21 +- lib/kokkos/bin/runtest | 165 -- lib/kokkos/cmake/KokkosConfig.cmake.in | 4 +- lib/kokkos/cmake/KokkosConfigCommon.cmake.in | 16 +- .../cmake/KokkosCore_Config_HeaderSet.in | 17 +- lib/kokkos/cmake/KokkosCore_config.h.in | 13 +- lib/kokkos/cmake/Kokkos_Version_Info.cpp.in | 15 +- lib/kokkos/cmake/Kokkos_Version_Info.hpp | 15 +- lib/kokkos/cmake/Modules/CudaToolkit.cmake | 880 -------- lib/kokkos/cmake/Modules/FindTPLCUDA.cmake | 34 +- lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake | 1 - .../cmake/Modules/FindTPLLIBQUADMATH.cmake | 2 + 
lib/kokkos/cmake/Modules/FindTPLROCM.cmake | 25 +- .../cmake/Modules/FindTPLROCTHRUST.cmake | 10 - lib/kokkos/cmake/compile_tests/amd_apu.cc | 15 +- lib/kokkos/cmake/compile_tests/clang_omp.cpp | 15 +- .../cmake/compile_tests/cplusplus17.cpp | 25 - .../cmake/compile_tests/cplusplus20.cpp | 21 + .../compile_tests/cuda_compute_capability.cc | 19 +- .../cmake/compile_tests/get_sve_hw_vl.cpp | 15 +- lib/kokkos/cmake/deps/CUDA.cmake | 16 +- lib/kokkos/cmake/deps/HWLOC.cmake | 16 +- lib/kokkos/cmake/deps/Pthread.cmake | 16 +- lib/kokkos/cmake/deps/quadmath.cmake | 16 +- lib/kokkos/cmake/fake_tribits.cmake | 4 +- lib/kokkos/cmake/kokkos_arch.cmake | 50 +- lib/kokkos/cmake/kokkos_compiler_id.cmake | 101 +- lib/kokkos/cmake/kokkos_corner_cases.cmake | 8 - lib/kokkos/cmake/kokkos_enable_devices.cmake | 3 - lib/kokkos/cmake/kokkos_enable_options.cmake | 32 +- lib/kokkos/cmake/kokkos_pick_cxx_std.cmake | 5 +- lib/kokkos/cmake/kokkos_test_cxx_std.cmake | 37 +- lib/kokkos/cmake/kokkos_tpls.cmake | 16 +- lib/kokkos/cmake/kokkos_tribits.cmake | 89 +- lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake | 16 +- lib/kokkos/cmake/tpls/FindTPLPthread.cmake | 16 +- lib/kokkos/cmake/tpls/FindTPLquadmath.cmake | 16 +- .../containers/performance_tests/Makefile | 103 - .../containers/performance_tests/TestCuda.cpp | 26 +- .../performance_tests/TestDynRankView.hpp | 21 +- .../performance_tests/TestGlobal2LocalIds.hpp | 21 +- .../containers/performance_tests/TestHIP.cpp | 26 +- .../containers/performance_tests/TestHPX.cpp | 22 +- .../containers/performance_tests/TestMain.cpp | 20 +- .../performance_tests/TestOpenMP.cpp | 22 +- .../performance_tests/TestScatterView.hpp | 20 +- .../performance_tests/TestThreads.cpp | 22 +- .../TestUnorderedMapPerformance.hpp | 15 +- lib/kokkos/containers/src/CMakeLists.txt | 25 +- lib/kokkos/containers/src/Kokkos_Bitset.cppm | 17 + lib/kokkos/containers/src/Kokkos_Bitset.hpp | 22 +- .../containers/src/Kokkos_DualView.cppm | 24 + 
lib/kokkos/containers/src/Kokkos_DualView.hpp | 40 +- .../containers/src/Kokkos_DynRankView.cppm | 34 + .../containers/src/Kokkos_DynRankView.hpp | 190 +- .../src/Kokkos_DynRankView_Impl.cppm | 15 + .../containers/src/Kokkos_DynamicView.cppm | 25 + .../containers/src/Kokkos_DynamicView.hpp | 76 +- .../containers/src/Kokkos_ErrorReporter.cppm | 16 + .../containers/src/Kokkos_ErrorReporter.hpp | 224 +- .../containers/src/Kokkos_Functional.cppm | 22 + .../containers/src/Kokkos_Functional.hpp | 15 +- .../containers/src/Kokkos_OffsetView.cppm | 33 + .../containers/src/Kokkos_OffsetView.hpp | 100 +- .../containers/src/Kokkos_ScatterView.cppm | 39 + .../containers/src/Kokkos_ScatterView.hpp | 23 +- .../src/Kokkos_ScatterView_Impl.cppm | 15 + .../containers/src/Kokkos_StaticCrsGraph.hpp | 31 +- .../containers/src/Kokkos_UnorderedMap.cppm | 21 + .../containers/src/Kokkos_UnorderedMap.hpp | 154 +- .../src/Kokkos_UnorderedMap_Impl.cppm | 14 + lib/kokkos/containers/src/Kokkos_Vector.hpp | 15 +- .../src/impl/Kokkos_Bitset_impl.hpp | 15 +- .../src/impl/Kokkos_Functional_impl.hpp | 15 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 40 +- .../src/impl/Kokkos_UnorderedMap_impl.cpp | 21 +- .../src/impl/Kokkos_UnorderedMap_impl.hpp | 23 +- .../containers/unit_tests/CMakeLists.txt | 1 + lib/kokkos/containers/unit_tests/Makefile | 174 -- .../containers/unit_tests/TestBitset.hpp | 23 +- .../containers/unit_tests/TestCompileMain.cpp | 15 +- .../unit_tests/TestCreateMirror.cpp | 39 +- .../containers/unit_tests/TestDualView.hpp | 42 +- .../unit_tests/TestDualViewParameterPack.cpp | 23 +- .../unit_tests/TestDynRankViewTypedefs.cpp | 56 +- .../unit_tests/TestDynRankView_Ctors.hpp | 15 +- .../TestDynRankView_LayoutMember.hpp | 38 + .../TestDynRankView_TeamScratch.hpp | 22 +- .../TestDynRankView_ViewCustomization.hpp | 96 +- .../containers/unit_tests/TestDynViewAPI.hpp | 33 +- .../unit_tests/TestDynViewAPI_generic.hpp | 15 +- .../unit_tests/TestDynViewAPI_rank12345.hpp | 15 +- 
.../unit_tests/TestDynViewAPI_rank67.hpp | 15 +- .../containers/unit_tests/TestDynamicView.hpp | 25 +- .../unit_tests/TestErrorReporter.hpp | 98 +- .../containers/unit_tests/TestIsViewTrait.cpp | 25 +- .../containers/unit_tests/TestOffsetView.hpp | 112 +- .../containers/unit_tests/TestScatterView.hpp | 23 +- .../unit_tests/TestStaticCrsGraph.hpp | 28 +- .../unit_tests/TestUnorderedMap.hpp | 71 +- .../containers/unit_tests/TestVector.hpp | 15 +- .../TestViewCtorPropEmbeddedDim.hpp | 29 +- .../unit_tests/TestWithoutInitializing.hpp | 25 +- .../containers/unit_tests/UnitTestMain.cpp | 20 +- lib/kokkos/core/perf_test/BenchmarkMain.cpp | 20 +- .../core/perf_test/Benchmark_Context.cpp | 17 +- .../core/perf_test/Benchmark_Context.hpp | 20 +- lib/kokkos/core/perf_test/CMakeLists.txt | 15 +- lib/kokkos/core/perf_test/Makefile | 84 - .../core/perf_test/PerfTestBlasKernels.hpp | 15 +- lib/kokkos/core/perf_test/PerfTestDriver.hpp | 15 +- .../core/perf_test/PerfTestGramSchmidt.cpp | 22 +- lib/kokkos/core/perf_test/PerfTestHexGrad.cpp | 20 +- lib/kokkos/core/perf_test/PerfTestMDRange.hpp | 21 +- lib/kokkos/core/perf_test/PerfTestMain.cpp | 20 +- .../core/perf_test/PerfTest_Category.hpp | 15 +- .../perf_test/PerfTest_CustomReduction.cpp | 24 +- .../PerfTest_ExecSpacePartitioning.cpp | 22 +- lib/kokkos/core/perf_test/PerfTest_Gemv.cpp | 77 + .../core/perf_test/PerfTest_MallocFree.cpp | 38 +- lib/kokkos/core/perf_test/PerfTest_Stream.cpp | 262 +++ .../core/perf_test/PerfTest_ViewAllocate.cpp | 22 +- .../core/perf_test/PerfTest_ViewCopy.hpp | 21 +- .../core/perf_test/PerfTest_ViewCopy_Raw.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_a123.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_a45.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_a6.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_a7.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_a8.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_b123.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_b45.cpp | 15 +- 
.../core/perf_test/PerfTest_ViewCopy_b6.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_b7.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_b8.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_c123.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_c45.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_c6.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_c7.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_c8.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_d123.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_d45.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_d6.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_d7.cpp | 15 +- .../core/perf_test/PerfTest_ViewCopy_d8.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill.hpp | 16 +- .../core/perf_test/PerfTest_ViewFill_123.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill_45.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill_6.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill_7.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill_8.cpp | 15 +- .../core/perf_test/PerfTest_ViewFill_Raw.cpp | 15 +- .../PerfTest_ViewFirstTouch_DeepCopy.cpp | 28 + .../PerfTest_ViewFirstTouch_Initialize.cpp | 27 + .../PerfTest_ViewFirstTouch_ParallelFor.cpp | 32 + .../core/perf_test/PerfTest_ViewResize.hpp | 20 +- .../perf_test/PerfTest_ViewResize_123.cpp | 15 +- .../core/perf_test/PerfTest_ViewResize_45.cpp | 15 +- .../core/perf_test/PerfTest_ViewResize_6.cpp | 15 +- .../core/perf_test/PerfTest_ViewResize_7.cpp | 15 +- .../core/perf_test/PerfTest_ViewResize_8.cpp | 15 +- .../perf_test/PerfTest_ViewResize_Raw.cpp | 15 +- lib/kokkos/core/perf_test/run_taskdag.sh | 21 - lib/kokkos/core/perf_test/test_atomic.cpp | 22 +- .../perf_test/test_atomic_minmax_simple.cpp | 20 +- lib/kokkos/core/perf_test/test_mempool.cpp | 20 +- lib/kokkos/core/perf_test/test_reduction.cpp | 20 +- .../core/perf_test/test_sharedSpace.cpp | 20 +- lib/kokkos/core/perf_test/test_taskdag.cpp | 235 -- lib/kokkos/core/src/CMakeLists.txt | 53 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp | 18 
+- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 21 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp | 15 +- .../Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp | 32 +- .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 15 +- .../src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp | 26 +- .../src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp | 15 +- .../core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp | 17 +- .../src/Cuda/Kokkos_Cuda_Half_Conversion.hpp | 232 +- .../src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp | 41 +- ...Kokkos_Cuda_Half_MathematicalFunctions.hpp | 27 +- .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 43 +- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 34 +- .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 122 +- .../src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp | 22 +- .../src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp | 157 +- .../src/Cuda/Kokkos_Cuda_Parallel_Range.hpp | 52 +- .../src/Cuda/Kokkos_Cuda_Parallel_Team.hpp | 52 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 73 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 48 - lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 1248 ----------- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 17 +- .../core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 18 +- .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 15 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 19 +- .../src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 21 +- .../core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp | 15 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 15 +- lib/kokkos/core/src/HIP/Kokkos_HIP.cpp | 24 +- lib/kokkos/core/src/HIP/Kokkos_HIP.hpp | 17 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp | 15 +- .../HIP/Kokkos_HIP_BlockSize_Deduction.hpp | 52 +- .../core/src/HIP/Kokkos_HIP_DeepCopy.cpp | 15 +- .../core/src/HIP/Kokkos_HIP_DeepCopy.hpp | 15 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Error.cpp | 36 + lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp | 56 +- .../src/HIP/Kokkos_HIP_GraphNodeKernel.hpp | 28 +- .../src/HIP/Kokkos_HIP_GraphNode_Impl.hpp | 15 +- .../core/src/HIP/Kokkos_HIP_Graph_Impl.hpp | 17 +- 
.../src/HIP/Kokkos_HIP_Half_Conversion.hpp | 42 +- .../src/HIP/Kokkos_HIP_Half_Impl_Type.hpp | 15 +- .../core/src/HIP/Kokkos_HIP_Instance.cpp | 60 +- .../core/src/HIP/Kokkos_HIP_Instance.hpp | 19 +- .../core/src/HIP/Kokkos_HIP_IsXnack.cpp | 15 +- .../core/src/HIP/Kokkos_HIP_IsXnack.hpp | 18 +- .../core/src/HIP/Kokkos_HIP_KernelLaunch.hpp | 37 +- .../core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp | 22 +- .../HIP/Kokkos_HIP_ParallelFor_MDRange.hpp | 15 +- .../src/HIP/Kokkos_HIP_ParallelFor_Range.hpp | 20 +- .../src/HIP/Kokkos_HIP_ParallelFor_Team.hpp | 24 +- .../HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp | 15 +- .../HIP/Kokkos_HIP_ParallelReduce_Range.hpp | 29 +- .../HIP/Kokkos_HIP_ParallelReduce_Team.hpp | 15 +- .../src/HIP/Kokkos_HIP_ParallelScan_Range.hpp | 15 +- .../core/src/HIP/Kokkos_HIP_ReduceScan.hpp | 71 +- .../HIP/Kokkos_HIP_SharedAllocationRecord.cpp | 15 +- .../HIP/Kokkos_HIP_SharedAllocationRecord.hpp | 15 +- .../src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp | 15 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp | 24 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp | 15 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp | 17 +- .../src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp | 41 +- .../core/src/HIP/Kokkos_HIP_UniqueToken.hpp | 15 +- .../core/src/HIP/Kokkos_HIP_Vectorization.hpp | 15 +- .../src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp | 22 +- .../core/src/HIP/Kokkos_HIP_ZeroMemset.cpp | 15 +- .../core/src/HIP/Kokkos_HIP_ZeroMemset.hpp | 15 +- lib/kokkos/core/src/HPX/Kokkos_HPX.cpp | 25 +- lib/kokkos/core/src/HPX/Kokkos_HPX.hpp | 43 +- .../core/src/HPX/Kokkos_HPX_MDRangePolicy.hpp | 15 +- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp | 42 - lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 274 --- .../src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 15 +- lib/kokkos/core/src/KokkosExp_InterOp.hpp | 18 +- .../core/src/KokkosExp_MDRangePolicy.hpp | 29 +- lib/kokkos/core/src/Kokkos_Abort.hpp | 15 +- .../src/Kokkos_AcquireUniqueTokenImpl.hpp | 15 +- lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp | 15 
+- lib/kokkos/core/src/Kokkos_Array.hpp | 85 +- lib/kokkos/core/src/Kokkos_Assert.hpp | 15 +- lib/kokkos/core/src/Kokkos_Atomic.hpp | 15 +- .../core/src/Kokkos_Atomics_Desul_Wrapper.hpp | 21 +- .../core/src/Kokkos_BitManipulation.hpp | 676 +++--- lib/kokkos/core/src/Kokkos_CheckUsage.hpp | 106 + lib/kokkos/core/src/Kokkos_Clamp.hpp | 15 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 26 +- lib/kokkos/core/src/Kokkos_Concepts.hpp | 15 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 196 +- lib/kokkos/core/src/Kokkos_Core.cppm | 849 +++++++ lib/kokkos/core/src/Kokkos_Core.hpp | 295 +-- lib/kokkos/core/src/Kokkos_Core_Impl.cppm | 101 + lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 15 +- lib/kokkos/core/src/Kokkos_Crs.hpp | 25 +- lib/kokkos/core/src/Kokkos_DetectionIdiom.hpp | 15 +- lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 56 +- lib/kokkos/core/src/Kokkos_Extents.hpp | 17 +- lib/kokkos/core/src/Kokkos_Future.hpp | 484 ---- lib/kokkos/core/src/Kokkos_Graph.hpp | 31 +- lib/kokkos/core/src/Kokkos_GraphNode.hpp | 171 +- lib/kokkos/core/src/Kokkos_Graph_fwd.hpp | 18 +- lib/kokkos/core/src/Kokkos_Half.hpp | 16 +- lib/kokkos/core/src/Kokkos_HostSpace.hpp | 15 +- lib/kokkos/core/src/Kokkos_Layout.hpp | 15 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 64 +- .../core/src/Kokkos_MathematicalConstants.hpp | 15 +- .../core/src/Kokkos_MathematicalFunctions.hpp | 15 +- .../Kokkos_MathematicalSpecialFunctions.hpp | 15 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 17 +- lib/kokkos/core/src/Kokkos_MemoryTraits.hpp | 27 +- lib/kokkos/core/src/Kokkos_MinMax.hpp | 15 +- lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 15 +- lib/kokkos/core/src/Kokkos_Pair.hpp | 15 +- lib/kokkos/core/src/Kokkos_Parallel.hpp | 64 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 1402 +----------- .../core/src/Kokkos_PointerOwnership.hpp | 21 +- lib/kokkos/core/src/Kokkos_Printf.hpp | 15 +- .../src/Kokkos_Profiling_ProfileSection.hpp | 15 +- .../src/Kokkos_Profiling_ScopedRegion.hpp | 15 +- lib/kokkos/core/src/Kokkos_Rank.hpp 
| 15 +- .../core/src/Kokkos_ReductionIdentity.hpp | 429 +--- lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 15 +- lib/kokkos/core/src/Kokkos_Swap.hpp | 15 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 696 ------ .../core/src/Kokkos_TaskScheduler_fwd.hpp | 223 -- lib/kokkos/core/src/Kokkos_Timer.hpp | 80 +- lib/kokkos/core/src/Kokkos_Tuners.hpp | 100 +- lib/kokkos/core/src/Kokkos_TypeInfo.hpp | 19 +- lib/kokkos/core/src/Kokkos_UniqueToken.hpp | 15 +- lib/kokkos/core/src/Kokkos_Vectorization.hpp | 15 +- lib/kokkos/core/src/Kokkos_View.hpp | 704 ++++-- .../core/src/Kokkos_WorkGraphPolicy.hpp | 15 +- lib/kokkos/core/src/Kokkos_hwloc.hpp | 15 +- .../core/src/OpenACC/Kokkos_OpenACC.cpp | 19 +- .../core/src/OpenACC/Kokkos_OpenACC.hpp | 16 +- .../core/src/OpenACC/Kokkos_OpenACCSpace.cpp | 15 +- .../core/src/OpenACC/Kokkos_OpenACCSpace.hpp | 15 +- .../src/OpenACC/Kokkos_OpenACC_DeepCopy.hpp | 15 +- .../OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp | 16 +- .../src/OpenACC/Kokkos_OpenACC_Instance.cpp | 16 +- .../src/OpenACC/Kokkos_OpenACC_Instance.hpp | 34 +- .../OpenACC/Kokkos_OpenACC_MDRangePolicy.hpp | 15 +- .../src/OpenACC/Kokkos_OpenACC_Macros.hpp | 15 +- .../Kokkos_OpenACC_ParallelFor_MDRange.hpp | 15 +- .../Kokkos_OpenACC_ParallelFor_Range.hpp | 15 +- .../Kokkos_OpenACC_ParallelFor_Team.hpp | 19 +- .../Kokkos_OpenACC_ParallelReduce_MDRange.hpp | 15 +- .../Kokkos_OpenACC_ParallelReduce_Range.hpp | 436 +++- .../Kokkos_OpenACC_ParallelReduce_Team.hpp | 16 +- .../Kokkos_OpenACC_ParallelScan_Range.hpp | 29 +- .../OpenACC/Kokkos_OpenACC_ScheduleType.hpp | 15 +- .../Kokkos_OpenACC_SharedAllocationRecord.cpp | 15 +- .../Kokkos_OpenACC_SharedAllocationRecord.hpp | 15 +- .../core/src/OpenACC/Kokkos_OpenACC_Team.hpp | 62 +- .../src/OpenACC/Kokkos_OpenACC_Traits.hpp | 15 +- lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp | 19 +- lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp | 22 +- .../src/OpenMP/Kokkos_OpenMP_Instance.cpp | 21 +- .../src/OpenMP/Kokkos_OpenMP_Instance.hpp | 76 +- 
.../OpenMP/Kokkos_OpenMP_MDRangePolicy.hpp | 15 +- .../src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp | 27 +- .../OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp | 15 +- .../OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp | 15 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 75 - .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 379 ---- .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 21 +- .../src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp | 15 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 15 +- .../src/OpenMP/Kokkos_OpenMP_ZeroMemset.hpp | 33 + .../src/OpenMPTarget/Kokkos_OpenMPTarget.hpp | 18 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 15 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.hpp | 15 +- .../Kokkos_OpenMPTarget_Abort.hpp | 15 +- .../Kokkos_OpenMPTarget_DeepCopy.hpp | 15 +- .../Kokkos_OpenMPTarget_Error.hpp | 15 +- .../Kokkos_OpenMPTarget_FunctorAdapter.hpp | 15 +- .../Kokkos_OpenMPTarget_Instance.cpp | 48 +- .../Kokkos_OpenMPTarget_Instance.hpp | 19 +- .../Kokkos_OpenMPTarget_MDRangePolicy.hpp | 15 +- .../Kokkos_OpenMPTarget_Macros.hpp | 15 +- .../Kokkos_OpenMPTarget_Parallel.hpp | 21 +- ...okkos_OpenMPTarget_ParallelFor_MDRange.hpp | 17 +- .../Kokkos_OpenMPTarget_ParallelFor_Range.hpp | 17 +- .../Kokkos_OpenMPTarget_ParallelFor_Team.hpp | 30 +- ...os_OpenMPTarget_ParallelReduce_MDRange.hpp | 15 +- ...kkos_OpenMPTarget_ParallelReduce_Range.hpp | 15 +- ...okkos_OpenMPTarget_ParallelReduce_Team.hpp | 15 +- ...Kokkos_OpenMPTarget_ParallelScan_Range.hpp | 38 +- .../Kokkos_OpenMPTarget_ParallelScan_Team.hpp | 15 +- .../Kokkos_OpenMPTarget_Parallel_Common.hpp | 47 +- .../Kokkos_OpenMPTarget_Reducer.hpp | 15 +- .../Kokkos_OpenMPTarget_UniqueToken.hpp | 15 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 29 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.hpp | 17 +- .../core/src/SYCL/Kokkos_SYCL_Abort.hpp | 15 +- .../core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp | 15 +- .../src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp | 29 +- .../src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp | 15 +- 
.../core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp | 21 +- .../src/SYCL/Kokkos_SYCL_Half_Conversion.hpp | 66 +- .../src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp | 15 +- ...Kokkos_SYCL_Half_MathematicalFunctions.hpp | 15 +- .../core/src/SYCL/Kokkos_SYCL_Instance.cpp | 22 +- .../core/src/SYCL/Kokkos_SYCL_Instance.hpp | 17 +- .../src/SYCL/Kokkos_SYCL_MDRangePolicy.hpp | 22 +- .../SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp | 15 +- .../SYCL/Kokkos_SYCL_ParallelFor_Range.hpp | 87 +- .../src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp | 24 +- .../Kokkos_SYCL_ParallelReduce_MDRange.hpp | 15 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp | 15 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp | 23 +- .../SYCL/Kokkos_SYCL_ParallelScan_Range.hpp | 15 +- .../core/src/SYCL/Kokkos_SYCL_Space.cpp | 15 +- .../core/src/SYCL/Kokkos_SYCL_Space.hpp | 15 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp | 17 +- .../core/src/SYCL/Kokkos_SYCL_TeamPolicy.hpp | 43 +- .../core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp | 15 +- .../SYCL/Kokkos_SYCL_WorkgroupReduction.hpp | 38 +- .../core/src/SYCL/Kokkos_SYCL_ZeroMemset.hpp | 15 +- lib/kokkos/core/src/Serial/Kokkos_Serial.cpp | 24 +- lib/kokkos/core/src/Serial/Kokkos_Serial.hpp | 20 +- .../Serial/Kokkos_Serial_MDRangePolicy.hpp | 15 +- .../Serial/Kokkos_Serial_Parallel_MDRange.hpp | 15 +- .../Serial/Kokkos_Serial_Parallel_Range.hpp | 15 +- .../Serial/Kokkos_Serial_Parallel_Team.hpp | 27 +- .../core/src/Serial/Kokkos_Serial_Task.cpp | 42 - .../core/src/Serial/Kokkos_Serial_Task.hpp | 231 -- .../src/Serial/Kokkos_Serial_UniqueToken.hpp | 15 +- .../Serial/Kokkos_Serial_WorkGraphPolicy.hpp | 15 +- .../src/Serial/Kokkos_Serial_ZeroMemset.hpp | 23 +- .../core/src/Threads/Kokkos_Threads.hpp | 19 +- .../src/Threads/Kokkos_Threads_Instance.cpp | 30 +- .../src/Threads/Kokkos_Threads_Instance.hpp | 19 +- .../Threads/Kokkos_Threads_MDRangePolicy.hpp | 15 +- .../Kokkos_Threads_ParallelFor_MDRange.hpp | 15 +- .../Kokkos_Threads_ParallelFor_Range.hpp | 15 +- 
.../Kokkos_Threads_ParallelFor_Team.hpp | 17 +- .../Kokkos_Threads_ParallelReduce_MDRange.hpp | 15 +- .../Kokkos_Threads_ParallelReduce_Range.hpp | 15 +- .../Kokkos_Threads_ParallelReduce_Team.hpp | 15 +- .../Kokkos_Threads_ParallelScan_Range.hpp | 15 +- .../src/Threads/Kokkos_Threads_Spinwait.cpp | 15 +- .../src/Threads/Kokkos_Threads_Spinwait.hpp | 15 +- .../core/src/Threads/Kokkos_Threads_State.hpp | 15 +- .../core/src/Threads/Kokkos_Threads_Team.hpp | 21 +- .../Threads/Kokkos_Threads_UniqueToken.hpp | 15 +- .../Kokkos_Threads_WorkGraphPolicy.hpp | 15 +- .../src/Threads/Kokkos_Threads_ZeroMemset.hpp | 33 + .../core/src/View/Hooks/Kokkos_ViewHooks.hpp | 30 +- lib/kokkos/core/src/View/Kokkos_BasicView.hpp | 217 +- .../Kokkos_ViewAccessPreconditionsCheck.hpp | 15 +- lib/kokkos/core/src/View/Kokkos_ViewAlloc.hpp | 22 +- .../core/src/View/Kokkos_ViewAtomic.hpp | 15 +- .../core/src/View/Kokkos_ViewCommonType.hpp | 24 +- lib/kokkos/core/src/View/Kokkos_ViewCtor.hpp | 59 +- .../core/src/View/Kokkos_ViewDataAnalysis.hpp | 40 +- .../core/src/View/Kokkos_ViewLegacy.hpp | 218 +- .../core/src/View/Kokkos_ViewMapping.hpp | 17 +- .../core/src/View/Kokkos_ViewTracker.hpp | 34 +- .../core/src/View/Kokkos_ViewTraits.hpp | 197 +- .../core/src/View/Kokkos_ViewUniformType.hpp | 15 +- .../View/MDSpan/Kokkos_MDSpan_Accessor.hpp | 15 +- .../src/View/MDSpan/Kokkos_MDSpan_Extents.hpp | 15 +- .../src/View/MDSpan/Kokkos_MDSpan_Header.hpp | 36 +- .../src/View/MDSpan/Kokkos_MDSpan_Layout.hpp | 23 +- .../core/src/decl/Kokkos_Declare_CUDA.hpp | 18 +- .../core/src/decl/Kokkos_Declare_HIP.hpp | 15 +- .../core/src/decl/Kokkos_Declare_HPX.hpp | 15 +- .../core/src/decl/Kokkos_Declare_OPENACC.hpp | 16 +- .../core/src/decl/Kokkos_Declare_OPENMP.hpp | 16 +- .../src/decl/Kokkos_Declare_OPENMPTARGET.hpp | 15 +- .../core/src/decl/Kokkos_Declare_SERIAL.hpp | 15 +- .../core/src/decl/Kokkos_Declare_SYCL.hpp | 15 +- .../core/src/decl/Kokkos_Declare_THREADS.hpp | 16 +- 
lib/kokkos/core/src/fwd/Kokkos_Fwd_CUDA.hpp | 15 +- lib/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp | 15 +- lib/kokkos/core/src/fwd/Kokkos_Fwd_HPX.hpp | 15 +- .../core/src/fwd/Kokkos_Fwd_OPENACC.hpp | 15 +- lib/kokkos/core/src/fwd/Kokkos_Fwd_OPENMP.hpp | 15 +- .../core/src/fwd/Kokkos_Fwd_OPENMPTARGET.hpp | 15 +- lib/kokkos/core/src/fwd/Kokkos_Fwd_SERIAL.hpp | 15 +- lib/kokkos/core/src/fwd/Kokkos_Fwd_SYCL.hpp | 15 +- .../core/src/fwd/Kokkos_Fwd_THREADS.hpp | 15 +- .../src/impl/KokkosExp_Host_IterateTile.hpp | 15 +- .../src/impl/KokkosExp_IterateTileGPU.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_Abort.cpp | 15 +- .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 16 +- .../core/src/impl/Kokkos_BuiltinReducers.hpp | 1024 +++++++++ .../core/src/impl/Kokkos_CPUDiscovery.cpp | 15 +- .../core/src/impl/Kokkos_CPUDiscovery.hpp | 15 +- .../impl/Kokkos_CStyleMemoryManagement.hpp | 91 + lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp | 286 --- .../src/impl/Kokkos_CheckedIntegerOps.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp | 15 +- .../core/src/impl/Kokkos_Combined_Reducer.hpp | 19 +- .../src/impl/Kokkos_Command_Line_Parsing.cpp | 15 +- .../src/impl/Kokkos_Command_Line_Parsing.hpp | 15 +- .../core/src/impl/Kokkos_ConcurrentBitset.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_Core.cpp | 67 +- .../impl/Kokkos_Default_GraphNodeKernel.hpp | 15 +- .../impl/Kokkos_Default_GraphNode_Impl.hpp | 27 +- .../src/impl/Kokkos_Default_Graph_Impl.hpp | 25 +- .../src/impl/Kokkos_Default_Graph_fwd.hpp | 15 +- .../src/impl/Kokkos_DesulAtomicsConfig.hpp | 32 +- .../core/src/impl/Kokkos_DeviceManagement.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_EBO.hpp | 78 +- lib/kokkos/core/src/impl/Kokkos_Error.cpp | 20 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 15 +- .../core/src/impl/Kokkos_ExecPolicy.cpp | 21 +- .../core/src/impl/Kokkos_ExecSpaceManager.hpp | 28 +- .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_GraphImpl.hpp | 19 +- 
.../src/impl/Kokkos_GraphImpl_Utilities.hpp | 15 +- .../core/src/impl/Kokkos_GraphImpl_fwd.hpp | 17 +- .../impl/Kokkos_GraphNodeCustomization.hpp | 15 +- .../core/src/impl/Kokkos_GraphNodeImpl.hpp | 22 +- .../src/impl/Kokkos_GraphNodeThenImpl.hpp | 56 +- .../src/impl/Kokkos_GraphNodeThenPolicy.hpp | 20 + .../impl/Kokkos_Half_FloatingPointWrapper.hpp | 20 +- .../Kokkos_Half_MathematicalFunctions.hpp | 15 +- .../src/impl/Kokkos_Half_NumericTraits.hpp | 15 +- .../impl/Kokkos_Half_ReductionIdentity.hpp | 57 + .../core/src/impl/Kokkos_HostBarrier.cpp | 17 +- .../core/src/impl/Kokkos_HostBarrier.hpp | 15 +- .../core/src/impl/Kokkos_HostSharedPtr.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 15 +- .../src/impl/Kokkos_HostSpace_ZeroMemset.hpp | 42 - .../src/impl/Kokkos_HostSpace_deepcopy.cpp | 60 +- .../src/impl/Kokkos_HostSpace_deepcopy.hpp | 18 +- .../core/src/impl/Kokkos_HostThreadTeam.cpp | 15 +- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 15 +- .../impl/Kokkos_InitializationSettings.hpp | 15 +- .../src/impl/Kokkos_InitializeFinalize.hpp | 76 + lib/kokkos/core/src/impl/Kokkos_LIFO.hpp | 378 ---- .../core/src/impl/Kokkos_LinkedListNode.hpp | 156 -- .../core/src/impl/Kokkos_MemoryPool.cpp | 15 +- .../src/impl/Kokkos_MultipleTaskQueue.hpp | 506 ----- .../impl/Kokkos_NvidiaGpuArchitectures.hpp | 25 +- .../core/src/impl/Kokkos_OptionalRef.hpp | 206 -- ...ndLineArgumentsAndEnvironmentVariables.hpp | 15 +- .../core/src/impl/Kokkos_PartitionSpace.hpp | 86 + lib/kokkos/core/src/impl/Kokkos_Profiling.cpp | 15 +- lib/kokkos/core/src/impl/Kokkos_Profiling.hpp | 15 +- .../src/impl/Kokkos_Profiling_C_Interface.h | 17 +- .../src/impl/Kokkos_Profiling_DeviceInfo.hpp | 15 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 17 +- .../src/impl/Kokkos_QuadPrecisionMath.hpp | 76 +- .../core/src/impl/Kokkos_ScopeGuard.hpp | 76 + .../core/src/impl/Kokkos_SharedAlloc.cpp | 24 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 18 +- .../src/impl/Kokkos_SharedAlloc_timpl.hpp | 15 +- 
.../src/impl/Kokkos_SimpleTaskScheduler.hpp | 483 ---- .../core/src/impl/Kokkos_SingleTaskQueue.hpp | 158 -- .../core/src/impl/Kokkos_Stacktrace.cpp | 15 +- .../core/src/impl/Kokkos_Stacktrace.hpp | 15 +- .../src/impl/Kokkos_StringManipulation.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp | 334 --- lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp | 673 ------ .../core/src/impl/Kokkos_TaskPolicyData.hpp | 158 -- lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 227 -- .../core/src/impl/Kokkos_TaskQueueCommon.hpp | 482 ---- .../impl/Kokkos_TaskQueueMemoryManager.hpp | 204 -- .../src/impl/Kokkos_TaskQueueMultiple.hpp | 258 --- .../impl/Kokkos_TaskQueueMultiple_impl.hpp | 45 - .../core/src/impl/Kokkos_TaskQueue_impl.hpp | 651 ------ .../core/src/impl/Kokkos_TaskResult.hpp | 120 - .../core/src/impl/Kokkos_TaskTeamMember.hpp | 104 - .../core/src/impl/Kokkos_TeamMDPolicy.hpp | 17 +- lib/kokkos/core/src/impl/Kokkos_Tools.hpp | 15 +- .../core/src/impl/Kokkos_Tools_Generic.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 78 +- .../core/src/impl/Kokkos_VLAEmulation.hpp | 261 --- .../core/src/impl/Kokkos_Volatile_Load.hpp | 15 +- .../core/src/impl/Kokkos_ZeroMemset_fwd.hpp | 15 +- lib/kokkos/core/src/impl/Kokkos_hwloc.cpp | 20 +- .../core/src/setup/Kokkos_Setup_Cuda.hpp | 15 +- .../core/src/setup/Kokkos_Setup_HIP.hpp | 20 +- .../core/src/setup/Kokkos_Setup_SYCL.hpp | 15 +- .../src/traits/Kokkos_ExecutionSpaceTrait.hpp | 54 +- .../src/traits/Kokkos_GraphKernelTrait.hpp | 30 +- .../core/src/traits/Kokkos_IndexTypeTrait.hpp | 15 +- .../traits/Kokkos_IterationPatternTrait.hpp | 44 +- .../src/traits/Kokkos_LaunchBoundsTrait.hpp | 41 +- .../traits/Kokkos_OccupancyControlTrait.hpp | 15 +- .../src/traits/Kokkos_PolicyTraitAdaptor.hpp | 15 +- .../src/traits/Kokkos_PolicyTraitMatcher.hpp | 15 +- .../core/src/traits/Kokkos_ScheduleTrait.hpp | 47 +- .../traits/Kokkos_StaticBatchSizeTrait.hpp | 76 + 
.../core/src/traits/Kokkos_Traits_fwd.hpp | 17 +- .../traits/Kokkos_WorkItemPropertyTrait.hpp | 29 +- .../core/src/traits/Kokkos_WorkTagTrait.hpp | 42 +- lib/kokkos/core/unit_test/CMakeLists.txt | 84 +- .../core/unit_test/IncrementalTest.cpp.in | 15 +- ...tionEnvironmentNeverInitializedFixture.hpp | 20 +- lib/kokkos/core/unit_test/Makefile | 530 ----- lib/kokkos/core/unit_test/TestAbort.hpp | 20 +- lib/kokkos/core/unit_test/TestArray.cpp | 95 +- lib/kokkos/core/unit_test/TestArrayOps.hpp | 46 +- .../core/unit_test/TestAtomicOperations.hpp | 24 +- .../TestAtomicOperations_complexdouble.hpp | 15 +- .../TestAtomicOperations_complexfloat.hpp | 15 +- .../unit_test/TestAtomicOperations_double.hpp | 15 +- .../unit_test/TestAtomicOperations_float.hpp | 15 +- .../unit_test/TestAtomicOperations_int.hpp | 15 +- .../unit_test/TestAtomicOperations_int16.hpp | 26 + .../unit_test/TestAtomicOperations_int8.hpp | 26 + .../TestAtomicOperations_longint.hpp | 15 +- .../TestAtomicOperations_longlongint.hpp | 15 +- .../unit_test/TestAtomicOperations_shared.hpp | 20 +- .../TestAtomicOperations_unsignedint.hpp | 15 +- .../TestAtomicOperations_unsignedlongint.hpp | 15 +- ...stAtomicOperations_unsignedlonglongint.hpp | 15 +- lib/kokkos/core/unit_test/TestAtomicViews.hpp | 41 +- lib/kokkos/core/unit_test/TestAtomics.hpp | 24 +- .../core/unit_test/TestBitManipulation.cpp | 17 +- .../unit_test/TestBitManipulationBuiltins.hpp | 43 +- .../core/unit_test/TestBlockSizeDeduction.hpp | 20 +- .../unit_test/TestCStyleMemoryManagement.cpp | 20 +- lib/kokkos/core/unit_test/TestCTestDevice.cpp | 15 +- lib/kokkos/core/unit_test/TestCXX11.hpp | 32 +- .../core/unit_test/TestCXX11Deduction.hpp | 20 +- .../core/unit_test/TestCheckedIntegerOps.hpp | 15 +- .../TestCommonPolicyConstructors.hpp | 25 +- .../unit_test/TestCommonPolicyInterface.hpp | 21 +- lib/kokkos/core/unit_test/TestCompileMain.cpp | 15 +- .../core/unit_test/TestCompilerMacros.cpp | 20 +- lib/kokkos/core/unit_test/TestComplex.hpp | 50 +- 
lib/kokkos/core/unit_test/TestConcepts.hpp | 26 +- .../core/unit_test/TestConcurrentBitset.hpp | 17 +- .../core/unit_test/TestCreateMirror.cpp | 21 +- lib/kokkos/core/unit_test/TestCrs.hpp | 20 +- lib/kokkos/core/unit_test/TestDeepCopy.hpp | 62 + .../core/unit_test/TestDeepCopyAlignment.hpp | 20 +- .../unit_test/TestDeepCopy_Assignment.hpp | 57 + .../core/unit_test/TestDeepCopy_Narrowing.hpp | 42 + .../core/unit_test/TestDetectionIdiom.cpp | 15 +- .../core/unit_test/TestDeviceAndThreads.py | 33 +- .../unit_test/TestExecSpacePartitioning.hpp | 71 +- .../unit_test/TestExecSpaceThreadSafety.hpp | 36 +- ...onEnvironmentNonInitializedOrFinalized.cpp | 244 ++- .../core/unit_test/TestExecutionSpace.hpp | 20 +- .../core/unit_test/TestFunctorAnalysis.hpp | 31 +- lib/kokkos/core/unit_test/TestGraph.hpp | 227 +- .../core/unit_test/TestGraphAtomicLocks.hpp | 20 +- lib/kokkos/core/unit_test/TestHWLOC.cpp | 15 +- .../core/unit_test/TestHalfConversion.hpp | 18 +- .../core/unit_test/TestHalfOperators.hpp | 34 +- .../core/unit_test/TestHostSharedPtr.hpp | 15 +- .../TestHostSharedPtrAccessOnDevice.hpp | 20 +- .../unit_test/TestInitializationSettings.cpp | 15 +- .../core/unit_test/TestInitializeFinalize.cpp | 20 +- lib/kokkos/core/unit_test/TestInterOp.cpp | 43 +- .../core/unit_test/TestIrregularLayout.hpp | 20 +- .../TestJoinBackwardCompatibility.hpp | 20 +- ...kkosHelpCausesNormalProgramTermination.cpp | 20 +- .../unit_test/TestLegionInitialization.cpp | 21 +- .../core/unit_test/TestLocalDeepCopy.hpp | 76 +- lib/kokkos/core/unit_test/TestMDRange.hpp | 44 +- .../core/unit_test/TestMDRangePolicyCTAD.cpp | 24 +- .../TestMDRangePolicyConstructors.hpp | 46 +- .../core/unit_test/TestMDRangeReduce.hpp | 20 +- lib/kokkos/core/unit_test/TestMDRange_a.hpp | 15 +- lib/kokkos/core/unit_test/TestMDRange_b.hpp | 15 +- lib/kokkos/core/unit_test/TestMDRange_c.hpp | 15 +- lib/kokkos/core/unit_test/TestMDRange_d.hpp | 15 +- lib/kokkos/core/unit_test/TestMDRange_e.hpp | 15 +- 
lib/kokkos/core/unit_test/TestMDRange_f.hpp | 15 +- lib/kokkos/core/unit_test/TestMDRange_g.hpp | 20 +- lib/kokkos/core/unit_test/TestMDSpan.hpp | 22 +- .../unit_test/TestMDSpanAtomicAccessor.hpp | 27 +- .../core/unit_test/TestMDSpanConversion.hpp | 21 +- .../unit_test/TestMathematicalConstants.hpp | 20 +- .../unit_test/TestMathematicalFunctions.hpp | 63 +- .../unit_test/TestMathematicalFunctions1.hpp | 15 +- .../unit_test/TestMathematicalFunctions2.hpp | 15 +- .../unit_test/TestMathematicalFunctions3.hpp | 15 +- .../TestMathematicalSpecialFunctions.hpp | 120 +- lib/kokkos/core/unit_test/TestMemoryPool.hpp | 20 +- lib/kokkos/core/unit_test/TestMinMaxClamp.hpp | 20 +- lib/kokkos/core/unit_test/TestMultiGPU.hpp | 15 +- .../core/unit_test/TestNestedReducerCTAD.cpp | 21 +- .../unit_test/TestNonTrivialScalarTypes.hpp | 24 +- .../core/unit_test/TestNumericTraits.hpp | 39 +- .../unit_test/TestOccupancyControlTrait.hpp | 20 +- lib/kokkos/core/unit_test/TestOther.hpp | 15 +- .../unit_test/TestParallelScanRangePolicy.hpp | 120 +- .../TestParseCmdLineArgsAndEnvVars.cpp | 35 +- lib/kokkos/core/unit_test/TestPrintf.hpp | 20 +- .../core/unit_test/TestPushFinalizeHook.cpp | 20 +- .../core/unit_test/TestQuadPrecisionMath.hpp | 24 +- lib/kokkos/core/unit_test/TestRange.hpp | 114 +- lib/kokkos/core/unit_test/TestRangePolicy.hpp | 15 +- .../core/unit_test/TestRangePolicyCTAD.cpp | 20 +- .../unit_test/TestRangePolicyConstructors.hpp | 46 +- .../core/unit_test/TestRangePolicyRequire.hpp | 22 +- lib/kokkos/core/unit_test/TestRealloc.hpp | 20 +- lib/kokkos/core/unit_test/TestReduce.hpp | 113 +- .../unit_test/TestReduceCombinatorical.hpp | 29 +- .../core/unit_test/TestReducerCTADs.hpp | 20 +- lib/kokkos/core/unit_test/TestReducers.hpp | 139 +- lib/kokkos/core/unit_test/TestReducers_a.hpp | 15 +- lib/kokkos/core/unit_test/TestReducers_b.hpp | 15 +- lib/kokkos/core/unit_test/TestReducers_c.hpp | 15 +- lib/kokkos/core/unit_test/TestReducers_d.hpp | 20 +- 
lib/kokkos/core/unit_test/TestReducers_e.hpp | 15 +- lib/kokkos/core/unit_test/TestReductions.hpp | 15 +- .../unit_test/TestReductions_DeviceView.hpp | 20 +- lib/kokkos/core/unit_test/TestResize.hpp | 52 +- lib/kokkos/core/unit_test/TestScopeGuard.cpp | 20 +- lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 21 +- .../unit_test/TestSharedHostPinnedSpace.cpp | 20 +- lib/kokkos/core/unit_test/TestSharedSpace.cpp | 22 +- .../core/unit_test/TestSpaceAwareAccessor.hpp | 28 +- .../TestSpaceAwareAccessorAccessViolation.hpp | 21 +- lib/kokkos/core/unit_test/TestStackTrace.cpp | 15 +- lib/kokkos/core/unit_test/TestStackTrace.hpp | 15 +- .../core/unit_test/TestStackTrace_f0.cpp | 15 +- .../core/unit_test/TestStackTrace_f1.cpp | 15 +- .../core/unit_test/TestStackTrace_f2.cpp | 15 +- .../core/unit_test/TestStackTrace_f3.cpp | 15 +- .../core/unit_test/TestStackTrace_f4.cpp | 15 +- .../core/unit_test/TestStringManipulation.cpp | 15 +- lib/kokkos/core/unit_test/TestSubView_a.hpp | 35 +- lib/kokkos/core/unit_test/TestSubView_b.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c01.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c02.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c03.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c04.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c05.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c06.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c07.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c08.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c09.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c10.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c11.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c12.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c13.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c14.hpp | 15 +- lib/kokkos/core/unit_test/TestSubView_c15.hpp | 20 +- lib/kokkos/core/unit_test/TestSwap.hpp | 20 +- .../core/unit_test/TestTaskScheduler.hpp | 839 ------- .../unit_test/TestTaskScheduler_single.hpp | 72 - 
lib/kokkos/core/unit_test/TestTeam.hpp | 27 +- lib/kokkos/core/unit_test/TestTeamBasic.hpp | 28 +- .../unit_test/TestTeamCombinedReducers.hpp | 22 +- lib/kokkos/core/unit_test/TestTeamMDRange.hpp | 58 +- .../unit_test/TestTeamMDRangePolicyCTAD.cpp | 21 +- .../core/unit_test/TestTeamPolicyCTAD.cpp | 22 +- .../unit_test/TestTeamPolicyConstructors.hpp | 46 +- .../core/unit_test/TestTeamReductionScan.hpp | 23 +- lib/kokkos/core/unit_test/TestTeamScan.hpp | 23 +- lib/kokkos/core/unit_test/TestTeamScratch.hpp | 15 +- .../core/unit_test/TestTeamTeamSize.hpp | 20 +- lib/kokkos/core/unit_test/TestTeamVector.hpp | 25 +- .../core/unit_test/TestTeamVectorRange.hpp | 24 +- lib/kokkos/core/unit_test/TestTimer.hpp | 43 + lib/kokkos/core/unit_test/TestTypeInfo.cpp | 21 +- lib/kokkos/core/unit_test/TestTypeList.cpp | 57 +- lib/kokkos/core/unit_test/TestUniqueToken.hpp | 28 +- lib/kokkos/core/unit_test/TestUtilities.hpp | 20 +- .../core/unit_test/TestVersionMacros.cpp | 20 +- lib/kokkos/core/unit_test/TestViewAPI.hpp | 64 +- lib/kokkos/core/unit_test/TestViewAPI_a.hpp | 15 +- lib/kokkos/core/unit_test/TestViewAPI_b.hpp | 15 +- lib/kokkos/core/unit_test/TestViewAPI_c.hpp | 15 +- lib/kokkos/core/unit_test/TestViewAPI_d.hpp | 15 +- lib/kokkos/core/unit_test/TestViewAPI_e.hpp | 25 +- .../core/unit_test/TestViewBadAlloc.hpp | 26 +- lib/kokkos/core/unit_test/TestViewCopy_a.hpp | 20 +- lib/kokkos/core/unit_test/TestViewCopy_b.hpp | 20 +- lib/kokkos/core/unit_test/TestViewCopy_c.hpp | 21 +- .../core/unit_test/TestViewCtorDimMatch.hpp | 20 +- .../core/unit_test/TestViewCtorProp.hpp | 21 +- .../unit_test/TestViewCtorPropEmbeddedDim.hpp | 32 +- .../TestViewEmptyRuntimeUnmanaged.hpp | 23 +- lib/kokkos/core/unit_test/TestViewHooks.hpp | 178 +- .../core/unit_test/TestViewIsAssignable.hpp | 22 +- .../TestViewLayoutStrideAssignment.hpp | 161 +- .../core/unit_test/TestViewMapping_a.hpp | 97 +- .../core/unit_test/TestViewMapping_b.hpp | 23 +- .../unit_test/TestViewMapping_subview.hpp | 42 +- 
.../TestViewMemoryAccessViolation.hpp | 20 +- lib/kokkos/core/unit_test/TestViewMove.hpp | 158 ++ lib/kokkos/core/unit_test/TestViewOfClass.hpp | 22 +- lib/kokkos/core/unit_test/TestViewOfViews.hpp | 20 +- .../unit_test/TestViewOutOfBoundsAccess.hpp | 21 +- lib/kokkos/core/unit_test/TestViewRank.cpp | 22 +- lib/kokkos/core/unit_test/TestViewResize.hpp | 15 +- .../core/unit_test/TestViewSpaceAssign.hpp | 20 +- lib/kokkos/core/unit_test/TestViewSubview.hpp | 75 +- .../core/unit_test/TestViewTypeTraits.cpp | 20 +- .../core/unit_test/TestViewTypedefs.cpp | 56 +- lib/kokkos/core/unit_test/TestView_64bit.hpp | 44 +- .../core/unit_test/TestWindowsInclude.cpp | 20 +- .../unit_test/TestWithoutInitializing.hpp | 20 +- lib/kokkos/core/unit_test/TestWorkGraph.hpp | 20 +- .../core/unit_test/Test_InterOp_Streams.hpp | 20 +- lib/kokkos/core/unit_test/UnitTestMain.cpp | 15 +- .../core/unit_test/UnitTestMainInit.cpp | 20 +- .../UnitTest_CMakePassCmdLineArgs.cpp | 15 +- .../UnitTest_CMakeTriBITSCompatibility.cpp | 15 +- .../unit_test/UnitTest_DeviceAndThreads.cpp | 27 +- .../TestCudaHostPinned_Category.hpp | 15 +- .../category_files/TestCudaUVM_Category.hpp | 15 +- .../category_files/TestCuda_Category.hpp | 15 +- .../TestDefaultDeviceType_Category.hpp | 15 +- .../TestHIPHostPinned_Category.hpp | 15 +- .../TestHIPManaged_Category.hpp | 15 +- .../category_files/TestHIP_Category.hpp | 15 +- .../category_files/TestHPX_Category.hpp | 15 +- .../category_files/TestOpenACC_Category.hpp | 15 +- .../TestOpenMPTarget_Category.hpp | 15 +- .../category_files/TestOpenMP_Category.hpp | 15 +- .../TestSYCLHostUSM_Category.hpp | 15 +- .../TestSYCLSharedUSM_Category.hpp | 15 +- .../category_files/TestSYCL_Category.hpp | 15 +- .../category_files/TestSerial_Category.hpp | 15 +- .../category_files/TestThreads_Category.hpp | 15 +- .../configuration/test-code/CMakeLists.txt | 44 - .../configuration/test-code/Makefile | 47 - .../configuration/test-code/main.cpp | 22 - 
.../configuration/test-code/test_config.bash | 7 - .../test-code/test_config_arch_list.bash | 45 - .../test-code/test_config_device_list.bash | 45 - .../test-code/test_config_options_list.bash | 48 - .../test-code/test_config_run.bash | 111 - .../cuda/TestCuda_DebugPinUVMSpace.cpp | 20 +- .../cuda/TestCuda_DebugSerialExecution.cpp | 20 +- .../unit_test/cuda/TestCuda_InterOp_Graph.cpp | 32 +- .../cuda/TestCuda_InterOp_GraphMultiGPU.cpp | 104 + .../unit_test/cuda/TestCuda_InterOp_Init.cpp | 20 +- .../cuda/TestCuda_InterOp_Streams.cpp | 15 +- .../cuda/TestCuda_InterOp_StreamsMultiGPU.cpp | 15 +- .../cuda/TestCuda_ReducerViewSizeLimit.cpp | 20 +- .../core/unit_test/cuda/TestCuda_Spaces.cpp | 22 +- .../core/unit_test/cuda/TestCuda_Task.cpp | 18 - .../cuda/TestCuda_TeamScratchStreams.cpp | 20 +- .../default/TestDefaultDeviceDevelop.cpp | 20 +- .../default/TestDefaultDeviceType.cpp | 21 +- .../default/TestDefaultDeviceTypeResize.cpp | 15 +- .../default/TestDefaultDeviceTypeViewAPI.cpp | 22 +- .../default/TestDefaultDeviceType_a1.cpp | 20 +- .../default/TestDefaultDeviceType_a2.cpp | 20 +- .../default/TestDefaultDeviceType_a3.cpp | 20 +- .../default/TestDefaultDeviceType_b1.cpp | 20 +- .../default/TestDefaultDeviceType_b2.cpp | 20 +- .../default/TestDefaultDeviceType_b3.cpp | 20 +- .../default/TestDefaultDeviceType_c1.cpp | 20 +- .../default/TestDefaultDeviceType_c2.cpp | 20 +- .../default/TestDefaultDeviceType_c3.cpp | 20 +- .../headers_self_contained/CMakeLists.txt | 1 - .../headers_self_contained/tstHeader.cpp | 15 +- .../unit_test/hip/TestHIP_AsyncLauncher.cpp | 20 +- .../hip/TestHIP_BlocksizeDeduction.cpp | 20 +- .../unit_test/hip/TestHIP_InterOp_Graph.cpp | 44 +- .../unit_test/hip/TestHIP_InterOp_Init.cpp | 20 +- .../unit_test/hip/TestHIP_InterOp_Streams.cpp | 15 +- .../hip/TestHIP_InterOp_StreamsMultiGPU.cpp | 15 +- .../hip/TestHIP_Memory_Requirements.cpp | 20 +- .../core/unit_test/hip/TestHIP_ScanUnit.cpp | 20 +- .../hip/TestHIP_SharedResourceLock.cpp | 20 +- 
.../core/unit_test/hip/TestHIP_Spaces.cpp | 20 +- .../hip/TestHIP_TeamScratchStreams.cpp | 20 +- .../hip/TestHIP_UnifiedMemory_ZeroMemset.cpp | 20 +- .../core/unit_test/hpx/TestHPX_InParallel.cpp | 20 +- .../hpx/TestHPX_IndependentInstances.cpp | 20 +- ...X_IndependentInstancesDelayedExecution.cpp | 20 +- ...estHPX_IndependentInstancesInstanceIds.cpp | 20 +- ...estHPX_IndependentInstancesRefCounting.cpp | 20 +- ...PX_IndependentInstancesSynchronization.cpp | 20 +- .../core/unit_test/hpx/TestHPX_InterOp.cpp | 20 +- .../core/unit_test/hpx/TestHPX_Task.cpp | 18 - .../incremental/Test01_execspace.hpp | 20 +- .../incremental/Test02_atomic_host.hpp | 20 +- .../Test03a_MemorySpace_malloc.hpp | 20 +- .../incremental/Test03b_MemorySpace_free.hpp | 20 +- .../Test04_ParallelFor_RangePolicy.hpp | 21 +- .../Test05_ParallelReduce_RangePolicy.hpp | 20 +- .../Test06_ParallelFor_MDRangePolicy.hpp | 21 +- .../incremental/Test08_deep_copy.hpp | 26 +- .../incremental/Test10_HierarchicalBasics.hpp | 20 +- .../Test11a_ParallelFor_TeamThreadRange.hpp | 20 +- .../Test11b_ParallelFor_TeamVectorRange.hpp | 20 +- .../Test11c_ParallelFor_ThreadVectorRange.hpp | 20 +- .../incremental/Test12a_ThreadScratch.hpp | 20 +- .../incremental/Test12b_TeamScratch.hpp | 20 +- .../Test13a_ParallelRed_TeamThreadRange.hpp | 20 +- .../Test13b_ParallelRed_TeamVectorRange.hpp | 20 +- .../Test13c_ParallelRed_ThreadVectorRange.hpp | 20 +- .../incremental/Test14_MDRangeReduce.hpp | 20 +- .../incremental/Test16_ParallelScan.hpp | 20 +- .../incremental/Test17_CompleteAtomic.hpp | 20 +- .../unit_test/openmp/TestOpenMP_InterOp.cpp | 20 +- .../core/unit_test/openmp/TestOpenMP_Task.cpp | 18 - .../core/unit_test/serial/TestSerial_Task.cpp | 18 - lib/kokkos/core/unit_test/standalone/Makefile | 57 - .../unit_test/standalone/UnitTestMainInit.cpp | 60 - .../unit_test/sycl/TestSYCL_InterOp_Graph.cpp | 36 +- .../unit_test/sycl/TestSYCL_InterOp_Init.cpp | 20 +- .../sycl/TestSYCL_InterOp_Init_Context.cpp | 20 +- 
.../sycl/TestSYCL_InterOp_Streams.cpp | 15 +- .../sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp | 15 +- .../core/unit_test/sycl/TestSYCL_Spaces.cpp | 20 +- .../sycl/TestSYCL_TeamScratchStreams.cpp | 20 +- .../core/unit_test/tools/TestAllCalls.cpp | 22 +- .../unit_test/tools/TestBuiltinTuners.cpp | 248 ++- .../unit_test/tools/TestCategoricalTuner.cpp | 20 +- .../unit_test/tools/TestEventCorrectness.cpp | 15 +- .../unit_test/tools/TestEventCorrectness.hpp | 15 +- .../core/unit_test/tools/TestIndependence.cpp | 15 +- .../core/unit_test/tools/TestKernelNames.cpp | 34 +- .../unit_test/tools/TestLogicalSpaces.cpp | 15 +- .../unit_test/tools/TestProfilingSection.cpp | 15 +- .../core/unit_test/tools/TestScopedRegion.cpp | 15 +- .../tools/TestToolsInitialization.cpp | 20 +- .../core/unit_test/tools/TestTuning.cpp | 20 +- .../tools/TestWithoutInitializing.cpp | 20 +- .../tools/include/ToolTestingUtilities.hpp | 20 +- .../core/unit_test/tools/printing-tool.cpp | 15 +- .../view/TestAllocationAndSpanSize.hpp | 71 + .../core/unit_test/view/TestBasicView.hpp | 21 +- .../view/TestBasicViewMDSpanConversion.cpp | 22 +- .../view/TestConversionFromPointer.cpp | 21 + .../view/TestExtentsDatatypeConversion.cpp | 21 +- .../view/TestReferenceCountedAccessor.hpp | 21 +- .../view/TestReferenceCountedDataHandle.hpp | 21 +- .../unit_test/view/TestViewCustomization.cpp | 22 +- .../view/TestViewCustomizationAccessorArg.hpp | 21 +- ...stViewCustomizationAccessorFromMapping.hpp | 80 +- .../TestViewCustomizationAllocationType.hpp | 143 +- lib/kokkos/{ => docs}/CONTRIBUTING.md | 0 lib/kokkos/{ => docs}/SECURITY.md | 0 lib/kokkos/docs/TuningDesign.md | 221 ++ lib/kokkos/example/CMakeLists.txt | 5 + lib/kokkos/example/README | 9 - .../build_cmake_in_tree/CMakeLists.txt | 4 +- .../build_cmake_in_tree/cmake_example.cpp | 15 +- .../build_cmake_installed/CMakeLists.txt | 4 +- .../build_cmake_installed/cmake_example.cpp | 15 +- .../CMakeLists.txt | 4 +- .../bar.cpp | 15 +- .../foo.cpp | 15 +- 
.../CMakeLists.txt | 4 +- .../bar.cpp | 15 +- .../cmake_example.cpp | 15 +- .../CMakeLists.txt | 15 + .../cmake_example.cpp | 53 + .../CMakeLists.txt | 40 + .../cmake_example.cpp | 26 + .../library.cpp | 11 + .../library.cu | 11 + .../library.hip | 11 + lib/kokkos/example/make_buildlink/Makefile | 51 - lib/kokkos/example/make_buildlink/README | 2 - lib/kokkos/example/make_buildlink/main.cpp | 29 - lib/kokkos/example/query_device/Makefile | 49 - .../example/query_device/query_device.cpp | 15 +- .../example/relocatable_function/Makefile | 35 - .../example/relocatable_function/functor.cpp | 15 +- .../example/relocatable_function/main.cpp | 15 +- .../example/tutorial/01_hello_world/Makefile | 50 - .../tutorial/01_hello_world/hello_world.cpp | 15 +- .../tutorial/01_hello_world_lambda/Makefile | 51 - .../hello_world_lambda.cpp | 15 +- .../tutorial/02_simple_reduce/Makefile | 50 - .../02_simple_reduce/simple_reduce.cpp | 15 +- .../tutorial/02_simple_reduce_lambda/Makefile | 51 - .../simple_reduce_lambda.cpp | 15 +- .../example/tutorial/03_simple_view/Makefile | 51 - .../tutorial/03_simple_view/simple_view.cpp | 15 +- .../tutorial/03_simple_view_lambda/Makefile | 51 - .../simple_view_lambda.cpp | 15 +- .../tutorial/04_simple_memoryspaces/Makefile | 50 - .../simple_memoryspaces.cpp | 19 +- .../tutorial/05_simple_atomics/Makefile | 50 - .../05_simple_atomics/simple_atomics.cpp | 19 +- .../tutorial/06_simple_mdrangepolicy/Makefile | 49 - .../simple_mdrangepolicy.cpp | 15 +- .../Advanced_Views/01_data_layouts/Makefile | 50 - .../01_data_layouts/data_layouts.cpp | 15 +- .../Advanced_Views/02_memory_traits/Makefile | 50 - .../02_memory_traits/memory_traits.cpp | 17 +- .../Advanced_Views/03_subviews/Makefile | 50 - .../Advanced_Views/03_subviews/subviews.cpp | 15 +- .../Advanced_Views/04_dualviews/Makefile | 50 - .../Advanced_Views/04_dualviews/dual_view.cpp | 20 +- .../Advanced_Views/05_NVIDIA_UVM/Makefile | 50 - .../05_NVIDIA_UVM/uvm_example.cpp | 15 +- 
.../Advanced_Views/06_AtomicViews/Makefile | 50 - .../07_Overlapping_DeepCopy/CMakeLists.txt | 5 + .../07_Overlapping_DeepCopy/Makefile | 50 - .../overlapping_deepcopy.cpp | 16 +- .../tutorial/Advanced_Views/CMakeLists.txt | 4 + .../example/tutorial/Advanced_Views/Makefile | 123 -- .../Algorithms/01_random_numbers/Makefile | 50 - .../01_random_numbers/random_numbers.cpp | 15 +- .../example/tutorial/Algorithms/Makefile | 43 - lib/kokkos/example/tutorial/CMakeLists.txt | 1 + .../Debugging/01_ErrorReporter/CMakeLists.txt | 5 + .../01_ErrorReporter/error_reporter.cpp | 70 + .../example/tutorial/Debugging/CMakeLists.txt | 1 + .../01_thread_teams/Makefile | 50 - .../01_thread_teams/thread_teams.cpp | 15 +- .../01_thread_teams_lambda/Makefile | 51 - .../thread_teams_lambda.cpp | 15 +- .../02_nested_parallel_for/Makefile | 50 - .../nested_parallel_for.cpp | 15 +- .../03_vectorization/Makefile | 50 - .../03_vectorization/vectorization.cpp | 15 +- .../04_team_scan/Makefile | 50 - .../04_team_scan/team_scan.cpp | 27 +- .../Hierarchical_Parallelism/CMakeLists.txt | 1 + .../Hierarchical_Parallelism/Makefile | 95 - lib/kokkos/example/tutorial/Makefile | 174 -- lib/kokkos/example/tutorial/README | 12 - .../example/tutorial/launch_bounds/Makefile | 58 - .../launch_bounds/launch_bounds_reduce.cpp | 15 +- .../example/virtual_functions/CMakeLists.txt | 6 + lib/kokkos/example/virtual_functions/Makefile | 57 - .../example/virtual_functions/classes.cpp | 15 +- .../example/virtual_functions/classes.hpp | 36 +- lib/kokkos/example/virtual_functions/main.cpp | 23 +- lib/kokkos/generate_makefile.bash | 497 ----- lib/kokkos/gnu_generate_makefile.bash | 423 ---- lib/kokkos/master_history.txt | 48 - lib/kokkos/simd/src/CMakeLists.txt | 16 +- lib/kokkos/simd/src/Kokkos_SIMD.cppm | 97 + lib/kokkos/simd/src/Kokkos_SIMD.hpp | 104 +- lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp | 1432 +++++++++--- lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp | 1950 ++++++++++++++--- 
lib/kokkos/simd/src/Kokkos_SIMD_Common.hpp | 259 ++- .../simd/src/Kokkos_SIMD_Common_Math.hpp | 160 +- lib/kokkos/simd/src/Kokkos_SIMD_Impl.cppm | 27 + lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp | 1124 ++++++++-- lib/kokkos/simd/src/Kokkos_SIMD_SVE.hpp | 1791 +++++++++++---- lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp | 183 +- lib/kokkos/simd/src/Kokkos_SIMD_dummy.cpp | 15 +- .../simd/src/impl/Kokkos_Neon_SVE_bridge.hpp | 15 +- lib/kokkos/simd/unit_tests/TestSIMD.cpp | 17 +- lib/kokkos/simd/unit_tests/UnitTestMain.cpp | 20 +- .../unit_tests/include/SIMDTesting_Ops.hpp | 94 +- .../include/SIMDTesting_Utilities.hpp | 50 +- .../unit_tests/include/TestSIMD_Condition.hpp | 21 +- .../include/TestSIMD_Construction.hpp | 59 +- .../include/TestSIMD_Conversions.hpp | 264 +-- .../include/TestSIMD_GeneratorCtors.hpp | 38 +- .../unit_tests/include/TestSIMD_LoadStore.hpp | 236 ++ .../unit_tests/include/TestSIMD_MaskOps.hpp | 21 +- .../unit_tests/include/TestSIMD_MathOps.hpp | 49 +- .../include/TestSIMD_Reductions.hpp | 21 +- .../unit_tests/include/TestSIMD_ShiftOps.hpp | 47 +- .../include/TestSIMD_WhereExpressions.hpp | 227 -- lib/kokkos/tpls/desul-hash.txt | 1 + lib/kokkos/tpls/desul/Config.hpp.cmake.in | 4 +- .../desul/include/desul/atomics/Common.hpp | 38 +- .../desul/atomics/Compare_Exchange_CUDA.hpp | 39 +- .../desul/atomics/Compare_Exchange_GCC.hpp | 44 +- .../desul/atomics/Compare_Exchange_HIP.hpp | 18 +- .../desul/atomics/Compare_Exchange_MSVC.hpp | 16 +- .../atomics/Compare_Exchange_OpenACC.hpp | 4 + .../desul/atomics/Compare_Exchange_OpenMP.hpp | 12 +- .../desul/atomics/Compare_Exchange_SYCL.hpp | 9 +- .../include/desul/atomics/Fetch_Op_GCC.hpp | 15 + .../desul/atomics/Fetch_Op_Generic.hpp | 6 +- .../include/desul/atomics/Fetch_Op_HIP.hpp | 5 +- .../include/desul/atomics/Lock_Array_CUDA.hpp | 4 +- .../include/desul/atomics/Lock_Array_HIP.hpp | 4 +- .../include/desul/atomics/Lock_Array_SYCL.hpp | 4 +- .../atomics/Lock_Based_Fetch_Op_CUDA.hpp | 5 +- 
.../desul/atomics/Lock_Based_Fetch_Op_HIP.hpp | 5 +- .../atomics/Lock_Based_Fetch_Op_Host.hpp | 13 +- .../atomics/Lock_Based_Fetch_Op_OpenACC.hpp | 5 +- .../atomics/Lock_Based_Fetch_Op_SYCL.hpp | 5 +- .../desul/atomics/Lock_Free_Fetch_Op.hpp | 2 +- .../desul/include/desul/atomics/Macros.hpp | 10 + .../desul/atomics/cuda/CUDA_asm_exchange.hpp | 8 +- .../atomics/cuda/cuda_cc9_asm_exchange.inc | 19 + .../cuda/cuda_cc9_asm_exchange_memorder.inc | 26 + .../atomics/cuda/cuda_cc9_asm_exchange_op.inc | 25 + lib/kokkos/tpls/mdspan-hash.txt | 1 + .../experimental/__p0009_bits/config.hpp | 9 +- .../__p0009_bits/layout_stride.hpp | 24 +- .../experimental/__p0009_bits/macros.hpp | 8 +- .../experimental/__p0009_bits/mdspan.hpp | 122 +- .../experimental/__p0009_bits/utility.hpp | 55 +- .../experimental/__p1684_bits/mdarray.hpp | 30 +- .../__p2630_bits/strided_slice.hpp | 8 +- .../__p2630_bits/submdspan_mapping.hpp | 26 +- .../__p2642_bits/layout_padded.hpp | 284 ++- 1351 files changed, 21167 insertions(+), 43020 deletions(-) create mode 100644 lib/kokkos/COPYRIGHT.md delete mode 100644 lib/kokkos/Copyright.txt delete mode 100644 lib/kokkos/LICENSE_FILE_HEADER delete mode 100644 lib/kokkos/Makefile.kokkos delete mode 100644 lib/kokkos/Makefile.targets delete mode 100644 lib/kokkos/Spack.md create mode 100644 lib/kokkos/algorithms/perf_test/test_random.cpp delete mode 100644 lib/kokkos/algorithms/unit_tests/Makefile delete mode 100644 lib/kokkos/benchmarks/atomic/Makefile delete mode 100644 lib/kokkos/benchmarks/bytes_and_flops/Makefile delete mode 100644 lib/kokkos/benchmarks/gather/Makefile delete mode 100644 lib/kokkos/benchmarks/policy_performance/Makefile delete mode 100644 lib/kokkos/benchmarks/stream/Makefile delete mode 100644 lib/kokkos/benchmarks/view_copy_constructor/Makefile delete mode 100755 lib/kokkos/bin/runtest delete mode 100644 lib/kokkos/cmake/Modules/CudaToolkit.cmake delete mode 100644 lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake delete mode 100644 
lib/kokkos/cmake/compile_tests/cplusplus17.cpp create mode 100644 lib/kokkos/cmake/compile_tests/cplusplus20.cpp delete mode 100644 lib/kokkos/cmake/kokkos_corner_cases.cmake delete mode 100644 lib/kokkos/containers/performance_tests/Makefile create mode 100644 lib/kokkos/containers/src/Kokkos_Bitset.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_DualView.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_DynRankView.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_DynRankView_Impl.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_DynamicView.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_ErrorReporter.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_Functional.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_OffsetView.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_ScatterView.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_ScatterView_Impl.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_UnorderedMap.cppm create mode 100644 lib/kokkos/containers/src/Kokkos_UnorderedMap_Impl.cppm delete mode 100644 lib/kokkos/containers/unit_tests/Makefile create mode 100644 lib/kokkos/containers/unit_tests/TestDynRankView_LayoutMember.hpp delete mode 100644 lib/kokkos/core/perf_test/Makefile create mode 100644 lib/kokkos/core/perf_test/PerfTest_Gemv.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_Stream.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_DeepCopy.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_Initialize.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_ParallelFor.cpp delete mode 100755 lib/kokkos/core/perf_test/run_taskdag.sh delete mode 100644 lib/kokkos/core/perf_test/test_taskdag.cpp delete mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp delete mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_Error.cpp delete mode 100644 
lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp delete mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp create mode 100644 lib/kokkos/core/src/Kokkos_CheckUsage.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Core.cppm create mode 100644 lib/kokkos/core/src/Kokkos_Core_Impl.cppm delete mode 100644 lib/kokkos/core/src/Kokkos_Future.hpp delete mode 100644 lib/kokkos/core/src/Kokkos_TaskScheduler.hpp delete mode 100644 lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp delete mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp delete mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_ZeroMemset.hpp delete mode 100644 lib/kokkos/core/src/Serial/Kokkos_Serial_Task.cpp delete mode 100644 lib/kokkos/core/src/Serial/Kokkos_Serial_Task.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_ZeroMemset.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_BuiltinReducers.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_CStyleMemoryManagement.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_GraphNodeThenPolicy.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Half_ReductionIdentity.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_HostSpace_ZeroMemset.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_InitializeFinalize.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_LIFO.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_PartitionSpace.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ScopeGuard.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp delete mode 100644 
lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp create mode 100644 lib/kokkos/core/src/traits/Kokkos_StaticBatchSizeTrait.hpp delete mode 100644 lib/kokkos/core/unit_test/Makefile create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations_int16.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations_int8.hpp create mode 100644 lib/kokkos/core/unit_test/TestDeepCopy.hpp create mode 100644 lib/kokkos/core/unit_test/TestDeepCopy_Assignment.hpp create mode 100644 lib/kokkos/core/unit_test/TestDeepCopy_Narrowing.hpp delete mode 100644 lib/kokkos/core/unit_test/TestTaskScheduler.hpp delete mode 100644 lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp create mode 100644 lib/kokkos/core/unit_test/TestTimer.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewMove.hpp delete mode 100644 lib/kokkos/core/unit_test/configuration/test-code/CMakeLists.txt delete mode 100644 lib/kokkos/core/unit_test/configuration/test-code/Makefile delete mode 100644 lib/kokkos/core/unit_test/configuration/test-code/main.cpp delete mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config.bash delete mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_arch_list.bash delete 
mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_device_list.bash delete mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_options_list.bash delete mode 100755 lib/kokkos/core/unit_test/configuration/test-code/test_config_run.bash create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_GraphMultiGPU.cpp delete mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Task.cpp delete mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Task.cpp delete mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp delete mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Task.cpp delete mode 100644 lib/kokkos/core/unit_test/standalone/Makefile delete mode 100644 lib/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp create mode 100644 lib/kokkos/core/unit_test/view/TestAllocationAndSpanSize.hpp create mode 100644 lib/kokkos/core/unit_test/view/TestConversionFromPointer.cpp rename lib/kokkos/{ => docs}/CONTRIBUTING.md (100%) rename lib/kokkos/{ => docs}/SECURITY.md (100%) create mode 100644 lib/kokkos/docs/TuningDesign.md delete mode 100644 lib/kokkos/example/README create mode 100644 lib/kokkos/example/build_cmake_installed_modules/CMakeLists.txt create mode 100644 lib/kokkos/example/build_cmake_installed_modules/cmake_example.cpp create mode 100644 lib/kokkos/example/build_cmake_installed_multilanguage/CMakeLists.txt create mode 100644 lib/kokkos/example/build_cmake_installed_multilanguage/cmake_example.cpp create mode 100644 lib/kokkos/example/build_cmake_installed_multilanguage/library.cpp create mode 100644 lib/kokkos/example/build_cmake_installed_multilanguage/library.cu create mode 100644 lib/kokkos/example/build_cmake_installed_multilanguage/library.hip delete mode 100644 lib/kokkos/example/make_buildlink/Makefile delete mode 100644 lib/kokkos/example/make_buildlink/README delete mode 100644 lib/kokkos/example/make_buildlink/main.cpp delete mode 100644 lib/kokkos/example/query_device/Makefile delete 
mode 100644 lib/kokkos/example/relocatable_function/Makefile delete mode 100644 lib/kokkos/example/tutorial/01_hello_world/Makefile delete mode 100644 lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile delete mode 100644 lib/kokkos/example/tutorial/02_simple_reduce/Makefile delete mode 100644 lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile delete mode 100644 lib/kokkos/example/tutorial/03_simple_view/Makefile delete mode 100644 lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile delete mode 100644 lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile delete mode 100644 lib/kokkos/example/tutorial/05_simple_atomics/Makefile delete mode 100644 lib/kokkos/example/tutorial/06_simple_mdrangepolicy/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/CMakeLists.txt delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile delete mode 100644 lib/kokkos/example/tutorial/Advanced_Views/Makefile delete mode 100644 lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile delete mode 100644 lib/kokkos/example/tutorial/Algorithms/Makefile create mode 100644 lib/kokkos/example/tutorial/Debugging/01_ErrorReporter/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Debugging/01_ErrorReporter/error_reporter.cpp create mode 100644 lib/kokkos/example/tutorial/Debugging/CMakeLists.txt delete mode 100644 
lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile delete mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile delete mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile delete mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile delete mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile delete mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile delete mode 100644 lib/kokkos/example/tutorial/Makefile delete mode 100644 lib/kokkos/example/tutorial/README delete mode 100644 lib/kokkos/example/tutorial/launch_bounds/Makefile create mode 100644 lib/kokkos/example/virtual_functions/CMakeLists.txt delete mode 100644 lib/kokkos/example/virtual_functions/Makefile delete mode 100755 lib/kokkos/generate_makefile.bash delete mode 100755 lib/kokkos/gnu_generate_makefile.bash delete mode 100644 lib/kokkos/master_history.txt create mode 100644 lib/kokkos/simd/src/Kokkos_SIMD.cppm create mode 100644 lib/kokkos/simd/src/Kokkos_SIMD_Impl.cppm create mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_LoadStore.hpp delete mode 100644 lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp create mode 100644 lib/kokkos/tpls/desul-hash.txt create mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc9_asm_exchange.inc create mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc9_asm_exchange_memorder.inc create mode 100644 lib/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc9_asm_exchange_op.inc create mode 100644 lib/kokkos/tpls/mdspan-hash.txt diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index f58fe779a2a..8b3203d46c6 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,95 @@ # CHANGELOG +## 5.0.0 + +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.7.01...5.0.0) + +### 
Features: +* Require C++20 [\#8372](https://github.com/kokkos/kokkos/pull/8372) + * Implies a new set of minimum compiler versions +* Reenabled new `mdspan` based `View` implementation (disabled in 4.7.1) +* Complete C++20 module support [\#8222](https://github.com/kokkos/kokkos/pull/8222), [\#8263](https://github.com/kokkos/kokkos/pull/8263), [\#8218](https://github.com/kokkos/kokkos/pull/8218) +* Begin removal of features deprecated in the Kokkos 4 release cycle [\#8374](https://github.com/kokkos/kokkos/pull/8374), [\#8390](https://github.com/kokkos/kokkos/pull/8390) + +### Backend and Architecture Enhancements: + +#### CUDA: +* Leverage hardware atomics for 128-bit compare-and-swap on Hopper+ [\#8025](https://github.com/kokkos/kokkos/pull/8025) +* Enable lock-free atomic_fetch_op for 128-bit types on Hopper+ [\#8511](https://github.com/kokkos/kokkos/pull/8511) +* Use `__grid_constant__` based launch mechanism: this can lead to subtle synchronization behavior change for mid-size functors [\#8529](https://github.com/kokkos/kokkos/pull/8529) + +#### HIP: +* Fix a performance regression introduced in 4.6 when using lightweight kernel in parallel reduce [\#8268](https://github.com/kokkos/kokkos/pull/8268) +* Add support for the Navi 4 architecture [\#8501](https://github.com/kokkos/kokkos/pull/8501) +* Leverage HIP atomic builtins for `atomic_fetch_{min,max}` with floating point types [\#8554](https://github.com/kokkos/kokkos/pull/8554) +* Prefer smaller block sizes for `parallel_for` with small iteration space. 
[\#8512](https://github.com/kokkos/kokkos/pull/8512) + +#### SYCL: +* Change SYCL::size_type from int to unsigned - the default iteration integer type [\#8542](https://github.com/kokkos/kokkos/pull/8542) + +#### OpenMPTarget: +* Update `parallel_scan()` to support RangePolicy with non-zero begin index [\#8406](https://github.com/kokkos/kokkos/pull/8406) + +#### OpenACC: +* Add partial support for pseudorandom number generator APIs in Kokkos_Random.hpp [\#8052](https://github.com/kokkos/kokkos/pull/8052) +* Update `parallel_scan()` to support RangePolicy with non-zero begin index [\#8316](https://github.com/kokkos/kokkos/pull/8316) +* Add support for `partition_space()` API [\#7920](https://github.com/kokkos/kokkos/pull/7920) +* Add support for custom reduction with range policy for scalar reduction variables [\#8307](https://github.com/kokkos/kokkos/pull/8307) + +### General Enhancements +* Optimize View initialization with adaptive `memset`/`parallel_for` selection in parallel Host backends [\#8178](https://github.com/kokkos/kokkos/pull/8178) +* `UnorderedMap` allow using `SequentialHostInit` [\#8182](https://github.com/kokkos/kokkos/pull/8182) +* Add simd type conversions [\#8085](https://github.com/kokkos/kokkos/pull/8085) +* Make `reduction_identity<[b]half_t>` member functions return `[b]half_t` instead of `float` [\#8329](https://github.com/kokkos/kokkos/pull/8329) +* Print the commit hash of the embedded dependencies at configure time and when calling `print_configuration` [\#8385](https://github.com/kokkos/kokkos/pull/8385) +* Desul atomics: Use Clang atomic min/max GCC-style builtins [\#8507](https://github.com/kokkos/kokkos/pull/8507) +* Enable running tests on systems with only 2GB of device memory [\#8552](https://github.com/kokkos/kokkos/pull/8552) +* Added `begin()`/`end()` methods to `Kokkos::Array` [\#8577](https://github.com/kokkos/kokkos/pull/8577) +* Added missing `noexcept` specifier to some methods of `Kokkos::Array` 
[\#8582](https://github.com/kokkos/kokkos/pull/8582) +* Updated `ErrorReporter`: adhere to Kokkos naming conventions and change `get_reports` to return values [\#8486](https://github.com/kokkos/kokkos/pull/8486) +* Added explicit loop unrolling execution policy trait (currently only effective in CUDA) [\#8164](https://github.com/kokkos/kokkos/pull/8164) + +### Build System Changes +* Change `Kokkos_ENABLE_DEPRECATED_CODE_4` default `ON -> OFF` [\#8378](https://github.com/kokkos/kokkos/pull/8378) +* Added `Kokkos_ENABLE_DEPRECATED_CODE_5` option. Default is `ON`. [\#8340](https://github.com/kokkos/kokkos/pull/8340) +* Bump CMake minimum required version to 3.22 [\#8377](https://github.com/kokkos/kokkos/pull/8377) +* Require CMake 3.25.2 for c++20 support in the CUDA language [\#8402](https://github.com/kokkos/kokkos/pull/8402) +* Make Kokkos support multiple languages [\#8167](https://github.com/kokkos/kokkos/pull/8167) +* Allow compiling to shared libraries on Windows [\#8324](https://github.com/kokkos/kokkos/pull/8324) + +### Incompatibilities (i.e. 
breaking changes) +* Change `simd` to use the largest available simd size by default [\#8250](https://github.com/kokkos/kokkos/pull/8250) +* `MemoryRandomAccess` does not imply `Unmanaged` [\#8368](https://github.com/kokkos/kokkos/pull/8368) +* Drop makefile support [\#8374](https://github.com/kokkos/kokkos/pull/8374) +* Remove task DAG capabilities [\#8390](https://github.com/kokkos/kokkos/pull/8390) +* Remove support for Nvidia Kepler architecture [\#8518](https://github.com/kokkos/kokkos/pull/8518) +* Bump google benchmark minimum requirement to v1.8.3 [\#8579](https://github.com/kokkos/kokkos/pull/8579) +* Check that parallel constructs invoked before initialize or after finalize will error out [\#7675](https://github.com/kokkos/kokkos/pull/7675) +* Submit graphs on the default execution space when no execution space is specified [\#8365](https://github.com/kokkos/kokkos/pull/8365) + +### Deprecations +* Deprecate `KOKKOS_ATTRIBUTE_NODISCARD` macro [\#8388](https://github.com/kokkos/kokkos/pull/8388) +* Deprecate `{Owning,Observing}RawPtr` aliases [\#8397](https://github.com/kokkos/kokkos/pull/8397) +* Deprecate support for using nested OpenMP parallel regions without nested OpenMP enabled [\#7417](https://github.com/kokkos/kokkos/pull/7417) +* Deprecate creating Kokkos::OpenMP instances inside OpenMP parallel regions [\#8488](https://github.com/kokkos/kokkos/pull/8488) +* Deprecate `Random_XorShift{64,1024}_Pool::init` [\#8082](https://github.com/kokkos/kokkos/pull/8082) +* Deprecate simd::\[const_\]where_expression [\#7960](https://github.com/kokkos/kokkos/pull/7960) +* Deprecate View::HostMirror in favor of View::host_mirror_type [\#8232](https://github.com/kokkos/kokkos/pull/8232) +* Deprecate array_type and scalar_array_type and more enums in ViewTraits [\#7360](https://github.com/kokkos/kokkos/pull/7360) +* Deprecated old member function names in (experimental) `ErrorReporter` [\#8486](https://github.com/kokkos/kokkos/pull/8486) + +### Bug Fixes +* Track 
modification for `resize` only if `DualView` is not using a single device [\#8273](https://github.com/kokkos/kokkos/pull/8273) +* Fix MSVC `floating-point value does not fit in required floating-point type` warning from `reduction_identity` [\#8376](https://github.com/kokkos/kokkos/pull/8376) +* Properly delete `Timer` copy constructor and copy assignment operators [\#8399](https://github.com/kokkos/kokkos/pull/8399) +* Fix RISC-V support (compiler check at configuration time and missing semicolons at compile time) [\#8439](https://github.com/kokkos/kokkos/pull/8439) +* Corrected `bit_width` return type to be `int` instead of `T` to align with the standard library [\#8509](https://github.com/kokkos/kokkos/pull/8509) +* OpenMP: fix `partition_space` with low thread counts [\#8488](https://github.com/kokkos/kokkos/pull/8488) +* Fix `parallel_reduce` on HIP and Cuda with `LaunchBounds` values smaller than 32 [\#8452](https://github.com/kokkos/kokkos/pull/8452) +* Cuda,HIP: Launch work graph on the specified instance [\#8576](https://github.com/kokkos/kokkos/pull/8576) +* Work around a performance regression related to index computation in the mdspan-based View [\#8476](https://github.com/kokkos/kokkos/pull/8476) +* Fix a failure at configure time when SVE is enabled and the tests are disabled [\#8661](https://github.com/kokkos/kokkos/pull/8661) + ## 4.7.01 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.7.00...4.7.01) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 7ab16c2fbac..0a8bcd8d906 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16 FATAL_ERROR) +cmake_minimum_required(VERSION 3.22 FATAL_ERROR) # scan source files for importing C++20 modules if(Kokkos_ENABLE_EXPERIMENTAL_CXX20_MODULES) @@ -101,19 +101,6 @@ if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_HIP) set(KOKKOS_COMPILE_LANGUAGE HIP) endif() -if(Spack_WORKAROUND) - 
if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) - message(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILER_AS_CMAKE_LANGUAGE in a spack installation!") - endif() - - #if we are explicitly using Spack for development, - #nuke the Spack compiler - set(SPACK_CXX $ENV{SPACK_CXX}) - if(SPACK_CXX) - set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) - set(ENV{CXX} ${SPACK_CXX}) - endif() -endif() # Always call the project command to define Kokkos_ variables # and to make sure that C++ is an enabled language project(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE}) @@ -152,9 +139,9 @@ elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) endif() endif() -set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 7) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MAJOR 5) +set(Kokkos_VERSION_MINOR 0) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/lib/kokkos/COPYRIGHT.md b/lib/kokkos/COPYRIGHT.md new file mode 100644 index 00000000000..63184ffcb7c --- /dev/null +++ b/lib/kokkos/COPYRIGHT.md @@ -0,0 +1,206 @@ +# Copyright Contributors to the Kokkos Project + +This file contains a (partial) list of people who contributed to the Kokkos Project +and retain applicable copyrights to their contributions. + +For each contributor, we list their organization (if known) and the email +address associated with their commits. + +Contributors are listed for specific date ranges reflecting major Kokkos versions. +These lists are generated using `git shortlog -sne --since="YYYY-MM-DD" --until="YYYY-MM-DD"` +commands. + +## Kokkos 5 + +Date Range: present -- 2025-08-11 + +### Sandia National Laboratories (SNL) + + Under the terms of Contract DE-NA0003525 with NTESS, + the U.S. 
Government retains certain rights in this software. + +- Christian Trott; SNL; crtrott@sandia.gov +- Nathan Ellingwood; SNL; ndellin@sandia.gov +- Dong Hun Lee; SNL; donlee@sandia.gov +- Nicolas Morales; SNL; nmmoral@sandia.gov +- Conrad Clevenger ; SNL; tccleve@sandia.gov +- Carl Pearson; SNL; cwpears@sandia.gov +- Jan Ciesko; SNL; jan.ciesko@gmail.com + +### Oak Ridge National Laboratories (ORNL) + +- Damien Lebrun-Grandie; ORNL; dalg24@gmail.com +- Daniel Arndt; ORNL; arndtd@ornl.gov +- Bruno Turcksin; ORNL; bruno.turcksin@gmail.com +- Jakob Bludau; ORNL; bludauj@ornl.gov +- Seyong Lee; ORNL; lees2@ornl.gov + +### Commissariat à l’Energie Atomique et aux Energies Alternatives (CEA) + +- Paul Zehner; CEA; paul.zehner@cea.fr +- Trévis Morvany; CEA; trevis.morvany@cea.fr +- Paul Gannay; CEA; paul.gannay@cea.fr +- Thomas Padioleau; CEA; thomas.padioleau@cea.fr +- Cédric Chevalier; CEA; cedric.chevalier@cea.fr + +### Individual Contributors (OTHER) + +- Romin Tomasetti; OTHER; romin.tomasetti@gmail.com +- Maarten Arnst; OTHER; maarten.arnst@uliege.be + +## Kokkos 4 + +Date Range: 2025-08-11 -- 2022-09-20 + +### Sandia National Laboratories (SNL) + + Kokkos v. 4.0 + Copyright (2022) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + + Under the terms of Contract DE-NA0003525 with NTESS, + the U.S. Government retains certain rights in this software. 
+ +- Christian Trott; SNL; crtrott@sandia.gov +- Francesco Rizzi; SNL; fnrizzi@sandia.gov +- Nathan Ellingwood; SNL; ndellin@sandia.gov +- Dong Hun Lee; SNL; donlee@sandia.gov +- Nicolas Morales; SNL; nmmoral@sandia.gov +- Conrad Clevenger ; SNL; tccleve@sandia.gov +- Carl Pearson; SNL; cwpears@sandia.gov +- Jan Ciesko; SNL; jan.ciesko@gmail.com +- Cezary Skrzyński; SNL; cezary.skrzynski@ng-analytics.com +- Arkadiusz Szczepkowicz; SNL; arek.szczepkowicz@ng-analytics.com +- Dan Ibanez; SNL; daibane@sandia.gov +- Evan Harvey; SNL; eharvey@sandia.gov + +### Oak Ridge National Laboratories (ORNL) + +- Damien Lebrun-Grandie; ORNL; dalg24@gmail.com +- Daniel Arndt; ORNL; arndtd@ornl.gov +- Bruno Turcksin; ORNL; bruno.turcksin@gmail.com +- Jakob Bludau; ORNL; bludauj@ornl.gov +- Seyong Lee; ORNL; lees2@ornl.gov +- Andrey Prokopenko; ORNL; prokopenkoav@ornl.gov + +### Commissariat à l’Energie Atomique et aux Energies Alternatives (CEA) + +- Paul Zehner; CEA; paul.zehner@cea.fr +- Thomas Padioleau; CEA; thomas.padioleau@cea.fr +- Trévis Morvany; CEA; trevis.morvany@cea.fr +- Cédric Chevalier; CEA; cedric.chevalier@cea.fr +- Paul Gannay; CEA; paul.gannay@cea.fr + +### Individual Contributors (OTHER) + +- Rahulkumar Gayatri; OTHER; rahulgayatri84@gmail.com rgayatri@lbl.gov rahulkgayatri@gmail.com +- Romin Tomasetti; OTHER; romin.tomasetti@gmail.com +- Mikael Simberg; OTHER; mikael.simberg@iki.fi +- Nevin ":-)" Liber; OTHER; nliber+github@gmail.com nliber@anl.gov +- Maarten Arnst; OTHER; maarten.arnst@uliege.be +- Christoph Junghans; OTHER; junghans@votca.org +- Timo Heister; OTHER; timo.heister@gmail.com + +## Kokkos 3 + +Date Range: 2022-09-20 -- 2019-06-24 + +### Sandia National Laboratories (SNL) + + Kokkos v. 3.0 + Copyright (2020) National Technology & Engineering + Solutions of Sandia, LLC (NTESS). + + Under the terms of Contract DE-NA0003525 with NTESS, + the U.S. Government retains certain rights in this software. + +- Christian Trott; SNL; crtrott@sandia.gov +- D. S. 
Hollman; SNL; dshollm@sandia.gov +- David Poliakoff; SNL; david.poliakoff@gmail.com +- Nathan Ellingwood; SNL; ndellin@sandia.gov +- Dan Ibanez; SNL; daibane@sandia.gov +- Francesco Rizzi; SNL; fnrizzi@sandia.gov +- Dong Hun Lee; SNL; donlee@sandia.gov +- Evan Harvey; SNL; eharvey@sandia.gov +- Phil Miller; SNL; pbmille@sandia.gov +- Jeff Miles; SNL; jsmiles@sandia.gov +- Nicolas Morales; SNL; nmmoral@sandia.gov +- Jeremiah Wilke; SNL; jjwilke@sandia.gov +- Jan Ciesko; SNL; jan.ciesko@gmail.com +- Cezary Skrzyński; SNL; cezary.skrzynski@ng-analytics.com +- Brian Kelley; SNL; bmkelle@sandia.gov +- Jonathan Lifflander; SNL; jliffla@sandia.gov +- Vinh Dang; SNL; vqdang@sandia.gov +- CA Lewis; SNL; canlewi@sandia.gov +- Amy Powell; SNL; ajpowel@sandia.gov +- Eric Phipps; SNL; etphipp@sandia.gov +- Samuel Browne; SNL; sebrown@sandia.gov +- Si Hammond; SNL; sdhammo@compton1.(none) + +### Oak Ridge National Laboratories (ORNL) + +- Damien Lebrun-Grandie; ORNL; dalg24@gmail.com +- Daniel Arndt; ORNL; arndtd@ornl.gov +- Bruno Turcksin; ORNL; bruno.turcksin@gmail.com +- Seyong Lee; ORNL; lees2@ornl.gov + +### Individual Contributors (OTHER) + +- Rahul Gayatri; OTHER; rgayatri@lbl.gov rahulgayatri84@gmail.com +- Nick Curtis; OTHER; nicholas.curtis@amd.com nicurtis@amd.com +- Jonathan R. Madsen; OTHER; jonathanrmadsen@gmail.com +- Christoph Junghans; OTHER; junghans@votca.org +- Mikael Simberg; OTHER; simberg@cscs.ch mikael.simberg@iki.fi +- J Todd; OTHER; joeatodd@gmail.com +- Jakob Bludau; OTHER; jakob.bludau@tum.de +- Matt Stack; OTHER; 36867242+matt-stack@users.noreply.github.com +- Jeff Hammond; OTHER; jeff.r.hammond@intel.com +- Cameron Smith; OTHER; smithc11@rpi.edu + +## Kokkos 2 + +### Sandia National Laboratories (SNL) + + Kokkos v. 2.0 + Copyright (2014) Sandia Corporation + + Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, + the U.S. Government retains certain rights in this software. 
+ +- Christian Trott; SNL; crtrott@sandia.gov +- Carter Edwards; SNL; hcedwar@sandia.gov +- Dan Ibanez; SNL; daibane@sandia.gov +- Nathan Ellingwood; SNL; ndellin@sandia.gov +- David Hollman; SNL; dshollm@sandia.gov +- Dan Sunderland; SNL; dsunder@sandia.gov +- James Foucar; SNL; jgfouca@sandia.gov +- Steve Bova; SNL; swbova@sandia.gov +- Vinh Dang; SNL; vqdang@sandia.gov +- Greg Mackey; SNL; gemacke@sandia.gov +- Si Hammond; SNL; sdhammo@sandia.gov +- James David Stevens; SNL; jdsteve@kokkos-dev.sandia.gov +- Mark Hoemmen; SNL; mhoemme@sandia.gov +- Jeremiah Wilke; SNL; jjwilke@sandia.gov +- Jeff Miles; SNL; jsmiles@sandia.gov +- Stan Moore; SNL; stamoor@sandia.gov +- James Elliott; SNL; jjellio@sandia.gov +- Eric Phipps; SNL; etphipp@sandia.gov +- Kyungjoo Kim; SNL; kyukim@sandia.gov +- Steven W. Bova; SNL; swbova@kokkos-dev.sandia.gov + +### Oak Ridge National Laboratories (ORNL) + +- Damien Lebrun-Grandie; ORNL; dalg24@gmail.com + +### Commissariat à l’Energie Atomique et aux Energies Alternatives (CEA) + +- Pierre Kestener; CEA; pierre.kestener@cea.fr + +### Individual Contributors (OTHER) + +- Mikael Simberg; OTHER; simberg@cscs.ch mikael.simberg@iki.fi +- Chip Freitag; OTHER; chip.freitag@amd.com +- Scott Kruger; OTHER; scott.e.kruger@gmail.com +- Christoph Junghans; OTHER; junghans@votca.org +- Daniel Holladay; OTHER; dholladay00@lanl.gov + diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt deleted file mode 100644 index cbba3efc7bc..00000000000 --- a/lib/kokkos/Copyright.txt +++ /dev/null @@ -1,8 +0,0 @@ -************************************************************************ - - Kokkos v. 4.0 - Copyright (2022) National Technology & Engineering - Solutions of Sandia, LLC (NTESS). - -Under the terms of Contract DE-NA0003525 with NTESS, -the U.S. Government retains certain rights in this software. 
diff --git a/lib/kokkos/LICENSE_FILE_HEADER b/lib/kokkos/LICENSE_FILE_HEADER deleted file mode 100644 index 03eb04f8bdc..00000000000 --- a/lib/kokkos/LICENSE_FILE_HEADER +++ /dev/null @@ -1,15 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos deleted file mode 100644 index 5eb983a7e3b..00000000000 --- a/lib/kokkos/Makefile.kokkos +++ /dev/null @@ -1,1764 +0,0 @@ -# Default settings common options. - -#LAMMPS specific settings: - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -ifndef KOKKOS_PATH - KOKKOS_PATH=../../lib/kokkos -endif - -CXXFLAGS=$(CCFLAGS) -ifeq ($(mode),shared) - CXXFLAGS += $(SHFLAGS) -endif - - -ifneq ($(KOKKOS_USE_DEPRECATED_MAKEFILES), 1) - $(error Makefile support is deprecated. Only CMake builds will be supported from Kokkos 5 on. Set KOKKOS_USE_DEPRECATED_MAKEFILES=1 to silence this error.) 
-endif - -KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 7 -KOKKOS_VERSION_PATCH = 1 -KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) - -# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial -KOKKOS_DEVICES ?= "OpenMP" -#KOKKOS_DEVICES ?= "Threads" -# Options: -# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ampere87,Ada89,Hopper90,Blackwell100,Blackwell120 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace,ARMv84,ARMv84_SVE -# IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 -# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3,Zen4,Zen5 -# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_DG2,Intel_XeHP,Intel_PVC -KOKKOS_ARCH ?= "" -# Options: yes,no -KOKKOS_DEBUG ?= "no" -# Options: hwloc -KOKKOS_USE_TPLS ?= "" -# Options: c++17,c++1z,c++20,c++2a,c++23,c++2b -KOKKOS_CXX_STANDARD ?= "c++17" -# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings -KOKKOS_OPTIONS ?= "" -KOKKOS_CMAKE ?= "no" -KOKKOS_TRIBITS ?= "no" -KOKKOS_STANDALONE_CMAKE ?= "no" - -# Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,enable_malloc_async -KOKKOS_CUDA_OPTIONS ?= "" - -# Options: rdc,enable_malloc_async -KOKKOS_HIP_OPTIONS ?= "" - -# Default settings specific options. 
-# Options: enable_async_dispatch -KOKKOS_HPX_OPTIONS ?= "" - -#Options : force_host_as_device -KOKKOS_OPENACC_OPTIONS ?= "" - -# Helper functions for conversion to upper case -uppercase_TABLE:=a,A b,B c,C d,D e,E f,F g,G h,H i,I j,J k,K l,L m,M n,N o,O p,P q,Q r,R s,S t,T u,U v,V w,W x,X y,Y z,Z -uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(wordlist 2,$(words $1),$1),$2)),$2) -uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT) -# Return a 1 if a string contains a substring and 0 if not -# Note the search string should be without '"' -# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc) -# Will return a 1 -kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0) -# Returns 1 if the path exists, 0 otherwise -# Example: $(call kokkos_path_exists,/path/to/file) -# Will return a 1 if /path/to/file exists -kokkos_path_exists=$(if $(wildcard $1),1,0) - -# Check for general settings - -KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) -KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17) -KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z) -KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++20) -KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a) -KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23) -KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b) -KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26) -KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c) - -# Check for external libraries. -KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) - -# Check for advanced settings. 
-KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) -KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) -KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning) -KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) -KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) -KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) -# deprecated -KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) -KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) -KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) -KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) -KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) -KOKKOS_INTERNAL_CUDA_ENABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_malloc_async) -KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) -# deprecated -KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) -# deprecated -KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics) -KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),impl_disable_bundled_mdspan) -KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) -KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings) - -KOKKOS_INTERNAL_HIP_USE_RELOC := 
$(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc) -KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),enable_malloc_async) -KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE := $(call kokkos_has_string,$(KOKKOS_OPENACC_OPTIONS),force_host_as_device) - -# Check for Kokkos Host Execution Spaces one of which must be on. -KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) -KOKKOS_INTERNAL_USE_THREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Threads) -KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) -KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 0) - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 - endif - endif -endif - -# Check for other Execution Spaces. -KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda) -KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP) -KOKKOS_INTERNAL_USE_SYCL := $(call kokkos_has_string,$(KOKKOS_DEVICES),SYCL) -KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget) -KOKKOS_INTERNAL_USE_OPENACC := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenACC) - -KOKKOS_DEVICELIST = -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - KOKKOS_DEVICELIST += Serial -endif -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_DEVICELIST += OpenMP -endif -ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - KOKKOS_DEVICELIST += Threads -endif -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - KOKKOS_DEVICELIST += HPX -endif -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_DEVICELIST += Cuda -endif -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - KOKKOS_DEVICELIST += HIP -endif -ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - KOKKOS_DEVICELIST += SYCL -endif -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_DEVICELIST += OPENMPTARGET -endif -ifeq 
($(KOKKOS_INTERNAL_USE_OPENACC), 1) - KOKKOS_DEVICELIST += OpenACC -endif - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - ifeq ($(origin CUDA_PATH), undefined) - CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) - endif - ifeq ($(CUDA_PATH),) - CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) - endif - KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) -endif - -# Check OS. -KOKKOS_OS := $(strip $(shell uname -s)) -KOKKOS_INTERNAL_OS_CYGWIN := $(call kokkos_has_string,$(KOKKOS_OS),CYGWIN) -KOKKOS_INTERNAL_OS_LINUX := $(call kokkos_has_string,$(KOKKOS_OS),Linux) -KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin) - -# Check compiler. -KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1)) -KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation) -KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "CC-")) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc)) -KOKKOS_INTERNAL_COMPILER_NVHPC := $(strip $(shell $(CXX) --version 2>&1 | grep -c "nvc++")) -KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) -KOKKOS_INTERNAL_COMPILER_CRAY_CLANG := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -v "error:" | grep -c "clang++")) -KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),oneAPI) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) -KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) -KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) -# TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment. 
-KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU) - -# Check Host Compiler if using NVCC through nvcc_wrapper -ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep -c nvcc_wrapper)) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER), 1) - - KOKKOS_CXX_HOST_VERSION := $(strip $(shell $(CXX) $(CXXFLAGS) --host-version 2>&1)) - KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),Intel Corporation) - KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),clang) - endif -endif - -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) - KOKKOS_INTERNAL_COMPILER_CLANG = 1 -endif - -# Apple Clang passes both clang and apple clang tests, so turn off clang. -ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CLANG = 0 -endif -# AMD HCC passes both clang and hcc test so turn off clang -ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) - KOKKOS_INTENAL_COMPILER_CLANG = 0 -endif -# Fujitsu passes also as clang and gcc respectively -ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) - # TODO handle gcc flags and workaround for bug? - # fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730 - $(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.) - # HACK since fujitsu only accepts some gcc flags, disable gcc here? 
- # KOKKOS_INTERNAL_COMPILER_GCC = 0 - endif - # TODO handle clang flags - # warnings: works fine as is - # openmp: handled - #KOKKOS_INTERNAL_COMPILER_CLANG = 0 -endif - -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - # TODO empty variable if fujitsu (clang mode) passes as clang - KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.') - - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) - $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) - endif - - KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 - endif -endif - -# Set compiler warnings flags. -ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized - else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # TODO check if cray accepts GNU style warnings - KOKKOS_INTERNAL_COMPILER_WARNINGS = - else - #gcc - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized - endif -else - KOKKOS_INTERNAL_COMPILER_WARNINGS = -endif - -# Set OpenMP flags. 
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp - else ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) - # fujitsu (clang mode) fails with `=libomp` - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp - else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp - endif -else - ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment. - KOKKOS_INTERNAL_OPENMP_FLAG := - else ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -fiopenmp - else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - # Set OpenACC flags. - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) - KOKKOS_INTERNAL_OPENACC_FLAG := -acc - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_OPENACC_FLAG := -fopenacc -fopenacc-fake-async-wait -fopenacc-implicit-worker=vector -Wno-openacc-and-cxx -Wno-openmp-mapping -Wno-unknown-cuda-version -Wno-pass-failed - else - $(error Makefile.kokkos: OpenACC is enabled but the compiler must be NVHPC (got version string $(KOKKOS_CXX_VERSION))) - endif -endif - -# Set C++ version flags. -ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_INTERNAL_CXX17_FLAG := -hstd=c++17 - KOKKOS_INTERNAL_CXX1Z_FLAG := -hstd=c++1z - KOKKOS_INTERNAL_CXX20_FLAG := -hstd=c++20 - KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2a - KOKKOS_INTERNAL_CXX23_FLAG := -hstd=c++23 - KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2b -else - KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17 - KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1z - KOKKOS_INTERNAL_CXX20_FLAG := -std=c++20 - KOKKOS_INTERNAL_CXX2A_FLAG := -std=c++2a - KOKKOS_INTERNAL_CXX23_FLAG := -std=c++23 - KOKKOS_INTERNAL_CXX2B_FLAG := -std=c++2b -endif - -# Check for Kokkos Architecture settings. - -# Intel based. 
-KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC) -KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) -KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) -KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) -KOKKOS_INTERNAL_USE_ARCH_SKL := $(call kokkos_has_string,$(KOKKOS_ARCH),SKL) -KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX) -KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL) -KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) -KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) -KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) - -# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter -# matches the CMake option but we also accept the former for backward-compatibility. -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr 
$(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \ - + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \ - + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP)) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen) - endif -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_DG2 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG2) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG2), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_DG2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG2) -endif -KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0) - KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP) -endif -# Traditionally the architecture was called PVC instead of Intel_PVC. This -# version makes us accept IntelPVC and Intel_PVC as well. -KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) - -# NVIDIA based. 
-NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper -KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30) -KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler32) -KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler35) -KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler37) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell50) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell52) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53) -KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61) -KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60) -KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70) -KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72) -KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75) -KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80) -KOKKOS_INTERNAL_USE_ARCH_AMPERE86 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere86) -KOKKOS_INTERNAL_USE_ARCH_AMPERE87 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere87) -KOKKOS_INTERNAL_USE_ARCH_ADA89 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ada89) -KOKKOS_INTERNAL_USE_ARCH_HOPPER90 := $(call kokkos_has_string,$(KOKKOS_ARCH),Hopper90) -KOKKOS_INTERNAL_USE_ARCH_BLACKWELL100 := $(call kokkos_has_string,$(KOKKOS_ARCH),Blackwell100) -KOKKOS_INTERNAL_USE_ARCH_BLACKWELL120 := $(call kokkos_has_string,$(KOKKOS_ARCH),Blackwell120) -KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + 
$(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ - + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE86) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE87) \ - + $(KOKKOS_INTERNAL_USE_ARCH_ADA89) \ - + $(KOKKOS_INTERNAL_USE_ARCH_HOPPER90) \ - + $(KOKKOS_INTERNAL_USE_ARCH_BLACKWELL100) \ - + $(KOKKOS_INTERNAL_USE_ARCH_BLACKWELL120)) - -#SEK: This seems like a bug to me -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell) - KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler) - KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50)) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) - endif - endif -endif -# ARM based. 
-KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80) -KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81) -KOKKOS_INTERNAL_USE_ARCH_ARMV84 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv84) -KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) -KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) -KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX) -KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace) -KOKKOS_INTERNAL_USE_ARCH_ARMV84_SVE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv84_SVE) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV84)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV84_SVE) | bc)) - -# IBM based. -KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) -KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9) -KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) - -# AMD based. 
-KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) -KOKKOS_INTERNAL_USE_ARCH_ZEN5 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen5) -KOKKOS_INTERNAL_USE_ARCH_ZEN4 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen4) -KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3) -KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN5), 0) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 0) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) - KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) - endif - endif - endif -endif - -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906) -endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908) -endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) -endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) -endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) 
-endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) - KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) -endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) -KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103)) - -# Any AVX? -KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) -KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) - -# Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) -KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc) - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) - $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) - $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) -endif - -# Generating the list of Flags. - -KOKKOS_CPPFLAGS = -KOKKOS_LIBDIRS = -ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -endif -KOKKOS_TPL_INCLUDE_DIRS = -KOKKOS_TPL_LIBRARY_DIRS = -KOKKOS_TPL_LIBRARY_NAMES = - -ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS) -endif - -KOKKOS_LIBS = -ldl -KOKKOS_TPL_LIBRARY_NAMES += dl -ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_LIBDIRS = -L$(shell pwd) - # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command - KOKKOS_CXXLDFLAGS = -L$(shell pwd) -endif -KOKKOS_LINK_FLAGS = -KOKKOS_SRC = -KOKKOS_HEADERS = - -# Generating the KokkosCore_config.h file. - -KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp -KOKKOS_CONFIG_HEADER=KokkosCore_config.h -# Functions for generating config header file -kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP)) - -# assign hash sign to variable for compat. 
with make 4.3 -H := \# - -# Do not append first line -tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) -tmp := $(call kokkos_append_header,"Makefile constructed configuration:") -tmp := $(call kokkos_append_header,"----------------------------------------------*/") - -tmp := $(call kokkos_append_header,'$H''if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)') -tmp := $(call kokkos_append_header,'$H''error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."') -tmp := $(call kokkos_append_header,'$H''else') -tmp := $(call kokkos_append_header,'$H''define KOKKOS_CORE_CONFIG_H') -tmp := $(call kokkos_append_header,'$H''endif') - -tmp := $(call kokkos_append_header,"") -tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)") -tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_MAJOR $(KOKKOS_VERSION_MAJOR)") -tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_MINOR $(KOKKOS_VERSION_MINOR)") -tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION_PATCH $(KOKKOS_VERSION_PATCH)") -tmp := $(call kokkos_append_header,"") - -tmp := $(call kokkos_append_header,"/* Execution Spaces */") - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP') -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_SYCL') -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_LIBS += -latomic - tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET') -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_OPENACC") - ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1) - tmp := $(call kokkos_append_header,"$H""define 
KOKKOS_ENABLE_OPENACC_FORCE_HOST_AS_DEVICE") - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP') -endif - -ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL") -endif - -#only add the c++ standard flags if this is not CMake -tmp := $(call kokkos_append_header,"/* General Settings */") -ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATED_CODE_4") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATION_WARNINGS") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) -ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) -endif - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX17") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX20), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX20_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX20") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX23), 1) - #I cannot make CMake add this in a good way - so add it here - 
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX23_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26") -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1) - #I cannot make CMake add this in a good way - so add it here - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26") -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo - endif - - KOKKOS_CXXFLAGS += -g - KOKKOS_LDFLAGS += -g - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG") - ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK") - endif -endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN") -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING") -endif - -tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL") - -ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - ifneq ($(KOKKOS_CMAKE), yes) - ifneq ($(HWLOC_PATH),) - KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LIBDIRS += -L$(HWLOC_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib - 
endif - KOKKOS_LIBS += -lhwloc - KOKKOS_TPL_LIBRARY_NAMES += hwloc - endif - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC") -endif - -ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS") -endif - -tmp := $(call kokkos_append_header,"/* Optimization Settings */") - -ifeq ($(KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION), 1) - # deprecated - tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION") -endif - -tmp := $(call kokkos_append_header,"/* Cuda Settings */") - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - #deprecated - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - # deprecated - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_UVM") - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_CXXFLAGS += -fcuda-rdc - KOKKOS_LDFLAGS += -fcuda-rdc - else - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true - endif - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 90; echo $$?),0) - # This diagnostic is just plain wrong in CUDA 9 - # See https://github.com/kokkos/kokkos/issues/1470 - KOKKOS_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored - endif - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - 
KOKKOS_CXXFLAGS += -extended-lambda - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR") - KOKKOS_CXXFLAGS += -expt-relaxed-constexpr - endif - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_CONSTEXPR") - endif - endif - - ifeq ($(KOKKOS_INTERNAL_CUDA_ENABLE_MALLOC_ASYNC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") - else - tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH") - endif -endif - -# Add Architecture flags. 
- -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a - KOKKOS_LDFLAGS += -march=armv8-a - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8.1-a - KOKKOS_LDFLAGS += -march=armv8.1-a - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV84), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV84") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8.4-a - KOKKOS_LDFLAGS += -march=armv8.4-a - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - KOKKOS_CXXFLAGS += -march=armv8.2-a+sve - KOKKOS_LDFLAGS += -march=armv8.2-a+sve - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_CXXFLAGS += -msve-vector-bits=512 - KOKKOS_LDFLAGS += -msve-vector-bits=512 - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) - KOKKOS_CXXFLAGS += -msve-vector-bits=512 - KOKKOS_LDFLAGS += -msve-vector-bits=512 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_SVE") - - KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 
-msve-vector-bits=128 - KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV84_SVE), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV84_SVE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_SVE") - - # try_run() to query HW SVE vector-length - tmp := $(strip $(shell mktemp)) - KOKKOS_INTERNAL_ARCH_ARM_SVE_VL := $(strip $(shell $(CXX) -march=armv8.4-a+sve \ - $(KOKKOS_PATH)/cmake/compile_tests/get_sve_hw_vl.cpp \ - -o $(tmp) > /dev/null 2>&1 \ - && $(tmp) | sed 's/SVE_HW_VL=//' \ - ; rm -f $(tmp) )) - - KOKKOS_CXXFLAGS += -march=armv8.4-a+sve -msve-vector-bits=$(KOKKOS_INTERNAL_ARCH_ARM_SVE_VL) - KOKKOS_LDFLAGS += -march=armv8.4-a+sve -msve-vector-bits=$(KOKKOS_INTERNAL_ARCH_ARM_SVE_VL) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx2 - KOKKOS_LDFLAGS += -mavx2 - else - KOKKOS_CXXFLAGS += -march=znver1 -mtune=znver1 - KOKKOS_LDFLAGS += -march=znver1 -mtune=znver1 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx2 - KOKKOS_LDFLAGS += -mavx2 - else - KOKKOS_CXXFLAGS += -march=znver2 -mtune=znver2 - KOKKOS_LDFLAGS += -march=znver2 -mtune=znver2 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx2 - KOKKOS_LDFLAGS += -mavx2 - else - KOKKOS_CXXFLAGS += 
-march=znver3 -mtune=znver3 - KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN4") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else - KOKKOS_CXXFLAGS += -march=znver4 -mtune=znver4 - KOKKOS_LDFLAGS += -march=znver4 -mtune=znver4 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN5), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN5") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else - KOKKOS_CXXFLAGS += -march=znver5 -mtune=znver5 - KOKKOS_LDFLAGS += -march=znver5 -mtune=znver5 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx - KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99 - KOKKOS_LDFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := 
$(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER7") - - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7 - KOKKOS_LDFLAGS += -mcpu=power7 -mtune=power7 -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER8") - - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_POWER9") - - KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Assume that this is a really a GNU compiler. - KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Assume that this is a really a GNU compiler. 
- KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512MIC") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xMIC-AVX512 - KOKKOS_LDFLAGS += -xMIC-AVX512 - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Asssume that this is really a GNU compiler. - KOKKOS_CXXFLAGS += -march=knl -mtune=knl - KOKKOS_LDFLAGS += -march=knl -mtune=knl - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xSKYLAKE - KOKKOS_LDFLAGS += -xSKYLAKE - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Nothing here yet. - KOKKOS_CXXFLAGS += -march=skylake - KOKKOS_LDFLAGS += -march=skylake - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - # Nothing here yet. 
- KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 - KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client - KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server - KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") - KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids - KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC") - KOKKOS_CXXFLAGS += -mmic - KOKKOS_LDFLAGS += -mmic -endif - -# Figure out the architecture flag for Cuda. 
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_INTERNAL_USE_CUDA_ARCH=1 -endif -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_USE_CUDA_ARCH=1 - endif -endif -ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch - else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch - KOKKOS_CXXFLAGS += -x cuda - else - $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) ) - endif - KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp --offload-arch -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--offload-arch - endif -endif - -# Do not add this flag if its the cray compiler or the nvhpc compiler. -ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - 
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - 
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE87), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE87") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_87 - endif - endif - ifeq 
($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BLACKWELL100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_BLACKWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_BLACKWELL100") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_100 - endif - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BLACKWELL120), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_BLACKWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_BLACKWELL120") - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_120 - endif - endif -endif - -ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - endif -endif - - -# Figure out the architecture flag for ROCm. 
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx906\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx906 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx908\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx908 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx90a\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx90a -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx940\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx940 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1030 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) - tmp := $(call 
kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1100\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1100 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1103\"") - KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1103 -endif - - -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) - - KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_AMD_ARCH_FLAG) - KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_AMD_ARCH_FLAG) - - ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE") - KOKKOS_CXXFLAGS+=-fgpu-rdc - KOKKOS_LDFLAGS+=-fgpu-rdc - else - KOKKOS_CXXFLAGS+=-fno-gpu-rdc - KOKKOS_LDFLAGS+=-fno-gpu-rdc - endif - - ifeq ($(KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC") - else - tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC */") - endif -endif - -ifneq ($(KOKKOS_INTERNAL_USE_ARCH_AMD), 0) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_AMD_ARCH_FLAG) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_AMD_ARCH_FLAG) - endif - endif -endif - -# Figure out Intel architecture flags. 
-ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - KOKKOS_INTERNAL_LC_BACKEND := sycl -endif -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_INTERNAL_LC_BACKEND := openmp -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG2), 1) - tmp := $(call kokkos_append_header,"$H""define 
KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG2") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg2" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.50.4" -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC") - KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device pvc" -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp) - - KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization - KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) - KOKKOS_LDFLAGS+=-fsycl - KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) - - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SYCL_RELOCATABLE_DEVICE_CODE") -endif - -ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 1) - $(warning disable_desul_atomics option has been removed. Desul atomics cannot be disabled.) - KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := 0 -endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) - $(warning enable_desul_atomics option has been removed. Desul atomics are always enabled.) 
-endif -KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include - -ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) - KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/mdspan/include -endif -tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN") - -tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY") - -KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) - -ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) - KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep -c define)) -else - KOKKOS_INTERNAL_NEW_CONFIG := 1 -endif - -ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) - tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) - - # Functions for generating config header file - kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1) - kokkos_update_config_header = $(shell sed 's~@HEADER_GUARD_TAG@~$1~g' $2 > $3) - kokkos_append_config_header = $(shell echo $1 >> $2)) - tmp := $(call kokkos_start_config_header, "KokkosCore_Config_FwdBackend.tmp") - tmp := $(call kokkos_start_config_header, "KokkosCore_Config_SetupBackend.tmp") - tmp := $(call kokkos_start_config_header, "KokkosCore_Config_DeclareBackend.tmp") - tmp := $(call kokkos_start_config_header, "KokkosCore_Config_PostInclude.tmp") - tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp") - tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call 
kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call 
kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") - endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - endif -endif - -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/View/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/View/MDSpan/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) -KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) - -KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) -KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - ifneq ($(CUDA_PATH),) - KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include - ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1) - KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib64 - KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 - else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) - KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib - KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib - else - $(error Can't find CUDA library directory: no lib64 or lib directory in $(CUDA_PATH)) - endif - KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) - endif - endif - KOKKOS_LIBS += -lcudart -lcuda - KOKKOS_TPL_LIBRARY_NAMES += cudart cuda -endif - -ifeq 
($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) - ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) - endif - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) - KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenACC/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenACC/*.hpp) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG) - KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENACC_LIB) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) - ifneq ($(CUDA_PATH),) - ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) - CUDA_PATH := $(CUDA_PATH:/compilers=/cuda) - endif - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifneq ($(CUDA_PATH),) - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - endif - KOKKOS_LIBS += -lcudart - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) - KOKKOS_LIBS += -cuda - endif - ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1) - $(error If a GPU architecture is specified, KOKKOS_OPENACC_OPTIONS = force_host_as_device cannot be used. Disable the force_host_as_device option) - endif - else ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifneq ($(ROCM_PATH),) - KOKKOS_CPPFLAGS += -I$(ROCM_PATH)/include - KOKKOS_LDFLAGS += -L$(ROCM_PATH)/lib - endif - KOKKOS_LIBS += -lamdhip64 - endif - ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1) - $(error If a GPU architecture is specified, KOKKOS_OPENACC_OPTIONS = force_host_as_device cannot be used. 
Disable the force_host_as_device option) - endif - else ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1) - # Compile for kernel execution on the host. In that case, - # memory is shared between the OpenACC space and the host space. - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) - KOKKOS_CXXFLAGS += -acc=multicore - endif - else - # Automatic fallback mode; try to offload any available GPU, and fall back - # to the host CPU if no available GPU is found. - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) - KOKKOS_CXXFLAGS += -acc=gpu,multicore - endif - endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) - endif - - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) - KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) - KOKKOS_LIBS += -lpthread - KOKKOS_TPL_LIBRARY_NAMES += pthread -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) - ifneq ($(HPX_PATH),) - ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) - KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug) - KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) - KOKKOS_LIBS += $(shell 
PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) - else - KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application) - KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) - KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) - endif - else - ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) - KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug) - KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug) - KOKKOS_LIBS += $(shell pkg-config --libs hpx_application_debug) - else - KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application) - KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application) - KOKKOS_LIBS += $(shell pkg-config --libs hpx_application) - endif - endif - KOKKOS_TPL_LIBRARY_NAMES += hpx -endif - -# With Cygwin functions such as fdopen and fileno are not defined -# when strict ansi is enabled. strict ansi gets enabled with -std=c++14 -# though. So we hard undefine it here. Not sure if that has any bad side effects -# This is needed for gtest actually, not for Kokkos itself! 
-ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) - KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ -endif - -# Set KokkosExtraLibs and add -lkokkos to link line -KOKKOS_EXTRA_LIBS := ${KOKKOS_LIBS} -KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS} - -# Generating the header -DESUL_INTERNAL_CONFIG_TMP=Desul_Config.tmp -DESUL_CONFIG_HEADER=desul/atomics/Config.hpp -desul_append_header = $(shell echo $1 >> $(DESUL_INTERNAL_CONFIG_TMP)) -tmp := $(call desul_append_header, "// generated by on-demand build system by crtrott" > $(DESUL_INTERNAL_CONFIG_TMP)) -tmp := $(call desul_append_header, "$H""ifndef DESUL_ATOMICS_CONFIG_HPP_") -tmp := $(call desul_append_header, "$H""define DESUL_ATOMICS_CONFIG_HPP_") -tmp := $(call desul_append_header, "") -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA") -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA */") -endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION") -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION */") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP") -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP */") -endif -ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION") -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION */") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_SYCL") -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_SYCL */") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENMP") -else - 
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) - tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC") - else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */") - endif -else - tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */") -endif -tmp := $(call desul_append_header, "") -tmp := $(call desul_append_header, "$H""endif") - -DESUL_INTERNAL_LS_CONFIG := $(shell ls $(DESUL_CONFIG_HEADER) 2>&1) - -ifeq ($(DESUL_INTERNAL_LS_CONFIG), $(DESUL_CONFIG_HEADER)) - DESUL_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define)) -else - DESUL_INTERNAL_NEW_CONFIG := 1 -endif - -ifneq ($(DESUL_INTERNAL_NEW_CONFIG), 0) - tmp := $(shell mkdir -p desul/atomics) - tmp := $(shell cp $(DESUL_INTERNAL_CONFIG_TMP) $(DESUL_CONFIG_HEADER)) -endif - -# Setting up dependencies. 
- -KokkosCore_config.h: - -$(DESUL_CONFIG_HEADER): - -KOKKOS_CPP_DEPENDS := $(DESUL_CONFIG_HEADER) KokkosCore_config.h $(KOKKOS_HEADERS) - -# Tasking is deprecated -ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) - TMP_KOKKOS_SRC := $(KOKKOS_SRC) - KOKKOS_SRC = $(patsubst %Task.cpp,, $(TMP_KOKKOS_SRC)) -endif - -KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) -KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) - -include $(KOKKOS_PATH)/Makefile.targets - -kokkos-clean: - rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \ - KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \ - KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp - -libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) - ar cr libkokkos.a $(KOKKOS_OBJ_LINK) - ranlib libkokkos.a - -print-cxx-flags: - echo "$(KOKKOS_CXXFLAGS)" - -KOKKOS_LINK_DEPENDS=libkokkos.a - -#we have carefully separated LDFLAGS from LIBS and LIBDIRS -#we have also separated CPPFLAGS from CXXFLAGS -#if this is not cmake, for backwards compatibility -#we just jam everything together into the CXXFLAGS and LDFLAGS -ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += $(KOKKOS_CPPFLAGS) - KOKKOS_LDFLAGS += $(KOKKOS_LIBDIRS) -endif diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets deleted file mode 100644 index 1e0285772be..00000000000 --- a/lib/kokkos/Makefile.targets +++ /dev/null @@ -1,129 +0,0 @@ -Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp -Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp 
-Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp -Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp -Kokkos_Stacktrace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp -Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp -Kokkos_Command_Line_Parsing.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp -Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp -Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp -Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp -Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp -Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp - $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp -Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp -Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp -Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp -Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp -Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp - -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) -Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp -ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) -Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp -endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) -Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp -Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp -ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) -Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp -endif -Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) -Kokkos_SYCL.o : $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp -Kokkos_SYCL_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp -Kokkos_SYCL_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) -Kokkos_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP.cpp -Kokkos_HIP_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp -Kokkos_HIP_DeepCopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_DeepCopy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_DeepCopy.cpp 
-Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp -Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp -Kokkos_HIP_ZeroMemset.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp -Kokkos_HIP_IsXnack.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_IsXnack.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_IsXnack.cpp -Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) -Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp -Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) -Kokkos_OpenMP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp -Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp -ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) -Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp -endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) -Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp -ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) -Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp -endif -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) -Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp -Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) -Kokkos_OpenACC.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC.cpp -Kokkos_OpenACCSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACCSpace.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACCSpace.cpp -Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp - $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp -Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp -endif diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index 9b24322e456..ebbe466a109 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -32,23 +32,25 @@ To start learning about Kokkos: The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). -The current release is [4.7.01](https://github.com/kokkos/kokkos/releases/tag/4.7.01). +The current release is [5.0.0](https://github.com/kokkos/kokkos/releases/tag/5.0.0). ```bash -curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.7.01/kokkos-4.7.01.tar.gz +curl -OJ -L https://github.com/kokkos/kokkos/releases/download/5.0.0/kokkos-5.0.0.tar.gz # Or with wget -wget https://github.com/kokkos/kokkos/releases/download/4.7.01/kokkos-4.7.01.tar.gz +wget https://github.com/kokkos/kokkos/releases/download/5.0.0/kokkos-5.0.0.tar.gz +# Or with git +git clone --depth=2 --branch 5.0.0 https://github.com/kokkos/kokkos.git ``` To clone the latest development version of Kokkos from GitHub: ```bash -git clone -b develop https://github.com/kokkos/kokkos.git +git clone --branch develop https://github.com/kokkos/kokkos.git ``` ### Building Kokkos -To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. +To build Kokkos, you will need to have a C++ compiler that supports C++20 or later. All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/get-started/requirements.html). 
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/get-started/building-from-source.html#configuring-and-building-kokkos). @@ -75,8 +77,5 @@ Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.htm [![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) -Under the terms of Contract DE-NA0003525 with NTESS, -the U.S. Government retains certain rights in this software. - The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or [here](https://github.com/kokkos/kokkos/blob/develop/LICENSE). diff --git a/lib/kokkos/Spack.md b/lib/kokkos/Spack.md deleted file mode 100644 index 06c763a64ee..00000000000 --- a/lib/kokkos/Spack.md +++ /dev/null @@ -1,268 +0,0 @@ -![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) - -# Kokkos Spack - -This gives instructions for using Spack to install Kokkos and developing packages that depend on Kokkos. - -## Getting Started - -Make sure you have downloaded [Spack](https://github.com/spack/spack). -The easiest way to configure the Spack environment is: -````bash -> source spack/share/spack/setup-env.sh -```` -with other scripts available for other shells. -You can display information about how to install packages with: -````bash -> spack info kokkos -```` -This will print all the information about how to install Kokkos with Spack. -For detailed instructions on how to use Spack, see the [User Manual](https://spack.readthedocs.io). - -## Setting Up Spack: Avoiding the Package Cascade -By default, Spack doesn't 'see' anything on your system - including things like CMake and CUDA. -This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). 
For example, your `packages.yaml` file could be: -````yaml -packages: - cuda: - buildable: false - externals: - - prefix: /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243 - spec: cuda@10.1.243 - - modules: - - cuda/10.1.243 - spec: cuda@10.1.243 - cmake: - buildable: false - externals: - - prefix: /opt/local/ppc64le/cmake/3.16.8 - spec: cmake@3.16.8 - - modules: - - cmake/3.16.8 - spec: cmake@3.16.8 -```` -The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems). -The `buildable` flag is useful to make sure Spack crashes if there is a path error, -rather than having a type-o and Spack rebuilding everything because `cmake` isn't found. -You can verify your environment is set up correctly by running `spack graph` or `spack spec`. -For example: -````bash -> spack graph kokkos +cuda -o kokkos -|\ -o | cuda - / -o cmake -```` -Without the existing CUDA and CMake being identified in `packages.yaml`, a (subset!) of the output would be: -````bash -o kokkos -|\ -| o cmake -| |\ -| | | |\ -| | | | | |\ -| | | | | | | |\ -| | | | | | | | | |\ -| | | | | | | o | | | libarchive -| | | | | | | |\ \ \ \ -| | | | | | | | | |\ \ \ \ -| | | | | | | | | | | | |_|/ -| | | | | | | | | | | |/| | -| | | | | | | | | | | | | o curl -| | |_|_|_|_|_|_|_|_|_|_|/| -| |/| | | |_|_|_|_|_|_|_|/ -| | | | |/| | | | | | | | -| | | | o | | | | | | | | openssl -| |/| | | | | | | | | | | -| | | | | | | | | | o | | libxml2 -| | |_|_|_|_|_|_|_|/| | | -| | | | | | | | | | |\ \ \ -| o | | | | | | | | | | | | zlib -| / / / / / / / / / / / / -| o | | | | | | | | | | | xz -| / / / / / / / / / / / -| o | | | | | | | | | | rhash -| / / / / / / / / / / -| | | | o | | | | | | nettle -| | | | |\ \ \ \ \ \ \ -| | | o | | | | | | | | libuv -| | | | o | | | | | | | autoconf -| | |_|/| | | | | | | | -| | | | |/ / / / / / / -| o | | | | | | | | | perl -| o | | | | | | | | | gdbm -| o | | | | | | | | | readline -```` - -## Configuring Kokkos as a Project Dependency -Say you have 
a project "SuperScience" which needs to use Kokkos. -In your `package.py` file, you would generally include something like: -````python -class SuperScience(CMakePackage): - ... - depends_on("kokkos") -```` -Often projects want to tweak behavior when using certain features, e.g. -````python - depends_on("kokkos+cuda", when="+cuda") -```` -if your project needs CUDA-specific logic to configure and build. -This illustrates the general principle in Spack of "flowing-up". -A user requests a feature in the final app: -````bash -> spack install superscience+cuda -```` -This flows upstream to the Kokkos dependency, causing the `kokkos+cuda` variant to build. -The downstream app (SuperScience) tells the upstream app (Kokkos) how to build. - -Because Kokkos is a performance portability library, it somewhat inverts this principle. -Kokkos "flows-down", telling your application how best to configure for performance. -Rather than a downstream app (SuperScience) telling the upstream (Kokkos) what variants to build, -a pre-built Kokkos should be telling the downstream app SuperScience what variants to use. -Kokkos works best when there is an "expert" configuration installed on your system. -Your build should simply request `-DKokkos_ROOT=` and configure appropriately based on the Kokkos it finds. - -Kokkos has many, many build variants. -Where possible, projects should only depend on a general Kokkos, not specific variants. -We recommend instead adding for each system you build on a Kokkos configuration to your `packages.yaml` file (usually found in `~/.spack` for specific users). -For a Xeon + Volta system, this could look like: -````yaml - kokkos: - variants: +cuda +openmp +cuda_lambda +wrapper ^cuda@10.1 cuda_arch=70 - compiler: [gcc@7.2.0] -```` -which gives the "best" Kokkos configuration as CUDA+OpenMP optimized for a Volta 70 architecture using CUDA 10.1. -It also enables support for CUDA Lambdas. 
-The `+wrapper` option tells Kokkos to build with the special `nvcc_wrapper` (more below). -Note here that we use the built-in `cuda_arch` variant of Spack to specify the archicture. -For a Haswell system, we use -````yaml - kokkos: - variants: +openmp std=14 target=haswell - compiler: [intel@18] -```` -which uses the built-in microarchitecture variants of Spack. -Consult the Spack documentation for more details of Spack microarchitectures -and CUDA architectures. -Spack does not currently provide an AMD GPU microarchitecture option. -If building for HIP or an AMD GPU, Kokkos provides an `amd_gpu_arch` similar to `cuda_arch`. -````yaml - kokkos: - variants: +hip amd_gpu_arch=vega900 -```` - -Without an optimal default in your `packages.yaml` file, it is highly likely that the default Kokkos configuration you get will not be what you want. -For example, CUDA is not enabled by default (there is no easy logic to conditionally activate this for CUDA-enabled systems). -If you don't specify a CUDA build variant in a `packages.yaml` and you build your Kokkos-dependent project: -````bash -> spack install superscience -```` -you may end up just getting the default Kokkos (i.e. Serial). -Before running `spack install ` we recommend running `spack spec ` to confirm your dependency tree is correct. 
-For example, with Kokkos Kernels: -````bash -kokkos-kernels@3.0%gcc@8.3.0~blas build_type=RelWithDebInfo ~cblas~complex_double~complex_float~cublas~cuda cuda_arch=none ~cusparse~diy+double execspace_cuda=auto execspace_openmp=auto execspace_serial=auto execspace_threads=auto ~float~lapack~lapacke+layoutleft~layoutright memspace_cudaspace=auto memspace_cudauvmspace=auto +memspace_hostspace~mkl+offset_int+offset_size_t~openmp+ordinal_int~ordinal_int64_t~serial~superlu arch=linux-rhel7-skylake_avx512 - ^cmake@3.16.2%gcc@8.3.0~doc+ncurses+openssl+ownlibs~qt arch=linux-rhel7-skylake_avx512 - ^kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none +cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=14 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512 - ^cuda@10.1%gcc@8.3.0 arch=linux-rhel7-skylake_avx512 - ^kokkos-nvcc-wrapper@old%gcc@8.3.0 build_type=RelWithDebInfo +mpi arch=linux-rhel7-skylake_avx512 - ^openmpi@4.0.2%gcc@8.3.0~cuda+cxx_exceptions fabrics=none ~java~legacylaunchers~memchecker patches=073477a76bba780c67c36e959cd3ee6910743e2735c7e76850ffba6791d498e4 ~pmi schedulers=none ~sqlite3~thread_multiple+vt arch=linux-rhel7-skylake_avx512 -```` -The output can be very verbose, but we can verify the expected `kokkos`: -````bash -kokkos@3.0%gcc@8.3.0~aggressive_vectorization~amdavx~armv80~armv81~armv8_thunderx~armv8_tx2~bdw~bgq build_type=RelWithDebInfo ~carrizo~compiler_warnings+cuda cuda_arch=none 
+cuda_lambda~cuda_ldg_intrinsic~cuda_relocatable_device_code~cuda_uvm~debug~debug_bounds_check~debug_dualview_modify_check~deprecated_code~diy~epyc~examples~explicit_instantiation~fiji~gfx901~hpx~hpx_async_dispatch~hsw~hwloc~kaveri~kepler30~kepler32~kepler35~kepler37~knc~knl~maxwell50~maxwell52~maxwell53~memkind~numactl+openmp~pascal60~pascal61~power7~power8~power9+profiling~profiling_load_print~pthread~qthread~rocm~ryzen~serial~skx~snb std=11 ~tests~turing75~vega+volta70~volta72+wrapper~wsm arch=linux-rhel7-skylake_avx512 -```` -We see that we do have `+volta70` and `+wrapper`, e.g. - -### Spack Environments -The encouraged way to use Spack is with Spack environments ([more details here](https://spack-tutorial.readthedocs.io/en/latest/tutorial_environments.html#dealing-with-many-specs-at-once)). -Rather than installing packages one-at-a-time, you add packages to an environment. -After adding all packages, you concretize and install them all. -Using environments, one can explicitly add a desired Kokkos for the environment, e.g. -````bash -> spack add kokkos +cuda +cuda_lambda +volta70 -> spack add my_project +my_variant -> ... -> spack install -```` -All packages within the environment will build against the CUDA-enabled Kokkos, -even if they only request a default Kokkos. - -## NVCC Wrapper -Kokkos is a C++ project, but often builds for the CUDA backend. -This is particularly problematic with CMake. At this point, `nvcc` does not accept all the flags that normally get passed to a C++ compiler. -Kokkos provides `nvcc_wrapper` that identifies correctly as a C++ compiler to CMake and accepts C++ flags, but uses `nvcc` as the underlying compiler. -`nvcc` itself also uses an underlying host compiler, e.g. GCC. - -In Spack, the underlying host compiler is specified as below, e.g.: -````bash -> spack install package %gcc@8.0.0 -```` -This is still valid for Kokkos. 
To use the special wrapper for CUDA builds, request a desired compiler and simply add the `+wrapper` variant. -````bash -> spack install kokkos +cuda +wrapper %gcc@7.2.0 -```` -Downstream projects depending on Kokkos need to override their compiler. -Kokkos provides the compiler in a `kokkos_cxx` variable, -which points to either `nvcc_wrapper` when needed or the regular compiler otherwise. -Spack projects already do this to use MPI compiler wrappers. -````python -def cmake_args(self): - options = [] - ... - options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["kokkos"].kokkos_cxx) - ... - return options -```` -Note: `nvcc_wrapper` works with the MPI compiler wrappers. -If building your project with MPI, do NOT set your compiler to `nvcc_wrapper`. -Instead set your compiler to `mpicxx` and `nvcc_wrapper` will be used under the hood. -````python -def cmake_args(self): - options = [] - ... - options.append("-DCMAKE_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx) - ... - return options -```` -To accomplish this, `nvcc_wrapper` must depend on MPI (even though it uses no MPI). -This has the unfortunate consequence that Kokkos CUDA projects not using MPI will implicitly depend on MPI anyway. -This behavior is necessary for now, but will hopefully be removed later. -When using environments, if MPI is not needed, you can remove the MPI dependency with: -````bash -> spack add kokkos-nvcc-wrapper ~mpi -```` - -## Developing With Spack - -Spack has historically been much more suited to *deployment* of mature packages than active testing or developing. -However, recent features have improved support for development. -Future releases are likely to make this even easier and incorporate Git integration. -The most common commands will do a full build and install of the packages. -If doing development, you may wish to merely set up a build environment. -This allows you to modify the source and re-build. -In this case, you can stop after configuring. 
-Suppose you have Kokkos checkout in the folder `kokkos-src`: -````bash -> spack dev-build -d kokkos-src -u cmake kokkos@develop +wrapper +openmp -```` -This sets up a development environment for you in `kokkos-src` which you can use (Bash example shown): -Note: Always specify `develop` as the version when doing `dev-build`, except in rare cases. -You are usually developing a feature branch that will merge into `develop`, -hence you are making a new `develop` branch. - -````bash -> cd kokko-src -> source spack-build-env.txt -> cd spack-build -> make -```` -Before sourcing the Spack development environment, you may wish to save your current environment: -````bash -> declare -px > myenv.sh -```` -When done with Spack, you can then restore your original environment: -````bash -> source myenv.sh -```` diff --git a/lib/kokkos/algorithms/perf_test/CMakeLists.txt b/lib/kokkos/algorithms/perf_test/CMakeLists.txt index a41d3f891b1..a9dc04a7552 100644 --- a/lib/kokkos/algorithms/perf_test/CMakeLists.txt +++ b/lib/kokkos/algorithms/perf_test/CMakeLists.txt @@ -1,7 +1,7 @@ # FIXME: The following logic should be moved from here and also from `core/perf_test/CMakeLists.txt` to # the root `CMakeLists.txt` in the form of a macro # Find or download google/benchmark library -find_package(benchmark QUIET 1.5.6) +find_package(benchmark QUIET 1.8.3) if(benchmark_FOUND) message(STATUS "Using google benchmark found in ${benchmark_DIR}") else() @@ -13,8 +13,8 @@ else() FetchContent_Declare( googlebenchmark DOWNLOAD_EXTRACT_TIMESTAMP FALSE - URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz - URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7 + URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.tar.gz + URL_HASH MD5=7b93dd03670665684f1b2e9b70ad17fe ) FetchContent_MakeAvailable(googlebenchmark) list(POP_BACK CMAKE_MESSAGE_INDENT) @@ -61,3 +61,4 @@ function(KOKKOS_ADD_BENCHMARK NAME) endfunction() kokkos_add_benchmark(PerformanceTest_InclusiveScan SOURCES 
test_inclusive_scan.cpp) +kokkos_add_benchmark(PerformanceTest_Random SOURCES test_random.cpp) diff --git a/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp index 9a8e89cc483..e2e85586d2a 100644 --- a/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp +++ b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -23,11 +10,12 @@ #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.std_algorithms; #else +#include #include #endif -#include #include // FIXME: Benchmark_Context.hpp should be moved to a common location #include "../../core/perf_test/Benchmark_Context.hpp" diff --git a/lib/kokkos/algorithms/perf_test/test_random.cpp b/lib/kokkos/algorithms/perf_test/test_random.cpp new file mode 100644 index 00000000000..c98442ed6bd --- /dev/null +++ b/lib/kokkos/algorithms/perf_test/test_random.cpp @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#include +#include +#include +// FIXME: Benchmark_Context.hpp should be moved to a common location +#include "../../core/perf_test/Benchmark_Context.hpp" + +namespace Benchmark { + +// Fills each entry of +// * a view of size N +// * with the sum of K random numbers +template 
+static void Random(benchmark::State &state) { + const size_t N = state.range(0); + const size_t K = state.range(1); + constexpr double I = 1024; + + Kokkos::View out("out", N); + Pool random_pool(/*seed=*/12345); + + for ([[maybe_unused]] auto _ : state) { + Kokkos::parallel_for( + N, KOKKOS_LAMBDA(const int i) { + auto generator = random_pool.get_state(); + double acc = 0; + + for (size_t k = 0; k < K; ++k) { + acc += generator.drand(I); + } + random_pool.free_state(generator); + out(i) = acc; + }); + Kokkos::fence(); + } + + state.counters[KokkosBenchmark::benchmark_fom("rate")] = benchmark::Counter( + state.iterations() * N * K, benchmark::Counter::kIsRate); +} + +static void Random64(benchmark::State &state) { + return Random>(state); +} + +static void Random1024(benchmark::State &state) { + return Random>(state); +} + +#define RANDOM_ARGS() \ + ArgNames({"N", "K"}) \ + ->ArgsProduct({{1 << 21}, {1, 256}}) \ + ->UseRealTime() \ + ->Unit(benchmark::kMicrosecond) + +BENCHMARK(Random64)->RANDOM_ARGS(); +BENCHMARK(Random1024)->RANDOM_ARGS(); + +#undef RANDOM_ARGS + +} // namespace Benchmark diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt index 2802b8e7630..72a8a920e98 100644 --- a/lib/kokkos/algorithms/src/CMakeLists.txt +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -22,6 +22,7 @@ install( kokkos_add_library( kokkosalgorithms + STATIC # MSVC can't deal with empty shared libraries HEADERS ${ALGO_HEADERS} MODULE_INTERFACE diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp index ddd6374483e..6c19a1f1d29 100644 --- a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp +++ b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp @@ -1,17 +1,4 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {} diff --git a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp index 18e0674efea..8cc0be59bd5 100644 --- a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_NESTED_SORT_HPP_ #define KOKKOS_NESTED_SORT_HPP_ diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.cppm b/lib/kokkos/algorithms/src/Kokkos_Random.cppm index b94f003ab63..e2ba35d86d3 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.cppm +++ b/lib/kokkos/algorithms/src/Kokkos_Random.cppm @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project module; diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 236ec22e773..b1b978e68d3 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_RANDOM_HPP #define KOKKOS_RANDOM_HPP @@ -21,7 +8,13 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM #endif +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +import kokkos.core_impl; +#else #include +#endif #include #include #include @@ -77,13 +70,24 @@ namespace Kokkos { //Default constructor: does not initialize a pool Pool(); - //Initializing constructor: calls init(seed,Device_Specific_Number); - Pool(unsigned int seed); + //Initializing constructor + //Initialize Pool with seed as a starting seed + Pool(uint64_t seed); - //Initialize Pool with seed as a starting seed with a pool_size of num_states - //The Random_XorShift64 generator is used in serial to initialize all states, + //Initializing constructor + //Initialize Pool with seed as a starting seed and a pool_size of num_states + //Note: The generator is used in serial to initialize all states, //thus the initialization process is platform independent and deterministic. - void init(unsigned int seed, int num_states); + Pool(uint64_t seed, uint64_t num_states); + + //Initializing constructor + //Initialize Pool with seed as a starting seed using the specified execution space instance + Pool(const execution_space& exec, uint64_t seed); + + //Initializing constructor + //Initialize Pool with seed as a starting seed with a pool_size of num_states using the + //specified execution space instance + Pool(const execution_space& exec, uint64_t seed, uint64_t num_states); //Get a generator. This will lock one of the states, guaranteeing that each thread //will have its private generator. 
Note: on Cuda getting a state involves atomics, @@ -585,7 +589,7 @@ struct Random_XorShift1024_State { template KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v, int state_idx) - : state_(&v(state_idx, 0)), stride_(v.stride_1()) {} + : state_(&v(state_idx, 0)), stride_(v.stride(1)) {} // NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result) KOKKOS_FUNCTION @@ -941,31 +945,34 @@ class Random_XorShift64_Pool { #endif Random_XorShift64_Pool(uint64_t seed) { - init(execution_space(), seed, execution_space().concurrency()); + init_impl(execution_space(), seed, execution_space().concurrency()); execution_space().fence("Random_XorShift64_Pool: Constructor"); } Random_XorShift64_Pool(uint64_t seed, uint64_t num_states) { - init(execution_space(), seed, num_states); + init_impl(execution_space(), seed, num_states); execution_space().fence("Random_XorShift64_Pool: Constructor"); } Random_XorShift64_Pool(const execution_space& exec, uint64_t seed) { - init(exec, seed, exec.concurrency()); + init_impl(exec, seed, exec.concurrency()); } Random_XorShift64_Pool(const execution_space& exec, uint64_t seed, uint64_t num_states) { - init(exec, seed, num_states); + init_impl(exec, seed, num_states); } - void init(uint64_t seed, uint64_t num_states) { - init(execution_space(), seed, num_states); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED void init(uint64_t seed, uint64_t num_states) { + init_impl(execution_space(), seed, num_states); execution_space().fence("Random_XorShift64_Pool::init"); } +#endif private: - void init(execution_space const& exec, uint64_t seed, uint64_t num_states) { + void init_impl(execution_space const& exec, uint64_t seed, + uint64_t num_states) { num_states_ = num_states; if (seed == 0) seed = uint64_t(1318319); @@ -980,9 +987,9 @@ class Random_XorShift64_Pool { state_data_type(view_alloc(exec, "Kokkos::Random_XorShift64::state"), num_states_, padding_); - typename state_data_type::HostMirror h_state = + typename 
state_data_type::host_mirror_type h_state = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); - typename locks_type::HostMirror h_lock = + typename locks_type::host_mirror_type h_lock = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); // if the host mirror is the device view, need to fence here @@ -990,8 +997,9 @@ class Random_XorShift64_Pool { if (state_.data() == h_state.data()) exec.fence("Random_XorShift64_Pool::init UnifiedMemory"); - // Execute on the HostMirror's default execution space. - Random_XorShift64 + // Execute on the host_mirror_type's default execution space. + Random_XorShift64< + typename state_data_type::host_mirror_type::execution_space> gen(seed, 0); for (int i = 0; i < 17; i++) gen.rand(); for (int i = 0; i < num_states_; i++) { @@ -1217,31 +1225,34 @@ class Random_XorShift1024_Pool { #endif Random_XorShift1024_Pool(uint64_t seed) { - init(execution_space(), seed, execution_space().concurrency()); + init_impl(execution_space(), seed, execution_space().concurrency()); execution_space().fence("Random_XorShift1024_Pool: Constructor"); } Random_XorShift1024_Pool(uint64_t seed, uint64_t num_states) { - init(execution_space(), seed, num_states); + init_impl(execution_space(), seed, num_states); execution_space().fence("Random_XorShift1024_Pool: Constructor"); } Random_XorShift1024_Pool(const execution_space& exec, uint64_t seed) { - init(exec, seed, exec.concurrency()); + init_impl(exec, seed, exec.concurrency()); } Random_XorShift1024_Pool(const execution_space& exec, uint64_t seed, uint64_t num_states) { - init(exec, seed, num_states); + init_impl(exec, seed, num_states); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 void init(uint64_t seed, uint64_t num_states) { - init(execution_space(), seed, num_states); + init_impl(execution_space(), seed, num_states); execution_space().fence("Random_XorShift1024_Pool::init"); } +#endif private: - void init(execution_space const& exec, uint64_t seed, uint64_t num_states) { + void 
init_impl(execution_space const& exec, uint64_t seed, + uint64_t num_states) { num_states_ = num_states; if (seed == 0) seed = uint64_t(1318319); @@ -1256,11 +1267,11 @@ class Random_XorShift1024_Pool { p_ = int_view_type(view_alloc(exec, "Kokkos::Random_XorShift1024::p"), num_states_, padding_); - typename state_data_type::HostMirror h_state = + typename state_data_type::host_mirror_type h_state = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); - typename locks_type::HostMirror h_lock = + typename locks_type::host_mirror_type h_lock = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); - typename int_view_type::HostMirror h_p = + typename int_view_type::host_mirror_type h_p = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, p_); // if the host mirror is the device view, need to fence here @@ -1268,8 +1279,9 @@ class Random_XorShift1024_Pool { if (state_.data() == h_state.data()) exec.fence("Random_XorShift1024_Pool::init UnifiedMemory"); - // Execute on the HostMirror's default execution space. - Random_XorShift64 + // Execute on the host_mirror_type's default execution space. + Random_XorShift64< + typename state_data_type::host_mirror_type::execution_space> gen(seed, 0); for (int i = 0; i < 17; i++) gen.rand(); for (int i = 0; i < num_states_; i++) { diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.cppm b/lib/kokkos/algorithms/src/Kokkos_Sort.cppm index be3eb233448..fd5e752b8a5 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.cppm +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.cppm @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project module; diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 136b4ec82dc..88c54936254 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ diff --git a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.cppm b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.cppm index ce3e37a6db0..6e8269420e4 100644 --- a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.cppm +++ b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.cppm @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project module; diff --git a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp index b532a774e13..219ca0a7f2b 100644 --- a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_HPP #define KOKKOS_STD_ALGORITHMS_HPP diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp index b093b72ad64..8bd4a11e581 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ #define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp index f417b6b13b3..707c48fb74e 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp @@ -1,25 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ #define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ #include "Kokkos_BinOpsPublicAPI.hpp" #include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp index dd468e07342..afa9743fc21 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp @@ -1,24 +1,16 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ #define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ #include "impl/Kokkos_NestedSortImpl.hpp" +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp index fc73eccad68..056fb6afa5a 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp @@ -1,24 +1,16 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ #define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_ #include "./impl/Kokkos_SortByKeyImpl.hpp" +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include namespace Kokkos::Experimental { diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp index 308e9e3a008..1ba9f2873d5 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -1,25 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_SORT_PUBLIC_API_HPP_ #define KOKKOS_SORT_PUBLIC_API_HPP_ #include "./impl/Kokkos_SortImpl.hpp" #include +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp index 07f5926d82a..5143c460205 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ #define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp index 2fe58272d92..7ef52ea3ec4 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp @@ -1,23 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ #define KOKKOS_NESTED_SORT_IMPL_HPP_ +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif + +#include namespace Kokkos { namespace Experimental { diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp index 736b21ea282..4d8091377fe 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -1,23 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ #define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_ +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif + +#include #if defined(KOKKOS_ENABLE_CUDA) diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index bec8649a8e9..ffbdf307ee3 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ #define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ @@ -21,7 +8,15 @@ #include "../Kokkos_BinSortPublicAPI.hpp" #include #include +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif +#include + +#include #if defined(KOKKOS_ENABLE_CUDA) diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index f254686dbaf..cc140ef79e6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp index ac476ca5bfa..bf0529098d6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp index d6ed4c4a7e0..8466e76dfc4 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP #define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp index 82356e65982..793fcde0432 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP #define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp index 38d6e6c4aa8..5a46c291e39 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_BEGIN_END_HPP #define KOKKOS_BEGIN_END_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp index c5406c72b0d..4bd0cfa66e1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP #define KOKKOS_STD_ALGORITHMS_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp index 82071a9362e..c7b55680480 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP #define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp index 599fde5737a..121a06ceb76 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp index 637d8d4cbc5..4a178ee016b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP #define KOKKOS_STD_ALGORITHMS_COPY_N_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp index f179e88baba..d2254ec7026 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp index 967cf75e7a4..07ba8333fd5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp index 8605606307e..bc3a25dcfd5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp index 593c42f87e1..e32b05cb0ee 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP #define KOKKOS_STD_ALGORITHMS_EQUAL_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp index ee3a1051264..4b562ee1365 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp index 6d805ba1bed..4ad4cabd373 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FILL_HPP #define KOKKOS_STD_ALGORITHMS_FILL_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp index 66b8cd66cc5..abb0ae8bbaa 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FILL_N_HPP #define KOKKOS_STD_ALGORITHMS_FILL_N_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp index e5e2b0e2b05..f0882682fe6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_HPP #define KOKKOS_STD_ALGORITHMS_FIND_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp index a4ec735fd59..cf31b3e0ba5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_END_HPP #define KOKKOS_STD_ALGORITHMS_FIND_END_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp index 341a70e2f25..2e44ee26590 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP #define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp index 283fab7617f..0124d2bb5d1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp index 5e17a6f539b..bde0f9424b1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index 05969be463a..f9d348069b6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp index e6fbcad891e..99a5e9b8e75 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp index a3295084eeb..7ad062930e4 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp index e480062c236..17c48f336f0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp index a0e540b5e7a..4bbda929b4e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp index 42f20bc4ecb..10a0e13906a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP #define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp index 2c676c3ff34..7a17cea9675 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp index 96a17b67852..d110da6620d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp index fd5180aec90..e1934dd8357 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP #define KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "impl/Kokkos_Constraints.hpp" namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp index e13479c370b..6932e4743f9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP #define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp index d16bac5bfc3..2e128cd1718 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp index 2a53fce3e24..99376507976 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp index c3a1f73ef69..d5cc34c77d7 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp index 090afe69e37..f2ede075d9c 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MISMATCH_HPP #define KOKKOS_STD_ALGORITHMS_MISMATCH_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp index ac308ea1845..e1f81c53024 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MOVE_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp index 2789ab21796..f184d6d459e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp index f7baab3fc0f..e1e54a69c4e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_NONE_OF_HPP #define KOKKOS_STD_ALGORITHMS_NONE_OF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp index a1feee8d6d7..2dc855b1e93 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp index 60cbeeda875..5f7b15be9b6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index ea7e55ca619..d4c4804501e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REDUCE_HPP #define KOKKOS_STD_ALGORITHMS_REDUCE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp index 8a429d8d518..0c907064998 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp index 4b8fa9fe077..69ec297311b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp index 45e2b54bb6f..bf96c5b9ecb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp index 38461a37f26..ac3a1f6aea9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp index 29afc4f0c21..1511a5cf23a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp index 04d5767e895..5ebf57fd0b3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp index b87163f194f..e208d71cdc9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp index 73af1f16f02..d916e84db39 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp index a0786d3a2eb..35f9016f658 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp index 66f39c4eaa6..c0415fa380a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp index aff04b47d63..ca9751e66cd 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp index cce37fccfae..aa97dafc873 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp index 43258a484ec..4d3f71fe7e3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp index 0f8aa5f1c13..6c36c1506ba 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp index b3e04a3b974..d4e930a5e39 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp index 0f7ed539487..b847b934f93 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp index 99a9c6c8a94..eb8dd39e568 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP #define KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp index 84cbed524d3..7a711950540 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp index 37fc0f860ee..2c41f932657 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp index 5f694dbfd98..040aa091ee2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index 89585ddbea0..2c97e42f5e8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp index 2d56315f616..974f4b830db 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp index 4a32d7e095d..78510e7b6d9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index 9f7fcf94fe0..6e50e86094e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index f30b7be06a6..a88df9c1907 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp index bdc050f9c19..a10c9a62914 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 7b3a4fe8762..6949488a655 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -1,25 +1,21 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ #define KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif +#include + #include +#include + namespace Kokkos { namespace Experimental { namespace Impl { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp index 0f68c9e9780..5900a17d125 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp index 86e99ecbd0c..823a7c58dce 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index ef39be6366a..91436f06d4b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_MustUseKokkosSingleInTeam.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp index 9b6b403aa49..6b962286692 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp index 62b7d226f63..39edd0e585e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 08e04810f67..15732962f95 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index 972e57f2ccb..a598362673f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp index 1f1ec5e54f6..ef8b5733285 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index 145e235b9dd..8660ec55749 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index 8fffb59094a..e7780796c0e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index 99cc4a1cf3a..d1a978e4ec2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp index 5a7fe16984a..a16bbdf8d6d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp index 157de1125ee..7a32e2fe318 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp index db0def6f227..b9dc9cc4e17 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP #define KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp index 928508fdfb3..98d983aed5f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP #define KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index 867d0b02667..dfa5e892662 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 17a1eff2962..55fd8cba2ac 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp index b2c912848a3..609de85ae45 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index d33580ca537..e12781a84c0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index b95a66c3bd9..67a3dd75c5c 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp index 2f51db03b46..ea1bd1e9317 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp index dfe96aaf586..edc825547bc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MISMATCH_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_MISMATCH_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp index 5110c51d414..ff04e1d03c6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MOVE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp index dc910861d50..3a33fc37478 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MustUseKokkosSingleInTeam.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MustUseKokkosSingleInTeam.hpp index 162c72c2db7..a3c78ba63bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MustUseKokkosSingleInTeam.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MustUseKokkosSingleInTeam.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_MUSTUSEKOKKOSSINGLEINTEAM_HPP #define KOKKOS_STD_ALGORITHMS_MUSTUSEKOKKOSSINGLEINTEAM_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif namespace Kokkos { namespace Experimental { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp index 35e9cfa53e2..6333bcc950b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp index 7ec7061e316..635f1bb2099 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_PARTITION_POINT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_PARTITION_POINT_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp index ee788378f43..5e6851ced8e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP #define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP @@ -29,27 +16,6 @@ namespace Impl { template class RandomAccessIterator; -namespace { - -template -struct is_always_strided { - static_assert(is_view_v); - - constexpr static bool value = -#ifdef KOKKOS_ENABLE_IMPL_MDSPAN - decltype(std::declval().to_mdspan())::is_always_strided(); -#else - (std::is_same_v || - std::is_same_v || - std::is_same_v); -#endif -}; - -} // namespace - template class RandomAccessIterator<::Kokkos::View> { public: @@ -69,41 +35,36 @@ class RandomAccessIterator<::Kokkos::View> { using is_passed_directly = std::true_type; #endif - static_assert(view_type::rank == 1 && - is_always_strided<::Kokkos::View>::value); + private: + static constexpr bool view_is_always_strided = +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN + decltype(std::declval().to_mdspan())::is_always_strided(); +#else + (std::is_same_v || + std::is_same_v || + std::is_same_v); +#endif + + static_assert(view_type::rank == 1 && view_is_always_strided); + public: KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default; explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view) - : m_data(view.data()), m_stride(view.stride_0()) {} + : m_data(view.data()), m_stride(view.stride(0)) {} explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view, ptrdiff_t current_index) - : m_data(view.data() + current_index * view.stride_0()), - m_stride(view.stride_0()) {} + : m_data(view.data() + current_index * view.stride(0)), + m_stride(view.stride(0)) {} -#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond template requires(std::is_constructible_v) KOKKOS_FUNCTION explicit(!std::is_convertible_v) RandomAccessIterator(const RandomAccessIterator& other) : m_data(other.m_data), m_stride(other.m_stride) {} -#else - template < - class OtherViewType, - 
std::enable_if_t && - !std::is_convertible_v, - int> = 0> - KOKKOS_FUNCTION explicit RandomAccessIterator( - const RandomAccessIterator& other) - : m_data(other.m_data), m_stride(other.m_stride) {} - - template , - int> = 0> - KOKKOS_FUNCTION RandomAccessIterator( - const RandomAccessIterator& other) - : m_data(other.m_data), m_stride(other.m_stride) {} -#endif KOKKOS_FUNCTION iterator_type& operator++() { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp index ab01cdd8048..0eb7a35dcee 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REDUCE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REDUCE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp" @@ -124,7 +116,7 @@ ValueType reduce_default_functors_exespace_impl( Impl::static_assert_is_not_openmptarget(ex); Impl::expect_valid_range(first, last); - using value_type = Kokkos::Impl::remove_cvref_t; + using value_type = std::remove_cvref_t; if (::Kokkos::is_detected::value) { if (first == last) { @@ -199,7 +191,7 @@ KOKKOS_FUNCTION ValueType reduce_default_functors_team_impl( Impl::static_assert_is_not_openmptarget(teamHandle); Impl::expect_valid_range(first, last); - using value_type = Kokkos::Impl::remove_cvref_t; + using value_type = std::remove_cvref_t; if (::Kokkos::is_detected::value) { if (first == last) { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp index b460982d334..674c7427b78 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REDUCER_WITH_ARBITRARY_JOINER_NONEUTRAL_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_REDUCER_WITH_ARBITRARY_JOINER_NONEUTRAL_ELEMENT_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" namespace Kokkos { diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp index 2c29570e99c..2001bc2c306 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REMOVE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REMOVE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp index 5b5e4147d6b..d950169f1b6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp index 61ffa9fd93d..b038a5d556a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp index 2cc38d1d0e7..bb6305145b1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp index 61e8abf44cb..e106d6536b9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IF_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REPLACE_IF_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp index e6caa072880..3951cb54dba 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp index 7aa0e4fc44c..194ab1bf06d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_REVERSE_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_REVERSE_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp index 7a4cb8e3253..d13e10bef46 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_Reverse.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp index f22dae2de48..cdd4084c65e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_ROTATE_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_ROTATE_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp index fa04350eb52..9faaf5b6a54 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp index 0910f952c0c..7da2f5d806f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp @@ -1,23 +1,16 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SEARCH_N_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_SEARCH_N_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif +#include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_AllOfAnyOfNoneOf.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp index 94147485071..36eb6665965 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp index 0414e6f1c25..f88c1b98401 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp index 930a14ac48c..5a995150c81 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_SWAP_RANGES_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_SWAP_RANGES_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp index c7e05733243..5eac7c91342 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp index d832f8849d1..abef3d8e64a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_EXCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_EXCLUSIVE_SCAN_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp index dc432573ee3..6ce5bf2a0ce 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp index 79bdf98915b..52b575fdd1f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp index 75f33154731..6c955922de8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp index 226fd49d169..e2aef5e1551 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_IMPL_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_MustUseKokkosSingleInTeam.hpp" diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp index 8a73b8e0f1d..108da7b30bc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_STD_ALGORITHMS_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP #define KOKKOS_STD_ALGORITHMS_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile deleted file mode 100644 index eaf616c5d62..00000000000 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ /dev/null @@ -1,127 +0,0 @@ -KOKKOS_PATH = ../.. 
- -GTEST_PATH = ../../TPL/gtest - -vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests - -default: build_all - echo "End Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper -else - CXX = g++ -endif - -CXXFLAGS = -O3 -LINK ?= $(CXX) -LDFLAGS ?= -override LDFLAGS += -lpthread - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -include $(KOKKOS_PATH)/Makefile.kokkos - -KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files - -TEST_TARGETS = -TARGETS = - -tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ - $(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\ - $(shell echo "$(H)include " > Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - $(shell echo "$(H)include " >> Test$(device).cpp); \ - ) \ -) - -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - OBJ_CUDA = TestCuda.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - TARGETS += KokkosAlgorithms_UnitTest_Cuda - TEST_TARGETS += test-cuda -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - OBJ_HIP = TestHIP.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - TARGETS += KokkosAlgorithms_UnitTest_HIP - TEST_TARGETS += test-hip -endif - -ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) - OBJ_THREADS = TestThreads.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - TARGETS += KokkosAlgorithms_UnitTest_Threads - TEST_TARGETS += test-threads -endif - -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - TARGETS += KokkosAlgorithms_UnitTest_OpenMP - TEST_TARGETS += test-openmp -endif - -ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - OBJ_HPX = TestHPX.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - 
TARGETS += KokkosAlgorithms_UnitTest_HPX - TEST_TARGETS += test-hpx -endif - -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = TestSerial.o TestStdAlgorithmsCommon.o UnitTestMain.o gtest-all.o - TARGETS += KokkosAlgorithms_UnitTest_Serial - TEST_TARGETS += test-serial -endif - -KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda - -KokkosAlgorithms_UnitTest_HIP: $(OBJ_HIP) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_HIP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_HIP - -KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads - -KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP - -KokkosAlgorithms_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_HPX - -KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial - -test-cuda: KokkosAlgorithms_UnitTest_Cuda - ./KokkosAlgorithms_UnitTest_Cuda - -test-hip: KokkosAlgorithms_UnitTest_HIP - ./KokkosAlgorithms_UnitTest_HIP - -test-threads: KokkosAlgorithms_UnitTest_Threads - ./KokkosAlgorithms_UnitTest_Threads - -test-openmp: KokkosAlgorithms_UnitTest_OpenMP - ./KokkosAlgorithms_UnitTest_OpenMP - -test-hpx: KokkosAlgorithms_UnitTest_HPX - ./KokkosAlgorithms_UnitTest_HPX - -test-serial: KokkosAlgorithms_UnitTest_Serial - ./KokkosAlgorithms_UnitTest_Serial - -build_all: $(TARGETS) - -test: $(TEST_TARGETS) - 
-clean: kokkos-clean - rm -f *.o $(TARGETS) - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< - -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc diff --git a/lib/kokkos/algorithms/unit_tests/TestBinSortA.hpp b/lib/kokkos/algorithms/unit_tests/TestBinSortA.hpp index 6200b48d5aa..eda035916e7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestBinSortA.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestBinSortA.hpp @@ -1,33 +1,23 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP #include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.random; import kokkos.sort; #else +#include #include #include #endif + #include +#include namespace Test { namespace BinSortSetA { diff --git a/lib/kokkos/algorithms/unit_tests/TestBinSortB.hpp b/lib/kokkos/algorithms/unit_tests/TestBinSortB.hpp index 5645c028413..cf2f11080db 100644 --- a/lib/kokkos/algorithms/unit_tests/TestBinSortB.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestBinSortB.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP diff --git a/lib/kokkos/algorithms/unit_tests/TestNestedSort.hpp b/lib/kokkos/algorithms/unit_tests/TestNestedSort.hpp index 8ea39395228..05892be4da4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestNestedSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestNestedSort.hpp @@ -1,23 +1,11 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP #include +#include #include #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 7ee7403ce9e..2fade46acee 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP @@ -21,15 +8,18 @@ #include #include #include -#include #include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +import kokkos.dyn_rank_view; import kokkos.random; #else +#include +#include #include #endif +#include #include #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 7023772831b..8c15262971c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index ed267044d31..14bf0b5cd4b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -1,34 +1,25 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP #include -#include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +import kokkos.dynamic_view; import kokkos.random; import kokkos.sort; #else +#include +#include #include #include #endif +#include + namespace Test { namespace SortImpl { diff --git a/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp b/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp index 5e9590c9bbb..f55b372116d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp @@ -1,29 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP #include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.random; import kokkos.sort; #else +#include #include #include #endif diff --git a/lib/kokkos/algorithms/unit_tests/TestSortCustomComp.hpp b/lib/kokkos/algorithms/unit_tests/TestSortCustomComp.hpp index 3ab2cb52cb8..0c7380632c4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSortCustomComp.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSortCustomComp.hpp @@ -1,29 +1,17 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_CUSTOM_COMP_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_CUSTOM_COMP_HPP #include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.random; import kokkos.sort; #else +#include #include #include #endif diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp index 208b46b15f2..9c7124c04a4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp index d8b80675c9d..f2bb9da821c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -25,33 +12,6 @@ namespace AdjacentFind { namespace KE = Kokkos::Experimental; -// impl is here for std because it is only avail from c++>=17 -template -auto my_unique_copy(InputIterator first, InputIterator last, - OutputIterator result, BinaryPredicate pred) { - if (first != last) { - typename OutputIterator::value_type t(*first); - *result = t; - ++result; - while (++first != last) { - if (!pred(t, *first)) { - t = *first; - *result = t; - ++result; - } - } - } - return result; -} - -template -auto my_unique_copy(InputIterator first, InputIterator last, - OutputIterator result) { - using value_type = typename OutputIterator::value_type; - using func_t = IsEqualFunctor; - return my_unique_copy(first, last, result, func_t()); -} - template struct UnifDist; @@ -181,28 +141,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -template -IteratorType my_std_adjacent_find(IteratorType first, IteratorType last, - BinaryPredicate p) { - if (first == last) { - return last; - } - IteratorType next = first; - ++next; - for (; next != last; ++next, ++first) { - if (p(*first, *next)) { - return first; - } - } - return last; -} - -template -IteratorType my_std_adjacent_find(IteratorType first, IteratorType last) { - using value_type = typename IteratorType::value_type; - return my_std_adjacent_find(first, last, IsEqualFunctor()); -} - std::string value_type_to_string(int) { return "int"; } 
std::string value_type_to_string(double) { return "double"; } @@ -226,7 +164,7 @@ void verify(DiffType my_diff, ViewType view, Args... args) { auto view_dc = create_deep_copyable_compatible_clone(view); auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); auto std_r = - my_std_adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...); + std::adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...); const auto std_diff = std_r - KE::cbegin(view_h); ASSERT_EQ(my_diff, std_diff); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp index cccc0f6c18b..9434f3bdb5f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp index 833145bdb46..28184a7867c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index b799243c417..8df849acc9f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_COMMON_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_COMMON_HPP @@ -20,17 +7,19 @@ #include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.random; import kokkos.std_algorithms; #else +#include #include #include #endif -#include #include -#include +#include #include #include +#include namespace Test { namespace stdalgos { @@ -253,245 +242,6 @@ auto make_bounds(const ValueType1& lower, const ValueType2 upper) { return Kokkos::pair{lower, upper}; } -// libstdc++ as provided by GCC 8 does not have reduce, transform_reduce, -// exclusive_scan, inclusive_scan, transform_exclusive_scan, -// transform_inclusive_scan and for GCC 9.1, 9.2 fails to compile them for -// missing overload not accepting policy so use here simplified versions of -// them, only for testing purpose -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) - -template -ValueType testing_reduce(InputIterator first, InputIterator last, - ValueType initIn, BinaryOp binOp) { - using value_type = std::remove_const_t; - value_type init = initIn; - - while (last - first >= 4) { - ValueType v1 = binOp(first[0], first[1]); - ValueType v2 = binOp(first[2], first[3]); - ValueType v3 = binOp(v1, v2); - init = binOp(init, v3); - first += 4; - } - - for (; first != last; ++first) { - init = binOp(init, *first); - } - - return init; -} - -template -ValueType testing_reduce(InputIterator first, InputIterator last, - ValueType init) { - return testing_reduce( - first, last, init, - [](const ValueType& lhs, const ValueType& rhs) { return lhs + rhs; }); -} - -template -auto testing_reduce(InputIterator first, InputIterator last) { - using ValueType = typename InputIterator::value_type; - return testing_reduce( - first, last, ValueType{}, - [](const ValueType& lhs, const 
ValueType& rhs) { return lhs + rhs; }); -} - -template -ValueType testing_transform_reduce(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, ValueType initIn, - BinaryJoiner binJoiner, - BinaryTransform binTransform) { - using value_type = std::remove_const_t; - value_type init = initIn; - - while (last1 - first1 >= 4) { - ValueType v1 = binJoiner(binTransform(first1[0], first2[0]), - binTransform(first1[1], first2[1])); - - ValueType v2 = binJoiner(binTransform(first1[2], first2[2]), - binTransform(first1[3], first2[3])); - - ValueType v3 = binJoiner(v1, v2); - init = binJoiner(init, v3); - - first1 += 4; - first2 += 4; - } - - for (; first1 != last1; ++first1, ++first2) { - init = binJoiner(init, binTransform(*first1, *first2)); - } - - return init; -} - -template -ValueType testing_transform_reduce(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, ValueType init) { - return testing_transform_reduce( - first1, last1, first2, init, - [](const ValueType& lhs, const ValueType& rhs) { return lhs + rhs; }, - [](const ValueType& lhs, const ValueType& rhs) { return lhs * rhs; }); -} - -template -ValueType testing_transform_reduce(InputIterator first, InputIterator last, - ValueType initIn, BinaryJoiner binJoiner, - UnaryTransform unaryTransform) { - using value_type = std::remove_const_t; - value_type init = initIn; - - while (last - first >= 4) { - ValueType v1 = - binJoiner(unaryTransform(first[0]), unaryTransform(first[1])); - ValueType v2 = - binJoiner(unaryTransform(first[2]), unaryTransform(first[3])); - ValueType v3 = binJoiner(v1, v2); - init = binJoiner(init, v3); - first += 4; - } - - for (; first != last; ++first) { - init = binJoiner(init, unaryTransform(*first)); - } - - return init; -} - -/* - EXCLUSIVE_SCAN - */ -template -OutputIterator testing_exclusive_scan(InputIterator first, InputIterator last, - OutputIterator result, ValueType initIn, - BinaryOp binOp) { - using value_type = std::remove_const_t; - value_type 
init = initIn; - - while (first != last) { - auto v = init; - init = binOp(init, *first); - ++first; - *result++ = v; - } - - return result; -} - -template -OutputIterator testing_exclusive_scan(InputIterator first, InputIterator last, - OutputIterator result, ValueType init) { - return testing_exclusive_scan( - first, last, result, init, - [](const ValueType& lhs, const ValueType& rhs) { return lhs + rhs; }); -} - -/* - INCLUSIVE_SCAN - */ -template -OutputIterator testing_inclusive_scan(InputIterator first, InputIterator last, - OutputIterator result, BinaryOp binOp, - ValueType initIn) { - using value_type = std::remove_const_t; - value_type init = initIn; - for (; first != last; ++first) { - init = binOp(init, *first); - *result++ = init; - } - - return result; -} - -template -OutputIterator testing_inclusive_scan(InputIterator first, InputIterator last, - OutputIterator result, BinaryOp bop) { - if (first != last) { - auto init = *first; - *result++ = init; - ++first; - if (first != last) { - result = testing_inclusive_scan(first, last, result, bop, init); - } - } - return result; -} - -template -OutputIterator testing_inclusive_scan(InputIterator first, InputIterator last, - OutputIterator result) { - using ValueType = typename InputIterator::value_type; - return testing_inclusive_scan( - first, last, result, - [](const ValueType& lhs, const ValueType& rhs) { return lhs + rhs; }); -} - -/* - TRANSFORM_EXCLUSIVE_SCAN - */ -template -OutputIterator testing_transform_exclusive_scan( - InputIterator first, InputIterator last, OutputIterator result, - ValueType initIn, BinaryOp binOp, UnaryOp unaryOp) { - using value_type = std::remove_const_t; - value_type init = initIn; - - while (first != last) { - auto v = init; - init = binOp(init, unaryOp(*first)); - ++first; - *result++ = v; - } - - return result; -} - -template -OutputIterator testing_transform_inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryOp binOp, UnaryOp 
unaryOp, - ValueType initIn) { - using value_type = std::remove_const_t; - value_type init = initIn; - - for (; first != last; ++first) { - init = binOp(init, unaryOp(*first)); - *result++ = init; - } - - return result; -} - -template -OutputIterator testing_transform_inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryOp binOp, - UnaryOp unaryOp) { - if (first != last) { - auto init = unaryOp(*first); - *result++ = init; - ++first; - if (first != last) { - result = testing_transform_inclusive_scan(first, last, result, binOp, - unaryOp, init); - } - } - - return result; -} - -#endif - template auto create_random_view_and_host_clone( LayoutTagType LayoutTag, std::size_t numRows, std::size_t numCols, diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp index c80fbfcef3c..c15aae7bc1c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp @@ -1,26 +1,14 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.std_algorithms; #else +#include #include #endif -#include #include namespace Test { @@ -514,7 +502,6 @@ struct TestStruct { } }; -#ifndef KOKKOS_ENABLE_CXX17 template constexpr bool test_kokkos_iterator_satify_std_random_access_iterator_concept() { @@ -526,7 +513,6 @@ static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< Kokkos::View>()); static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< Kokkos::View>()); -#endif } // namespace compileonly } // namespace stdalgos diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 7ab7fba3de1..90cde4dc4ac 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -1,28 +1,16 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include -#include #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; import kokkos.std_algorithms; -#include #else +#include #include #endif +#include namespace Test { namespace stdalgos { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp index 7c9e8f84bfa..91e2aaf20b3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp index 32e98837090..6e3b85c1ee0 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp index ef0209b3459..7623d3f21ef 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index 4482c62c040..f339f1e789a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES @@ -121,21 +108,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -// I had to write my own because std::exclusive_scan is ONLY found with -// std=c++17 -template -void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init, - BopType bop) { - const auto num_elements = last - first; - if (num_elements > 0) { - while (first < last - 1) { - *(dest++) = init; - init = bop(*first++, init); - } - *dest = init; - } -} - template struct MultiplyFunctor { KOKKOS_INLINE_FUNCTION @@ -166,8 +138,8 @@ struct VerifyData { using gold_view_value_type = typename ViewType2::value_type; Kokkos::View gold_h( "goldh", data_view.extent(0)); - my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), - KE::begin(gold_h), init_value, bop); + std::exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), + KE::begin(gold_h), init_value, bop); auto test_view_dc = create_deep_copyable_compatible_clone(test_view); auto test_view_h = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp index 2692df69821..3041d5e3494 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp index 5a5359b0b23..c666cc2b321 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -184,59 +171,6 @@ auto create_seq(ViewType data_view, std::size_t seq_extent) { return seq_view; } -// search is only avai from c++17, so I have to put it here -template -ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, - ForwardIt2 s_last, BinaryPredicate p) { - for (;; ++first) { - ForwardIt1 it = first; - for (ForwardIt2 s_it = s_first;; ++it, ++s_it) { - if (s_it == s_last) { - return first; - } - if (it == last) { - return last; - } - if (!p(*it, *s_it)) { - break; - } - } - } -} - -// only avai from c++17, so I have to put it here -template -ForwardIt1 my_std_find_end(ForwardIt1 first, ForwardIt1 last, - ForwardIt2 s_first, ForwardIt2 s_last, - BinaryPredicate p) { - if (s_first == s_last) { - return last; - } - - ForwardIt1 result = last; - while (true) { - ForwardIt1 new_result = my_std_search(first, last, s_first, s_last, p); - if (new_result == last) { - break; - } else { - result = new_result; - first = result; - ++first; - } - } - return result; -} - -template -ForwardIt1 my_std_find_end(ForwardIt1 first, ForwardIt1 last, - ForwardIt2 s_first, ForwardIt2 s_last) { - using value_type1 = typename ForwardIt1::value_type; - using value_type2 = typename ForwardIt2::value_type; - - using pred_t = IsEqualFunctor; - return my_std_find_end(first, last, s_first, s_last, pred_t()); -} - std::string value_type_to_string(int) { return "int"; } std::string value_type_to_string(double) { return "double"; } @@ -273,8 +207,8 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, auto view_h = create_host_space_copy(view); auto s_view_h = create_host_space_copy(s_view); auto stdrit = - my_std_find_end(KE::cbegin(view_h), KE::cend(view_h), - KE::cbegin(s_view_h), KE::cend(s_view_h), args...); + std::find_end(KE::cbegin(view_h), KE::cend(view_h), 
KE::cbegin(s_view_h), + KE::cend(s_view_h), args...); { auto myrit = KE::find_end(exespace(), KE::cbegin(view), KE::cend(view), diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp index d77edb5fed3..8939e7f5541 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp index b24730ff009..7ffdc829b07 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp index 2b3361743e4..9e35c7fa572 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp @@ -1,23 +1,15 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_HELPERS_FUNCTORS_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_HELPERS_FUNCTORS_HPP +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include namespace Test { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index d27c33f7643..df3dcef39ed 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES @@ -121,32 +108,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -// my own because std::inclusive_scan is ONLY found with std=c++17 -template -void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop) { - if (first != last) { - auto init = *first; - *dest = init; - while (++first < last) { - init = bop(*first, init); - *(++dest) = init; - } - } -} - -template -void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop, - ValType init) { - if (first != last) { - init = bop(*first, init); - *dest = init; - while (++first < last) { - init = bop(*first, init); - *(++dest) = init; - } - } -} - template struct MultiplyFunctor { KOKKOS_INLINE_FUNCTION @@ -177,8 +138,8 @@ struct VerifyData { using gold_view_value_type = typename ViewType2::value_type; Kokkos::View gold_h( "goldh", data_view.extent(0)); - my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), - KE::begin(gold_h), bop, args...); + std::inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), + KE::begin(gold_h), bop, args...); auto test_view_dc = create_deep_copyable_compatible_clone(test_view); auto test_view_h = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp index f5d7e34b85f..13fa517b15e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp @@ -1,18 
+1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index c6ace67788c..51d48ca2600 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp index 5d9e7db803c..f387e096e16 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp index 4d678b241be..14433e9d97e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -436,8 +423,7 @@ TEST_F(std_algorithms_min_max_element_test, } #endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_CLANG) && \ - (KOKKOS_COMPILER_CLANG >= 1300) +#if defined(KOKKOS_ENABLE_OPENMPTARGET) TEST_F(std_algorithms_min_max_element_test, minmax_element_empty_range) { test_minmax_element_empty_range(m_static_view); test_minmax_element_empty_range(m_dynamic_view); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp index df5df756d2a..825e01e3c47 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 1fbeab3d9d7..f667902dfc2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp index 42a17d73779..d3f0ac91506 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp index 8b91b72366f..9ca5fc52dc9 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index a3d7df533b4..494f9d5d4b0 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp index e47cacdd7d9..644394730be 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp index fafc36d3dfa..839bb359745 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp index f897e9b6574..6e3c368a957 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp index 3137880ea81..e13f2687d29 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp index d88ab5473de..2c6dc37f9df 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp index e42788799e4..e42af52b875 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp index 4596726cf3c..6d3df0e3859 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp index b18c859af59..07d03f17832 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp index 82f859bac12..a0723125350 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp index 5ae2ff42785..1540c606f30 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -104,17 +91,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -// my own because std::replace_if is ONLY found with std=c++20 -template -void my_host_replace_if(ForwardIt first, ForwardIt last, UnaryPredicate p, - const T& new_value) { - for (; first != last; ++first) { - if (p(*first)) { - *first = new_value; - } - } -} - template void verify_data(ViewType1 data_view, // contains data @@ -125,8 +101,8 @@ void verify_data(ViewType1 data_view, // contains data auto data_view_dc = create_deep_copyable_compatible_clone(data_view); auto data_view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc); - my_host_replace_if(KE::begin(data_view_h), KE::end(data_view_h), pred, - new_value); + std::replace_if(KE::begin(data_view_h), KE::end(data_view_h), pred, + new_value); auto test_view_dc = create_deep_copyable_compatible_clone(test_view); auto test_view_h = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp index 3c934d64850..e1f667c6132 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index b9545e8b2e1..28165e2641b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp index 1a860c58cee..2df5db5cb3e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp index 195f88a0b73..b36b785bb5d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -184,37 +171,6 @@ auto create_seq_to_search(ViewType data_view, std::size_t seq_extent) { return seq_view; } -// search is only avai from c++17, so I have to put it here -template -ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, - ForwardIt2 s_last, BinaryPredicate p) { - for (;; ++first) { - ForwardIt1 it = first; - for (ForwardIt2 s_it = s_first;; ++it, ++s_it) { - if (s_it == s_last) { - return first; - } - if (it == last) { - return last; - } - if (!p(*it, *s_it)) { - break; - } - } - } -} - -// search is only avai from c++17, so I have to put it here -template -ForwardIt1 my_std_search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, - ForwardIt2 s_last) { - using value_type1 = typename ForwardIt1::value_type; - using value_type2 = typename ForwardIt2::value_type; - - using pred_t = IsEqualFunctor; - return 
my_std_search(first, last, s_first, s_last, pred_t()); -} - std::string value_type_to_string(int) { return "int"; } std::string value_type_to_string(double) { return "double"; } @@ -250,9 +206,8 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, // run std auto view_h = create_host_space_copy(view); auto s_view_h = create_host_space_copy(s_view); - auto stdrit = - my_std_search(KE::cbegin(view_h), KE::cend(view_h), KE::cbegin(s_view_h), - KE::cend(s_view_h), args...); + auto stdrit = std::search(KE::cbegin(view_h), KE::cend(view_h), + KE::cbegin(s_view_h), KE::cend(s_view_h), args...); { auto myrit = KE::search(exespace(), KE::cbegin(view), KE::cend(view), diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp index 79d88bec23f..c75cf5d8d41 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -23,50 +10,6 @@ namespace Search_n { namespace KE = Kokkos::Experimental; -// search_n is only available from c++20, so I have to put it here -template -ForwardIt my_std_search_n(ForwardIt first, ForwardIt last, Size count, - const T& value, BinaryPredicate p) { - if (count <= 0) { - return first; - } - for (; first != last; ++first) { - if (!p(*first, value)) { - continue; - } - - ForwardIt candidate = first; - Size cur_count = 0; - - while (true) { - ++cur_count; - if (cur_count >= count) { - // success - return candidate; - } - ++first; - if (first == last) { - // exhausted the list - return last; - } - if (!p(*first, value)) { - // too few in a row - break; - } - } - } - - return last; -} - -template -ForwardIt my_std_search_n(ForwardIt first, ForwardIt last, Size count, - const T& value) { - using iter_value_type = typename ForwardIt::value_type; - using p_type = IsEqualFunctor; - return my_std_search_n(first, last, count, value, p_type()); -} - std::string value_type_to_string(int) { return "int"; } std::string value_type_to_string(double) { return "double"; } @@ -195,8 +138,8 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t count, // run std auto view_h = create_host_space_copy(view); - auto stdrit = my_std_search_n(KE::cbegin(view_h), KE::cend(view_h), count, - value, args...); + auto stdrit = std::search_n(KE::cbegin(view_h), KE::cend(view_h), count, + value, args...); const auto stddiff = stdrit - KE::cbegin(view_h); { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp index 12835d5a2f7..5611dda800f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp @@ -1,18 +1,5 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -73,32 +60,11 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -template -ForwardIterator my_std_shift_left( - ForwardIterator first, ForwardIterator last, - typename std::iterator_traits::difference_type n) { - // copied from - // https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_left.h - - if (n == 0) { - return last; - } - - ForwardIterator m = first; - for (; n > 0; --n) { - if (m == last) { - return first; - } - ++m; - } - return std::move(m, last, first); -} - template void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, std::size_t shift_value) { - auto std_rit = my_std_shift_left(KE::begin(data_view_host), - KE::end(data_view_host), shift_value); + auto std_rit = std::shift_left(KE::begin(data_view_host), + KE::end(data_view_host), shift_value); // make sure results match const auto my_diff = result_it - KE::begin(view); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp index 3e350cf3b38..fcd7945f8ea 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp @@ -1,18 +1,5 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -73,30 +60,11 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -template -ForwardIterator my_std_shift_right( - ForwardIterator first, ForwardIterator last, - typename std::iterator_traits::difference_type n) { - // copied from - // https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_right.h - - if (n == 0) { - return first; - } - - decltype(n) d = last - first; - if (n >= d) { - return last; - } - ForwardIterator m = first + (d - n); - return std::move_backward(first, m, last); -} - template void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, std::size_t shift_value) { - auto std_rit = my_std_shift_right(KE::begin(data_view_host), - KE::end(data_view_host), shift_value); + auto std_rit = std::shift_right(KE::begin(data_view_host), + KE::end(data_view_host), shift_value); // make sure results match const auto my_diff = KE::end(view) - result_it; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp index 1dfdcfd5687..f680f579978 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp @@ -1,18 +1,5 @@ 
-//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp index 88fc649a9b4..c8e4bb6b5b7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAllOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAllOf.cpp index 7b3dca330af..db86e8325ba 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAllOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAllOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAnyOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAnyOf.cpp index f99617008ee..b408c828447 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAnyOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAnyOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy.cpp index 071ecd5a9a8..d4c37f8d92b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyBackward.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyBackward.cpp index c17c1810dea..21e05366599 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyBackward.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyBackward.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp index 65c55ed7f79..fab8e9a4ac9 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy_n.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy_n.cpp index 9b509af55bf..aa54e35f2be 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy_n.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopy_n.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCount.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCount.cpp index 23edf82e6e9..b6bdbc1d662 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCount.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCount.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCountIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCountIf.cpp index 5a9be4cd111..061745a3c44 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCountIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCountIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp index 592bb4c864d..ee676d82953 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp index 22affa9c37a..18212af6de3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -196,18 +183,11 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_TRUE(intraTeamSentinelView_h(i)); -// libstdc++ as provided by GCC 8 does not have exclusive_scan and -// for GCC 9.1, 9.2 fails to compile for missing overload not accepting policy -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define exclusive_scan testing_exclusive_scan -#else -#define exclusive_scan std::exclusive_scan -#endif switch (apiId) { case 0: case 1: { - auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), - KE::begin(rowDest), initValue); + auto it = std::exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), + KE::begin(rowDest), initValue); const std::size_t stdDistance = KE::distance(KE::begin(rowDest), it); ASSERT_EQ(stdDistance, distancesView_h(i)); break; @@ -216,8 +196,8 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { #ifndef KOKKOS_ENABLE_OPENMPTARGET case 2: case 3: { - auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), - KE::begin(rowDest), initValue, binaryOp); + auto it = std::exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), + KE::begin(rowDest), initValue, binaryOp); const std::size_t stdDistance = KE::distance(KE::begin(rowDest), it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -226,8 +206,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { #endif default: Kokkos::abort("unreachable"); } - -#undef exclusive_scan } if constexpr (std::is_same_v) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill.cpp index bd3ef3bd67f..f7a8c9e1cc7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill.cpp @@ -1,18 +1,5 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill_n.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill_n.cpp index 0a97f15c176..35df1c9c621 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill_n.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFill_n.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFind.cpp index 88c5e21f312..5692491ced8 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFind.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp index 21a905be56c..0c02a4c2052 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp index ad1043362e4..01857e75d25 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp index f21f947e977..2e557bbd549 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp index 0794dc0a790..8d601678b92 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEach.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEach.cpp index 26e00c21e53..7ed0cab6c99 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEach.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEach.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEachN.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEachN.cpp index 0f4793490f2..919251ad669 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEachN.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamForEachN.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate.cpp index 28c10c95d2f..92588792760 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate_n.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate_n.cpp index 265cdf47461..cd9f9d7464a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate_n.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamGenerate_n.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp index 96009b5fd07..d1a199443e4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include "std_algorithms/Kokkos_BeginEnd.hpp" @@ -218,18 +205,10 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_TRUE(intraTeamSentinelView_h(i)); -// libstdc++ as provided by GCC 8 does not have inclusive_scan and -// for GCC 9.1, 9.2 fails to compile for missing overload not accepting policy -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define inclusive_scan testing_inclusive_scan -#else -#define inclusive_scan std::inclusive_scan -#endif - switch (apiId) { case 0: case 1: { - auto it = inclusive_scan(first, last, firstDest); + auto it = std::inclusive_scan(first, last, firstDest); const std::size_t stdDistance = KE::distance(firstDest, it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -238,7 +217,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 2: case 3: { - auto it = inclusive_scan(first, last, firstDest, binaryOp); + auto it = std::inclusive_scan(first, last, firstDest, binaryOp); const std::size_t stdDistance = KE::distance(firstDest, it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -247,7 +226,8 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 4: case 5: { - auto it = inclusive_scan(first, last, firstDest, binaryOp, initValue); + auto it = + std::inclusive_scan(first, last, firstDest, binaryOp, initValue); const std::size_t stdDistance = KE::distance(firstDest, it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -255,8 +235,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { } default: Kokkos::abort("unreachable"); } - -#undef inclusive_scan } if constexpr (std::is_same_v) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp index 21da333e75d..b3bc1aa78fd 100644 --- 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp index f76a595b3f4..fe19b5605ee 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp index 024d17a3122..8956ef440dd 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp @@ -1,20 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include +#include namespace Test { namespace stdalgos { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp index 9d2d2721c64..7c7405caf6e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp index 452a48df216..c93b57fb10a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp index 2c79370b926..d56b0c5f96b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp index 25a4487855b..f7133f1ed24 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp index 9b245508e38..5158be24634 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMove.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMove.cpp index 2c445dacf8e..41adbf06b01 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMove.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMove.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMoveBackward.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMoveBackward.cpp index eb46f0301f8..3bda36893f7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMoveBackward.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMoveBackward.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamNoneOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamNoneOf.cpp index 373c6c662b9..55051bac9b9 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamNoneOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamNoneOf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp index 78ab6bf1f8d..661b584cb6a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp index 370e91cc1ff..3c40ecb1000 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp index 88264b45c05..553aefbae7b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -194,21 +181,10 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_TRUE(intraTeamSentinelView_h(i)); - // libstdc++ as provided by GCC 8 does not have reduce, transform_reduce, - // exclusive_scan, inclusive_scan, transform_exclusive_scan, - // transform_inclusive_scan and for GCC 9.1, 9.2 fails to compile them for - // missing overload not accepting policy - -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define reduce testing_reduce -#else -#define reduce std::reduce -#endif - switch (apiId) { case 0: case 1: { - const ValueType result = reduce(rowFromBegin, rowFromEnd); + const ValueType result = std::reduce(rowFromBegin, rowFromEnd); if constexpr (std::is_floating_point_v) { EXPECT_FLOAT_EQ(result, reduceResultsView_h(i)); } else { @@ -220,7 +196,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 2: case 3: { - const ValueType result = reduce(rowFromBegin, rowFromEnd, initVal); + const ValueType result = std::reduce(rowFromBegin, rowFromEnd, initVal); if constexpr (std::is_floating_point_v) { EXPECT_FLOAT_EQ(result, reduceResultsView_h(i)); } else { @@ -233,7 +209,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 4: case 5: { const ValueType result = - reduce(rowFromBegin, rowFromEnd, initVal, binaryPred); + std::reduce(rowFromBegin, rowFromEnd, initVal, binaryPred); if constexpr (std::is_floating_point_v) { EXPECT_FLOAT_EQ(result, reduceResultsView_h(i)); } else { @@ -244,8 +220,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { } default: Kokkos::abort("unreachable"); } - -#undef reduce } } diff --git 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemove.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemove.cpp index 2defa1dc6fc..77be2c5dae7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemove.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemove.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopy.cpp index cde57021d6d..a76aa0be7de 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp index faeeb47a876..b71504b2da3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp index 3dd7cb764c6..83427a5c24c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplace.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplace.cpp index fddc0f3b5bc..358e7a7d4a2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplace.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplace.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopy.cpp index 64f172e401c..2cc0a565399 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp index 9c3699320d8..833a9690dad 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp index d79b53d3551..e43002960be 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverse.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverse.cpp index 46d27aa16b0..ea028bed003 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverse.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverse.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverseCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverseCopy.cpp index a06ae839e1f..4e896fdfbed 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverseCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReverseCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotate.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotate.cpp index 3188e029103..98b849eb98f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotate.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotate.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotateCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotateCopy.cpp index 51f600fabad..d812dd3a16f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotateCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRotateCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp index 1f0f4b6c1b9..747290e6242 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp index 6d8a34e842d..110e4e82f3b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftLeft.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftLeft.cpp index 60edb377d02..5cc99544767 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftLeft.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftLeft.cpp @@ -1,20 +1,8 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include +#include namespace Test { namespace stdalgos { @@ -71,27 +59,6 @@ struct TestFunctorA { } }; -// shift_left is only supported starting from C++20, -// so put here a working version of the std algo copied from -// https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_left.h -template -ForwardIterator my_std_shift_left( - ForwardIterator first, ForwardIterator last, - typename std::iterator_traits::difference_type n) { - if (n == 0) { - return last; - } - - ForwardIterator m = first; - for (; n > 0; --n) { - if (m == last) { - return first; - } - ++m; - } - return std::move(m, last, first); -} - template void test_A(std::size_t numTeams, std::size_t numCols, std::size_t shift, int apiId) { @@ -138,7 +105,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, std::size_t shift, auto intraTeamSentinelView_h = 
create_host_space_copy(intraTeamSentinelView); for (std::size_t i = 0; i < cloneOfDataViewBeforeOp_h.extent(0); ++i) { auto myRow = Kokkos::subview(cloneOfDataViewBeforeOp_h, i, Kokkos::ALL()); - auto it = my_std_shift_left(KE::begin(myRow), KE::end(myRow), shift); + auto it = std::shift_left(KE::begin(myRow), KE::end(myRow), shift); const std::size_t stdDistance = KE::distance(KE::begin(myRow), it); ASSERT_EQ(stdDistance, distancesView_h(i)); ASSERT_TRUE(intraTeamSentinelView_h(i)); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftRight.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftRight.cpp index 08ff8fbbca6..b347998efae 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftRight.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamShiftRight.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -71,25 +58,6 @@ struct TestFunctorA { } }; -// shift_right is only supported starting from C++20, -// so put here a working version of the std algo copied from -// https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/shift_right.h -template -ForwardIterator my_std_shift_right( - ForwardIterator first, ForwardIterator last, - typename std::iterator_traits::difference_type n) { - if (n == 0) { - return first; - } - - decltype(n) d = last - first; - if (n >= d) { - return last; - } - ForwardIterator m = first + (d - n); - return std::move_backward(first, m, last); -} - template void test_A(std::size_t numTeams, std::size_t numCols, std::size_t shift, int apiId) { @@ -136,7 +104,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, std::size_t shift, auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView); for (std::size_t i = 0; i < cloneOfDataViewBeforeOp_h.extent(0); ++i) { auto myRow = Kokkos::subview(cloneOfDataViewBeforeOp_h, i, Kokkos::ALL()); - auto it = my_std_shift_right(KE::begin(myRow), KE::end(myRow), shift); + auto it = std::shift_right(KE::begin(myRow), KE::end(myRow), shift); const std::size_t stdDistance = KE::distance(KE::begin(myRow), it); ASSERT_EQ(stdDistance, distancesView_h(i)); ASSERT_TRUE(intraTeamSentinelView_h(i)); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSwapRanges.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSwapRanges.cpp index 60cb3f08377..018ebeeee30 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSwapRanges.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSwapRanges.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformBinaryOp.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformBinaryOp.cpp index b3557d8afb3..2ac467cac9c 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformBinaryOp.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformBinaryOp.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp index 60e199a350e..b7c0b7da24e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -185,18 +172,10 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { auto initValue = initValuesView_h(i); ASSERT_TRUE(intraTeamSentinelView_h(i)); -// libstdc++ as provided by GCC 8 does not have transform_exclusive_scan and -// for GCC 9.1, 9.2 fails to compile for missing overload not accepting policy -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define transform_exclusive_scan testing_transform_exclusive_scan -#else -#define transform_exclusive_scan std::transform_exclusive_scan -#endif - switch (apiId) { case 0: case 1: { - auto it = transform_exclusive_scan( + auto it = std::transform_exclusive_scan( KE::cbegin(rowFrom), KE::cend(rowFrom), KE::begin(rowDest), initValue, binaryOp, unaryOp); const std::size_t stdDistance = KE::distance(KE::begin(rowDest), it); @@ -205,8 +184,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { } default: Kokkos::abort("unreachable"); } - -#undef transform_exclusive_scan } if constexpr (std::is_same_v) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp index 0dc3e68b1d6..efbe0af81e5 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -212,19 +199,11 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_TRUE(intraTeamSentinelView_h(i)); -// libstdc++ as provided by GCC 8 does not have transform_inclusive_scan and -// for GCC 9.1, 9.2 fails to compile for missing overload not accepting policy -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define transform_inclusive_scan testing_transform_inclusive_scan -#else -#define transform_inclusive_scan std::transform_inclusive_scan -#endif - switch (apiId) { case 0: case 1: { - const auto it = - transform_inclusive_scan(first, last, firstDest, binaryOp, unaryOp); + const auto it = std::transform_inclusive_scan(first, last, firstDest, + binaryOp, unaryOp); const std::size_t stdDistance = KE::distance(firstDest, it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -233,8 +212,8 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 2: case 3: { - const auto it = transform_inclusive_scan(first, last, firstDest, - binaryOp, unaryOp, initValue); + const auto it = std::transform_inclusive_scan( + first, last, firstDest, binaryOp, unaryOp, initValue); const std::size_t stdDistance = KE::distance(firstDest, it); ASSERT_EQ(stdDistance, distancesView_h(i)); @@ -243,7 +222,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { default: Kokkos::abort("unreachable"); } } -#undef transform_inclusive_scan if constexpr (std::is_same_v) { auto dataViewAfterOp_h = create_host_space_copy(sourceView); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp index 69c38342dfa..359bdea3bde 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include @@ -239,21 +226,11 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_TRUE(intraTeamSentinelView_h(i)); -// libstdc++ as provided by GCC 8 does not have reduce, transform_reduce, -// exclusive_scan, inclusive_scan, transform_exclusive_scan, -// transform_inclusive_scan and for GCC 9.1, 9.2 fails to compile them for -// missing overload not accepting policy -#if defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE <= 9) -#define transform_reduce testing_transform_reduce -#else -#define transform_reduce std::transform_reduce -#endif - switch (apiId) { case 0: case 1: { - const auto result = transform_reduce(firstDataRowBegin, firstDataRowEnd, - secondDataRowBegin, initVal); + const auto result = std::transform_reduce( + firstDataRowBegin, firstDataRowEnd, secondDataRowBegin, initVal); if constexpr (std::is_floating_point_v) { EXPECT_FLOAT_EQ(result, resultsView_h(i)); @@ -266,7 +243,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { case 2: case 3: { - const ValueType result = transform_reduce( + const ValueType result = std::transform_reduce( firstDataRowBegin, firstDataRowEnd, secondDataRowBegin, initVal, binaryJoiner, binaryTransform); @@ -282,8 +259,8 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { 
case 4: case 5: { const ValueType result = - transform_reduce(firstDataRowBegin, firstDataRowEnd, initVal, - binaryJoiner, unaryTransform); + std::transform_reduce(firstDataRowBegin, firstDataRowEnd, initVal, + binaryJoiner, unaryTransform); if constexpr (std::is_floating_point_v) { EXPECT_FLOAT_EQ(result, resultsView_h(i)); @@ -295,8 +272,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { } default: Kokkos::abort("unreachable"); } - -#undef transform_reduce } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformUnaryOp.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformUnaryOp.cpp index 0fbb040020e..b13d8a29ed3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformUnaryOp.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformUnaryOp.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUnique.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUnique.cpp index cef0f7c13d0..757ec8c7c44 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUnique.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUnique.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUniqueCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUniqueCopy.cpp index 6773797097e..a00f0153a90 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUniqueCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamUniqueCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index 3f87d569678..be3dab55fe1 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -1,24 +1,13 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES #include #include #endif +#include + #include #include @@ -127,21 +116,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -// I had to write my own because std::transform_exclusive_scan is ONLY found -// with std=c++17 -template -void my_host_transform_exclusive_scan(it1 first, it1 last, it2 dest, - ValType init, BopType bop, UopType uop) { - const auto num_elements = last - first; - if (num_elements > 0) { - while (first < last - 1) { - *(dest++) = init; - init = bop(uop(*(first++)), init); - } - *dest = init; - } -} - template void verify_data(ViewType1 data_view, // contains data @@ -156,9 +130,8 @@ void verify_data(ViewType1 data_view, // contains data using gold_view_value_type = typename ViewType2::value_type; Kokkos::View gold_h( "goldh", data_view.extent(0)); - my_host_transform_exclusive_scan(KE::cbegin(data_view_h), - KE::cend(data_view_h), KE::begin(gold_h), - init_value, bop, uop); + std::transform_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), + KE::begin(gold_h), init_value, bop, uop); auto test_view_dc = create_deep_copyable_compatible_clone(test_view); auto test_view_h = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp index ca9eca48ceb..1e4925e4259 100644 --- 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES @@ -128,34 +115,6 @@ void fill_view(ViewType dest_view, const std::string& name) { Kokkos::parallel_for("copy", dest_view.extent(0), F1); } -// I had to write my own because std::transform_inclusive_scan is ONLY found -// with std=c++17 -template -void my_host_transform_inclusive_scan(it1 first, it1 last, it2 dest, - BopType bop, UopType uop) { - if (first != last) { - auto init = uop(*first); - *dest = init; - while (++first < last) { - init = bop(uop(*first), init); - *(++dest) = init; - } - } -} - -template -void my_host_transform_inclusive_scan(it1 first, it1 last, it2 dest, - BopType bop, UopType uop, ValType init) { - if (first != last) { - init = bop(uop(*first), init); - *dest = init; - while (++first < last) { - init = bop(uop(*first), init); - *(++dest) = init; - } - } -} - template void verify_data(ViewType1 data_view, // contains data ViewType2 test_view, // the view to test @@ -169,9 +128,8 @@ void verify_data(ViewType1 data_view, // contains data using gold_view_value_type = typename ViewType2::value_type; Kokkos::View gold_h( "goldh", data_view.extent(0)); - my_host_transform_inclusive_scan(KE::cbegin(data_view_h), - 
KE::cend(data_view_h), KE::begin(gold_h), - args...); + std::transform_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h), + KE::begin(gold_h), args...); auto test_view_dc = create_deep_copyable_compatible_clone(test_view); auto test_view_h = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp index 6070c1a60d3..482fdfa9823 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp index 6ee93e3d5fa..5ff48981d45 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp index e3e96964583..430534d59f5 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include #include @@ -23,33 +10,6 @@ namespace UniqueCopy { namespace KE = Kokkos::Experimental; -// impl is here for std because it is only avail from c++>=17 -template -auto my_unique_copy(InputIterator first, InputIterator last, - OutputIterator result, BinaryPredicate pred) { - if (first != last) { - typename OutputIterator::value_type t(*first); - *result = t; - ++result; - while (++first != last) { - if (!pred(t, *first)) { - t = *first; - *result = t; - ++result; - } - } - } - return result; -} - -template -auto my_unique_copy(InputIterator first, InputIterator last, - OutputIterator result) { - using value_type = typename OutputIterator::value_type; - using func_t = IsEqualFunctor; - return my_unique_copy(first, last, result, func_t()); -} - template struct UnifDist; @@ -141,7 +101,7 @@ std::size_t fill_view(ViewType dest_view, const std::string& name) { std::fill(tmp.begin(), tmp.end(), static_cast(0)); using func_t = IsEqualFunctor; auto std_r = - my_unique_copy(KE::cbegin(v_h), KE::cend(v_h), tmp.begin(), func_t()); + std::unique_copy(KE::cbegin(v_h), KE::cend(v_h), tmp.begin(), func_t()); count = (std::size_t)(std_r - tmp.begin()); } @@ -225,8 +185,8 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, std::vector tmp(view_test_h.extent(0)); std::fill(tmp.begin(), tmp.end(), static_cast(0)); - auto std_r = my_unique_copy(KE::cbegin(view_from_h), KE::cend(view_from_h), - tmp.begin(), args...); + auto std_r = std::unique_copy(KE::cbegin(view_from_h), + KE::cend(view_from_h), tmp.begin(), args...); (void)std_r; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdReducers.cpp b/lib/kokkos/algorithms/unit_tests/TestStdReducers.cpp index 0044b935587..575fcd6cbd8 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdReducers.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdReducers.cpp @@ -1,20 +1,12 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include // purpose of this test is to check that the reducers used diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp index 11a1cb717a3..cc9e6719470 100644 --- a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -1,21 +1,13 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif int main(int argc, char *argv[]) { Kokkos::initialize(argc, argv); diff --git a/lib/kokkos/benchmarks/atomic/Makefile b/lib/kokkos/benchmarks/atomic/Makefile deleted file mode 100644 index c59de75ce8e..00000000000 --- a/lib/kokkos/benchmarks/atomic/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda -KOKKOS_ARCH = "SNB,Volta70" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. -endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper -EXE = atomic_perf.cuda -else -CXX = g++ -EXE = atomic_perf.exe -endif - -CXXFLAGS ?= -O3 -g -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o atomic_perf.cuda atomic_perf.exe - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/atomic/main.cpp b/lib/kokkos/benchmarks/atomic/main.cpp index aa9076ff597..4c7c022015e 100644 --- a/lib/kokkos/benchmarks/atomic/main.cpp +++ b/lib/kokkos/benchmarks/atomic/main.cpp @@ -1,20 +1,12 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include #include diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile deleted file mode 100644 index 4b6f084d20e..00000000000 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda -KOKKOS_ARCH = "SNB,Volta70" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
-endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper -EXE = bytes_and_flops.cuda -else -CXX = g++ -EXE = bytes_and_flops.exe -endif - -CXXFLAGS ?= -O3 -g -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o *.cuda *.host - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp index 88830af624b..af7c29504fe 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp @@ -1,20 +1,12 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include template diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_double.cpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_double.cpp index 2fda1ae3d42..2c3d9138b20 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_double.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_double.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include "bench.hpp" diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_float.cpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_float.cpp index 3210116a9ee..2a2369c4781 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_float.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_float.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include "bench.hpp" diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp index 24a5dcd3899..fb50428068b 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include "bench.hpp" diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp index 0634700c31e..2b33e977414 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include "bench.hpp" diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp index 80f017fbe8f..ad65f4dcb33 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #define UNROLL 1 #include "bench_unroll_stride.hpp" diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp index 762cc988f14..fda6aefb4e1 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project template struct Run { diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp index fdfcc4ea64f..e25146cc625 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -1,20 +1,12 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include #include "bench.hpp" #include diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile deleted file mode 100644 index e1bfce21a62..00000000000 --- a/lib/kokkos/benchmarks/gather/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda -KOKKOS_ARCH = "SNB,Volta70" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
-endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper -EXE = gather.cuda -else -CXX = g++ -EXE = gather.exe -endif - -CXXFLAGS ?= -O3 -g -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o gather.cuda gather.exe - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/gather/gather.hpp b/lib/kokkos/benchmarks/gather/gather.hpp index 90b1101c1d5..d5e8b4dc6f1 100644 --- a/lib/kokkos/benchmarks/gather/gather.hpp +++ b/lib/kokkos/benchmarks/gather/gather.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project template struct RunGather { diff --git a/lib/kokkos/benchmarks/gather/gather_unroll.hpp b/lib/kokkos/benchmarks/gather/gather_unroll.hpp index 1aa73091bc5..6278a5d74f4 100644 --- a/lib/kokkos/benchmarks/gather/gather_unroll.hpp +++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp @@ -1,20 +1,12 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include template diff --git a/lib/kokkos/benchmarks/gather/main.cpp b/lib/kokkos/benchmarks/gather/main.cpp index 7b44c7af403..f118462699b 100644 --- a/lib/kokkos/benchmarks/gather/main.cpp +++ b/lib/kokkos/benchmarks/gather/main.cpp @@ -1,20 +1,12 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include #include "gather.hpp" #include diff --git a/lib/kokkos/benchmarks/gups/gups.cpp b/lib/kokkos/benchmarks/gups/gups.cpp index 251f572207d..3ce7afcfe60 100644 --- a/lib/kokkos/benchmarks/gups/gups.cpp +++ b/lib/kokkos/benchmarks/gups/gups.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project /*! \brief file gups.cpp diff --git a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp index 0935706ee87..f3fe45d586d 100644 --- a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp +++ b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project /*! \file launch_latency.cpp @@ -33,7 +20,15 @@ 3. Avg functor completion latency: (M (launch + sync)) / M */ +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif + +#include +#include template struct TestFunctor { diff --git a/lib/kokkos/benchmarks/policy_performance/Makefile b/lib/kokkos/benchmarks/policy_performance/Makefile deleted file mode 100644 index 21365f36c6a..00000000000 --- a/lib/kokkos/benchmarks/policy_performance/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda -KOKKOS_ARCH = "SNB,Volta70" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. -endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper -EXE = policy_perf.cuda -else -CXX = g++ -EXE = policy_perf.exe -endif - -CXXFLAGS ?= -O3 -g -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o policy_perf.cuda policy_perf.exe - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp index dd61ba65020..9b3327e2b1b 100644 --- 
a/lib/kokkos/benchmarks/policy_performance/main.cpp +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -1,22 +1,16 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include "policy_perf_test.hpp" +#include + int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 8a874e0139a..2a8c0f963f5 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -1,20 +1,14 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif + +#include template struct ParallelScanFunctor { diff --git a/lib/kokkos/benchmarks/stream/Makefile b/lib/kokkos/benchmarks/stream/Makefile deleted file mode 100644 index 529e7892475..00000000000 --- a/lib/kokkos/benchmarks/stream/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda -KOKKOS_ARCH = "SNB,Volta70" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. -endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper -EXE = stream.cuda -else -CXX = g++ -EXE = stream.exe -endif - -CXXFLAGS ?= -O3 -g -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o stream.cuda stream.exe - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/stream/stream-kokkos.cpp b/lib/kokkos/benchmarks/stream/stream-kokkos.cpp index 65523b0e736..a10de4e96a6 100644 --- a/lib/kokkos/benchmarks/stream/stream-kokkos.cpp +++ b/lib/kokkos/benchmarks/stream/stream-kokkos.cpp @@ -1,18 +1,5 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #include "Kokkos_Core.hpp" #include @@ -28,7 +15,7 @@ using StreamDeviceArray = Kokkos::View>; -using StreamHostArray = typename StreamDeviceArray::HostMirror; +using StreamHostArray = typename StreamDeviceArray::host_mirror_type; using StreamIndex = int; using Policy = Kokkos::RangePolicy>; diff --git a/lib/kokkos/benchmarks/view_copy_constructor/Makefile b/lib/kokkos/benchmarks/view_copy_constructor/Makefile deleted file mode 100644 index 77845a22b1e..00000000000 --- a/lib/kokkos/benchmarks/view_copy_constructor/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -KOKKOS_DEVICES=Serial -KOKKOS_ARCH = "" - -KOKKOS_USE_DEPRECATED_MAKEFILES=1 - -MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef KOKKOS_PATH - KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
-endif - -SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -vpath %.cpp $(sort $(dir $(SRC))) - -default: build - echo "Start Build" - -CXX = clang++ -EXE = view_copy_constructor.exe - -CXXFLAGS ?= -Ofast -override CXXFLAGS += -I$(MAKEFILE_PATH) - -DEPFLAGS = -M -LINK = ${CXX} -LINKFLAGS = -Ofast -KOKKOS_CXX_STANDARD=c++20 - -OBJ = $(notdir $(SRC:.cpp=.o)) -LIB = - -include $(KOKKOS_PATH)/Makefile.kokkos - -build: $(EXE) - -$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) - -clean: kokkos-clean - rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe - -# Compilation rules - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp b/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp index 63c49f09c01..7d4431b2740 100644 --- a/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp +++ b/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp @@ -1,29 +1,22 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project // The function "test_view_collection" exposes the copy constructor // and destructor overheads in Kokkos View objects // Please see the lines marked by "NOTE". 
#include +#include #include #include #include #include +#include +#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES +import kokkos.core; +#else #include +#endif #include // NVIEWS is the number of Kokkos View objects in our ViewCollection object diff --git a/lib/kokkos/bin/runtest b/lib/kokkos/bin/runtest deleted file mode 100755 index 92411fe5bad..00000000000 --- a/lib/kokkos/bin/runtest +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env bash - -function get_path() { - cd "$(dirname "$0")" - cd .. - echo "$(pwd -P)" -} - -KOKKOS_PATH="$(get_path "$0")" - -function show_help() { - local cmd=$(basename "$0") - echo "Usage: ${cmd} " - echo " Build and run the tests" - echo "" - echo "Options:" - echo " -j=N|--make-j=N Build the tests in parallel" - echo " -c|--clean Clean build and regenerate make files" - echo " --clean-on-pass Clean build when runtest passes" - echo " --output-prefix=
  Prefix of log files  Default: runtest"
-  echo "  --build-only           Only build the tests"
-  echo "  -v|--verbose           Tee STDOUT and STDERR to screen and files"
-  echo "  -h|--help              Show this message"
-  echo ""
-  ${KOKKOS_PATH}/generate_makefile.bash --help
-  return 0
-}
-
-
-declare -a GENERATE_ARGS=()
-declare -i VERBOSE=0
-declare -i CLEAN=0
-declare -i CLEAN_ON_PASS=0
-declare -i BUILD_ONLY=0
-OUTPUT="runtest"
-
-declare -i MAKE_J=${HPCBIND_NUM_PUS:-1}
-
-for i in $@; do
-  case $i in
-    -j=*|--make-j=*)
-      MAKE_J=${i#*=}
-      shift
-      ;;
-    -c|--clean)
-      CLEAN=1
-      shift
-      ;;
-    --clean-on-pass)
-      CLEAN_ON_PASS=1
-      shift
-      ;;
-    --output-prefix=*)
-      OUTPUT=${i#*=}
-      shift
-      ;;
-    --build-only)
-      BUILD_ONLY=1
-      shift
-      ;;
-    -v|--verbose)
-      VERBOSE=1
-      shift
-      ;;
-    -h|--help)
-      show_help
-      exit 0
-      ;;
-    *)
-      GENERATE_ARGS+=("$i")
-      shift
-      ;;
-  esac
-done
-
-if [[ "$(pwd -P)" == ${KOKKOS_PATH} ]]; then
-  echo "Cannot call $0 from root repository path ${KOKKOS_PATH}"
-  exit 1
-fi
-
-# Some makefile dependencies are incorrect, so clean needs to force
-# a new call to generate_makefiles.bash
-if [[ ${CLEAN} -eq 1 ]]; then
-  START=${SECONDS}
-  echo "Cleaning"
-  /bin/rm -rf algorithms containers core example install Makefile >/dev/null 2>&1
-  END=${SECONDS}
-  echo "    $((END-START)) seconds"
-  if [[ ${VERBOSE} -eq 1 ]]; then
-    echo ""
-    echo ""
-  fi
-fi
-
-declare -i START=${SECONDS}
-echo "Generating Makefile"
-echo "    ${KOKKOS_PATH}/generate_makefile.bash --kokkos-path=${KOKKOS_PATH} ${GENERATE_ARGS[@]}"
-
-if [[ ${VERBOSE} -eq 0 ]]; then
-  "${KOKKOS_PATH}"/generate_makefile.bash --kokkos-path="${KOKKOS_PATH}" "${GENERATE_ARGS[@]}" > ${OUTPUT}.out 2> >(tee ${OUTPUT}.err >&2)
-else
-  "${KOKKOS_PATH}"/generate_makefile.bash --kokkos-path="${KOKKOS_PATH}" "${GENERATE_ARGS[@]}" > >(tee ${OUTPUT}.out) 2> >(tee ${OUTPUT}.err >&2)
-fi
-declare -i RESULT=$?
-declare -i END=${SECONDS}
-if [[ ${RESULT} -eq 0 ]]; then
-  echo "    PASS:  $((END-START)) seconds"
-  if [[ ${VERBOSE} -eq 1 ]]; then
-    echo ""
-    echo ""
-  fi
-else
-  cat ${OUTPUT}.out | grep "FAIL"
-  cat ${OUTPUT}.err | grep "FAIL"
-  echo "    FAIL:  $((END-START)) seconds"
-  exit 1
-fi
-
-START=${SECONDS}
-echo "Building"
-if [[ ${VERBOSE} -eq 0 ]]; then
-  make --keep-going -j ${MAKE_J} build-test >> ${OUTPUT}.out 2> >(tee -a ${OUTPUT}.err >&2)
-else
-  make --keep-going -j ${MAKE_J} build-test > >(tee -a ${OUTPUT}.out) 2> >(tee -a ${OUTPUT}.err >&2)
-fi
-RESULT=$?
-END=${SECONDS}
-if [[ ${RESULT} -eq 0 ]]; then
-  echo "    PASS:  $((END-START)) seconds"
-  if [[ ${VERBOSE} -eq 1 ]]; then
-    echo ""
-    echo ""
-  fi
-else
-  cat ${OUTPUT}.out | grep -E "[[:space:]]error:[[:space:]]"
-  cat ${OUTPUT}.err | grep -E "[[:space:]]error:[[:space:]]"
-  echo "    FAIL:  $((END-START)) seconds"
-  exit 1
-fi
-
-if [[ ${BUILD_ONLY} -eq 0 ]]; then
-  START=${SECONDS}
-  echo "Testing"
-  if [[ ${VERBOSE} -eq 0 ]]; then
-    make --keep-going test >> ${OUTPUT}.out 2> >(tee -a ${OUTPUT}.err >&2)
-  else
-    make --keep-going test > >(tee -a ${OUTPUT}.out) 2> >(tee -a ${OUTPUT}.err >&2)
-  fi
-  RESULT=$?
-  END=${SECONDS}
-  if [[ ${RESULT} -eq 0 ]]; then
-    echo "    PASS:  $((END-START)) seconds"
-    if [[ ${CLEAN_ON_PASS} -eq 1 ]]; then
-      make clean
-    fi
-  else
-    cat ${OUTPUT}.out | grep "FAIL"
-    cat ${OUTPUT}.err | grep "FAIL"
-    echo "    FAIL:  $((END-START)) seconds"
-    exit 1
-  fi
-fi
-
-exit ${RESULT}
-
diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in
index aed9f1060ca..c62dc0a9005 100644
--- a/lib/kokkos/cmake/KokkosConfig.cmake.in
+++ b/lib/kokkos/cmake/KokkosConfig.cmake.in
@@ -42,8 +42,8 @@ if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
   )
   kokkos_compilation(GLOBAL CHECK_CUDA_COMPILES)
 
-elseif(@Kokkos_ENABLE_CUDA@ AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA AND NOT "separable_compilation" IN_LIST
-                                                                                Kokkos_FIND_COMPONENTS
+elseif(@Kokkos_ENABLE_CUDA@ AND (NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA OR Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES)
+       AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS
 )
   #
   # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
diff --git a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in
index 769dff6b109..527b0c579c6 100644
--- a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in
+++ b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in
@@ -9,12 +9,6 @@ set(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@)
 
 # Required to be a TriBITS-compliant external package
 if(NOT TARGET Kokkos::all_libs)
-  # CMake Error at /lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY):
-  #   ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target
-  #   "Kokkos::kokkos" is imported but not globally visible.
-  if(CMAKE_VERSION VERSION_LESS "3.18")
-    set_target_properties(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON)
-  endif()
   add_library(Kokkos::all_libs ALIAS Kokkos::kokkos)
 endif()
 
@@ -187,11 +181,11 @@ int main()
   if(NOT _RET EQUAL 0)
     # save the command for debugging
     set(_COMMANDS
-        "${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu"
+        "${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_50 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu"
     )
     # try the compile test again with clang arguments
     execute_process(
-      COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
+      COMMAND ${_COMPILER} --cuda-gpu-arch=sm_50 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
       RESULT_VARIABLE _RET
       WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
       TIMEOUT 15
@@ -226,12 +220,8 @@ function(kokkos_compilation)
   # if built w/o CUDA support, we want to basically make this a no-op
   set(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
 
-  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
-    set(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
-  endif()
-
   # search relative first and then absolute
-  set(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
+  set(_HINTS "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../.." "@CMAKE_INSTALL_PREFIX@")
 
   # find kokkos_launch_compiler
   find_program(
diff --git a/lib/kokkos/cmake/KokkosCore_Config_HeaderSet.in b/lib/kokkos/cmake/KokkosCore_Config_HeaderSet.in
index 73fa0b736c6..2f31957e6a0 100644
--- a/lib/kokkos/cmake/KokkosCore_Config_HeaderSet.in
+++ b/lib/kokkos/cmake/KokkosCore_Config_HeaderSet.in
@@ -1,20 +1,5 @@
-/*
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-//
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-*/
 
 #ifndef @HEADER_GUARD_TAG@
 #define @HEADER_GUARD_TAG@
diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in
index fcc40214817..852292f5bbe 100644
--- a/lib/kokkos/cmake/KokkosCore_config.h.in
+++ b/lib/kokkos/cmake/KokkosCore_config.h.in
@@ -16,6 +16,8 @@
 #define KOKKOS_VERSION_MINOR @KOKKOS_VERSION_MINOR@
 #define KOKKOS_VERSION_PATCH @KOKKOS_VERSION_PATCH@
 
+#cmakedefine KOKKOS_IMPL_BUILD_SHARED_LIBS
+
 /* Execution Spaces */
 #cmakedefine KOKKOS_ENABLE_SERIAL
 #cmakedefine KOKKOS_ENABLE_OPENMP
@@ -28,13 +30,12 @@
 #cmakedefine KOKKOS_ENABLE_SYCL
 
 /* General Settings */
-#cmakedefine KOKKOS_ENABLE_CXX17
 #cmakedefine KOKKOS_ENABLE_CXX20
 #cmakedefine KOKKOS_ENABLE_CXX23
 #cmakedefine KOKKOS_ENABLE_CXX26
 
 #cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-#cmakedefine KOKKOS_ENABLE_CUDA_UVM
+#cmakedefine KOKKOS_ENABLE_CUDA_UVM  // deprecated
 #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA  // deprecated
 #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
 #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
@@ -48,10 +49,11 @@
 #cmakedefine KOKKOS_ENABLE_OPENACC_FORCE_HOST_AS_DEVICE
 #cmakedefine KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH
 #cmakedefine KOKKOS_ENABLE_DEBUG
-#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
+#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK  // deprecated
 #cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
 #cmakedefine KOKKOS_ENABLE_TUNING
 #cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_4
+#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_5
 #cmakedefine KOKKOS_ENABLE_DEPRECATION_WARNINGS
 #cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
 #cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN
@@ -72,6 +74,10 @@
 #cmakedefine KOKKOS_ENABLE_ONEDPL
 #cmakedefine KOKKOS_ENABLE_ROCTHRUST
 
+/* Embedded dependencies */
+#define KOKKOS_IMPL_DESUL_VERSION "@KOKKOS_DESUL_VERSION@"
+#define KOKKOS_IMPL_MDSPAN_VERSION "@KOKKOS_MDSPAN_VERSION@"
+
 #cmakedefine KOKKOS_ARCH_ARMV80
 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
 #cmakedefine KOKKOS_ARCH_ARMV81
@@ -142,6 +148,7 @@
 #cmakedefine KOKKOS_ARCH_AMD_GFX1030
 #cmakedefine KOKKOS_ARCH_AMD_GFX1100
 #cmakedefine KOKKOS_ARCH_AMD_GFX1103
+#cmakedefine KOKKOS_ARCH_AMD_GFX1201
 #cmakedefine KOKKOS_ARCH_AMD_GPU "@KOKKOS_ARCH_AMD_GPU@"
 #cmakedefine KOKKOS_ARCH_VEGA // deprecated
 #cmakedefine KOKKOS_ARCH_VEGA906 // deprecated
diff --git a/lib/kokkos/cmake/Kokkos_Version_Info.cpp.in b/lib/kokkos/cmake/Kokkos_Version_Info.cpp.in
index 3665282e7b6..78831ca9dda 100644
--- a/lib/kokkos/cmake/Kokkos_Version_Info.cpp.in
+++ b/lib/kokkos/cmake/Kokkos_Version_Info.cpp.in
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "Kokkos_Version_Info.hpp"
 
diff --git a/lib/kokkos/cmake/Kokkos_Version_Info.hpp b/lib/kokkos/cmake/Kokkos_Version_Info.hpp
index 831247115e2..db065673d34 100644
--- a/lib/kokkos/cmake/Kokkos_Version_Info.hpp
+++ b/lib/kokkos/cmake/Kokkos_Version_Info.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_GIT_VERSION_INFO_H
 #define KOKKOS_GIT_VERSION_INFO_H
diff --git a/lib/kokkos/cmake/Modules/CudaToolkit.cmake b/lib/kokkos/cmake/Modules/CudaToolkit.cmake
deleted file mode 100644
index b8ac2048b5f..00000000000
--- a/lib/kokkos/cmake/Modules/CudaToolkit.cmake
+++ /dev/null
@@ -1,880 +0,0 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
-
-#[=======================================================================[.rst:
-FindCUDAToolkit
----------------
-
-This script locates the NVIDIA CUDA toolkit and the associated libraries, but
-does not require the ``CUDA`` language be enabled for a given project. This
-module does not search for the NVIDIA CUDA Samples.
-
-Search Behavior
-^^^^^^^^^^^^^^^
-
-Finding the CUDA Toolkit requires finding the ``nvcc`` executable, which is
-searched for in the following order:
-
-1. If the ``CUDA`` language has been enabled we will use the directory
-   containing the compiler as the first search location for ``nvcc``.
-
-2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g.,
-   ``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it
-   will be searched.  If both an environment variable **and** a
-   configuration variable are specified, the *configuration* variable takes
-   precedence.
-
-   The directory specified here must be such that the executable ``nvcc`` can be
-   found underneath the directory specified by ``CUDAToolkit_ROOT``.  If
-   ``CUDAToolkit_ROOT`` is specified, but no ``nvcc`` is found underneath, this
-   package is marked as **not** found.  No subsequent search attempts are
-   performed.
-
-3. If the CUDA_PATH environment variable is defined, it will be searched.
-
-4. The user's path is searched for ``nvcc`` using :command:`find_program`.  If
-   this is found, no subsequent search attempts are performed.  Users are
-   responsible for ensuring that the first ``nvcc`` to show up in the path is
-   the desired path in the event that multiple CUDA Toolkits are installed.
-
-5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is
-   used.  No subsequent search attempts are performed.  No default symbolic link
-   location exists for the Windows platform.
-
-6. The platform specific default install locations are searched.  If exactly one
-   candidate is found, this is used.  The default CUDA Toolkit install locations
-   searched are:
-
-   +-------------+-------------------------------------------------------------+
-   | Platform    | Search Pattern                                              |
-   +=============+=============================================================+
-   | macOS       | ``/Developer/NVIDIA/CUDA-X.Y``                              |
-   +-------------+-------------------------------------------------------------+
-   | Other Unix  | ``/usr/local/cuda-X.Y``                                     |
-   +-------------+-------------------------------------------------------------+
-   | Windows     | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` |
-   +-------------+-------------------------------------------------------------+
-
-   Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as
-   ``/usr/local/cuda-9.0`` or
-   ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0``
-
-   .. note::
-
-       When multiple CUDA Toolkits are installed in the default location of a
-       system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0``
-       exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this
-       package is marked as **not** found.
-
-       There are too many factors involved in making an automatic decision in
-       the presence of multiple CUDA Toolkits being installed.  In this
-       situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or
-       (2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for
-       :command:`find_program` to find.
-
-Options
-^^^^^^^
-
-``VERSION``
-    If specified, describes the version of the CUDA Toolkit to search for.
-
-``REQUIRED``
-    If specified, configuration will error if a suitable CUDA Toolkit is not
-    found.
-
-``QUIET``
-    If specified, the search for a suitable CUDA Toolkit will not produce any
-    messages.
-
-``EXACT``
-    If specified, the CUDA Toolkit is considered found only if the exact
-    ``VERSION`` specified is recovered.
-
-Imported targets
-^^^^^^^^^^^^^^^^
-
-An :ref:`imported target ` named ``CUDA::toolkit`` is provided.
-
-This module defines :prop_tgt:`IMPORTED` targets for each
-of the following libraries that are part of the CUDAToolkit:
-
-- :ref:`CUDA Runtime Library`
-- :ref:`CUDA Driver Library`
-- :ref:`cuBLAS`
-- :ref:`cuFFT`
-- :ref:`cuRAND`
-- :ref:`cuSOLVER`
-- :ref:`cuSPARSE`
-- :ref:`cuPTI`
-- :ref:`NPP`
-- :ref:`nvBLAS`
-- :ref:`nvGRAPH`
-- :ref:`nvJPEG`
-- :ref:`nvidia-ML`
-- :ref:`nvRTC`
-- :ref:`nvToolsExt`
-- :ref:`OpenCL`
-- :ref:`cuLIBOS`
-
-.. _`cuda_toolkit_rt_lib`:
-
-CUDA Runtime Library
-""""""""""""""""""""
-
-The CUDA Runtime library (cudart) are what most applications will typically
-need to link against to make any calls such as `cudaMalloc`, and `cudaFree`.
-
-Targets Created:
-
-- ``CUDA::cudart``
-- ``CUDA::cudart_static``
-
-.. _`cuda_toolkit_driver_lib`:
-
-CUDA Driver Library
-""""""""""""""""""""
-
-The CUDA Driver library (cuda) are used by applications that use calls
-such as `cuMemAlloc`, and `cuMemFree`. This is generally used by advanced
-
-
-Targets Created:
-
-- ``CUDA::cuda_driver``
-- ``CUDA::cuda_driver``
-
-.. _`cuda_toolkit_cuBLAS`:
-
-cuBLAS
-""""""
-
-The `cuBLAS `_ library.
-
-Targets Created:
-
-- ``CUDA::cublas``
-- ``CUDA::cublas_static``
-
-.. _`cuda_toolkit_cuFFT`:
-
-cuFFT
-"""""
-
-The `cuFFT `_ library.
-
-Targets Created:
-
-- ``CUDA::cufft``
-- ``CUDA::cufftw``
-- ``CUDA::cufft_static``
-- ``CUDA::cufftw_static``
-
-cuRAND
-""""""
-
-The `cuRAND `_ library.
-
-Targets Created:
-
-- ``CUDA::curand``
-- ``CUDA::curand_static``
-
-.. _`cuda_toolkit_cuSOLVER`:
-
-cuSOLVER
-""""""""
-
-The `cuSOLVER `_ library.
-
-Targets Created:
-
-- ``CUDA::cusolver``
-- ``CUDA::cusolver_static``
-
-.. _`cuda_toolkit_cuSPARSE`:
-
-cuSPARSE
-""""""""
-
-The `cuSPARSE `_ library.
-
-Targets Created:
-
-- ``CUDA::cusparse``
-- ``CUDA::cusparse_static``
-
-.. _`cuda_toolkit_cupti`:
-
-cupti
-"""""
-
-The `NVIDIA CUDA Profiling Tools Interface `_.
-
-Targets Created:
-
-- ``CUDA::cupti``
-- ``CUDA::cupti_static``
-
-.. _`cuda_toolkit_NPP`:
-
-NPP
-"""
-
-The `NPP `_ libraries.
-
-Targets Created:
-
-- `nppc`:
-
-  - ``CUDA::nppc``
-  - ``CUDA::nppc_static``
-
-- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h`
-
-  - ``CUDA::nppial``
-  - ``CUDA::nppial_static``
-
-- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h`
-
-  - ``CUDA::nppicc``
-  - ``CUDA::nppicc_static``
-
-- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h`
-
-  - ``CUDA::nppicom``
-  - ``CUDA::nppicom_static``
-
-- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h`
-
-  - ``CUDA::nppidei``
-  - ``CUDA::nppidei_static``
-
-- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h`
-
-  - ``CUDA::nppif``
-  - ``CUDA::nppif_static``
-
-- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h`
-
-  - ``CUDA::nppig``
-  - ``CUDA::nppig_static``
-
-- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h`
-
-  - ``CUDA::nppim``
-  - ``CUDA::nppim_static``
-
-- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h`
-
-  - ``CUDA::nppist``
-  - ``CUDA::nppist_static``
-
-- `nppisu`: Memory support functions in `nppi_support_functions.h`
-
-  - ``CUDA::nppisu``
-  - ``CUDA::nppisu_static``
-
-- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h`
-
-  - ``CUDA::nppitc``
-  - ``CUDA::nppitc_static``
-
-- `npps`:
-
-  - ``CUDA::npps``
-  - ``CUDA::npps_static``
-
-.. _`cuda_toolkit_nvBLAS`:
-
-nvBLAS
-""""""
-
-The `nvBLAS `_ libraries.
-This is a shared library only.
-
-Targets Created:
-
-- ``CUDA::nvblas``
-
-.. _`cuda_toolkit_nvGRAPH`:
-
-nvGRAPH
-"""""""
-
-The `nvGRAPH `_ library.
-
-Targets Created:
-
-- ``CUDA::nvgraph``
-- ``CUDA::nvgraph_static``
-
-
-.. _`cuda_toolkit_nvJPEG`:
-
-nvJPEG
-""""""
-
-The `nvJPEG `_ library.
-Introduced in CUDA 10.
-
-Targets Created:
-
-- ``CUDA::nvjpeg``
-- ``CUDA::nvjpeg_static``
-
-.. _`cuda_toolkit_nvRTC`:
-
-nvRTC
-"""""
-
-The `nvRTC `_ (Runtime Compilation) library.
-This is a shared library only.
-
-Targets Created:
-
-- ``CUDA::nvrtc``
-
-.. _`cuda_toolkit_nvml`:
-
-nvidia-ML
-"""""""""
-
-The `NVIDIA Management Library `_.
-This is a shared library only.
-
-Targets Created:
-
-- ``CUDA::nvml``
-
-.. _`cuda_toolkit_nvToolsExt`:
-
-nvToolsExt
-""""""""""
-
-The `NVIDIA Tools Extension `_.
-This is a shared library only.
-
-Targets Created:
-
-- ``CUDA::nvToolsExt``
-
-.. _`cuda_toolkit_opencl`:
-
-OpenCL
-""""""
-
-The `NVIDIA OpenCL Library `_.
-This is a shared library only.
-
-Targets Created:
-
-- ``CUDA::OpenCL``
-
-.. _`cuda_toolkit_cuLIBOS`:
-
-cuLIBOS
-"""""""
-
-The cuLIBOS library is a backend thread abstraction layer library which is
-static only.  The ``CUDA::cublas_static``, ``CUDA::cusparse_static``,
-``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP
-libraries all automatically have this dependency linked.
-
-Target Created:
-
-- ``CUDA::culibos``
-
-**Note**: direct usage of this target by consumers should not be necessary.
-
-.. _`cuda_toolkit_cuRAND`:
-
-
-
-Result variables
-^^^^^^^^^^^^^^^^
-
-``CUDAToolkit_FOUND``
-    A boolean specifying whether or not the CUDA Toolkit was found.
-
-``CUDAToolkit_VERSION``
-    The exact version of the CUDA Toolkit found (as reported by
-    ``nvcc --version``).
-
-``CUDAToolkit_VERSION_MAJOR``
-    The major version of the CUDA Toolkit.
-
-``CUDAToolkit_VERSION_MAJOR``
-    The minor version of the CUDA Toolkit.
-
-``CUDAToolkit_VERSION_PATCH``
-    The patch version of the CUDA Toolkit.
-
-``CUDAToolkit_BIN_DIR``
-    The path to the CUDA Toolkit library directory that contains the CUDA
-    executable ``nvcc``.
-
-``CUDAToolkit_INCLUDE_DIRS``
-    The path to the CUDA Toolkit ``include`` folder containing the header files
-    required to compile a project linking against CUDA.
-
-``CUDAToolkit_LIBRARY_DIR``
-    The path to the CUDA Toolkit library directory that contains the CUDA
-    Runtime library ``cudart``.
-
-``CUDAToolkit_TARGET_DIR``
-    The path to the CUDA Toolkit directory including the target architecture
-    when cross-compiling. When not cross-compiling this will be equivalant to
-    ``CUDAToolkit_ROOT_DIR``.
-
-``CUDAToolkit_NVCC_EXECUTABLE``
-    The path to the NVIDIA CUDA compiler ``nvcc``.  Note that this path may
-    **not** be the same as
-    :variable:`CMAKE_CUDA_COMPILER _COMPILER>`.  ``nvcc`` must be
-    found to determine the CUDA Toolkit version as well as determining other
-    features of the Toolkit.  This variable is set for the convenience of
-    modules that depend on this one.
-
-
-#]=======================================================================]
-
-# NOTE: much of this was simply extracted from FindCUDA.cmake.
-
-#   James Bigler, NVIDIA Corp (nvidia.com - jbigler)
-#   Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
-#
-#   Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
-#
-#   Copyright (c) 2007-2009
-#   Scientific Computing and Imaging Institute, University of Utah
-#
-#   This code is licensed under the MIT License.  See the FindCUDA.cmake script
-#   for the text of the license.
-
-# The MIT License
-#
-# License for the specific language governing rights and limitations under
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-#
-###############################################################################
-
-# For NVCC we can easily deduce the SDK binary directory from the compiler path.
-if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
-  get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
-  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
-  mark_as_advanced(CUDAToolkit_BIN_DIR)
-  unset(cuda_dir)
-endif()
-
-# Try language- or user-provided path first.
-if(CUDAToolkit_BIN_DIR)
-  find_program(
-    CUDAToolkit_NVCC_EXECUTABLE
-    NAMES nvcc nvcc.exe
-    PATHS ${CUDAToolkit_BIN_DIR}
-    NO_DEFAULT_PATH
-  )
-endif()
-
-# Search using CUDAToolkit_ROOT
-find_program(
-  CUDAToolkit_NVCC_EXECUTABLE
-  NAMES nvcc nvcc.exe
-  PATHS ENV CUDA_PATH
-  PATH_SUFFIXES bin
-)
-
-# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
-if(NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
-  # Declare error messages now, print later depending on find_package args.
-  set(fail_base "Could not find nvcc executable in path specified by")
-  set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
-  set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
-
-  if(CUDAToolkit_FIND_REQUIRED)
-    if(DEFINED CUDAToolkit_ROOT)
-      message(FATAL_ERROR ${cuda_root_fail})
-    elseif(DEFINED ENV{CUDAToolkit_ROOT})
-      message(FATAL_ERROR ${env_cuda_root_fail})
-    endif()
-  else()
-    if(NOT CUDAToolkit_FIND_QUIETLY)
-      if(DEFINED CUDAToolkit_ROOT)
-        message(STATUS ${cuda_root_fail})
-      elseif(DEFINED ENV{CUDAToolkit_ROOT})
-        message(STATUS ${env_cuda_root_fail})
-      endif()
-    endif()
-    set(CUDAToolkit_FOUND FALSE)
-    unset(fail_base)
-    unset(cuda_root_fail)
-    unset(env_cuda_root_fail)
-    return()
-  endif()
-endif()
-
-# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
-#
-# - Linux: /usr/local/cuda-X.Y
-# - macOS: /Developer/NVIDIA/CUDA-X.Y
-# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
-#
-# We will also search the default symlink location /usr/local/cuda first since
-# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
-# directory is the desired location.
-if(NOT CUDAToolkit_NVCC_EXECUTABLE)
-  if(UNIX)
-    if(NOT APPLE)
-      set(platform_base "/usr/local/cuda-")
-    else()
-      set(platform_base "/Developer/NVIDIA/CUDA-")
-    endif()
-  else()
-    set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
-  endif()
-
-  # Build out a descending list of possible cuda installations, e.g.
-  file(GLOB possible_paths "${platform_base}*")
-  # Iterate the glob results and create a descending list.
-  set(possible_versions)
-  foreach(p ${possible_paths})
-    # Extract version number from end of string
-    string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
-    if(IS_DIRECTORY ${p} AND p_version)
-      list(APPEND possible_versions ${p_version})
-    endif()
-  endforeach()
-
-  # Cannot use list(SORT) because that is alphabetical, we need numerical.
-  # NOTE: this is not an efficient sorting strategy.  But even if a user had
-  # every possible version of CUDA installed, this wouldn't create any
-  # significant overhead.
-  set(versions)
-  foreach(v ${possible_versions})
-    list(LENGTH versions num_versions)
-    # First version, nothing to compare with so just append.
-    if(num_versions EQUAL 0)
-      list(APPEND versions ${v})
-    else()
-      # Loop through list.  Insert at an index when comparison is
-      # VERSION_GREATER since we want a descending list.  Duplicates will not
-      # happen since this came from a glob list of directories.
-      set(i 0)
-      set(early_terminate FALSE)
-      while(i LESS num_versions)
-        list(GET versions ${i} curr)
-        if(v VERSION_GREATER curr)
-          list(INSERT versions ${i} ${v})
-          set(early_terminate TRUE)
-          break()
-        endif()
-        math(EXPR i "${i} + 1")
-      endwhile()
-      # If it did not get inserted, place it at the end.
-      if(NOT early_terminate)
-        list(APPEND versions ${v})
-      endif()
-    endif()
-  endforeach()
-
-  # With a descending list of versions, populate possible paths to search.
-  set(search_paths)
-  foreach(v ${versions})
-    list(APPEND search_paths "${platform_base}${v}")
-  endforeach()
-
-  # Force the global default /usr/local/cuda to the front on Unix.
-  if(UNIX)
-    list(INSERT search_paths 0 "/usr/local/cuda")
-  endif()
-
-  # Now search for nvcc again using the platform default search paths.
-  find_program(
-    CUDAToolkit_NVCC_EXECUTABLE
-    NAMES nvcc nvcc.exe
-    PATHS ${search_paths}
-    PATH_SUFFIXES bin
-  )
-
-  # We are done with these variables now, cleanup for caller.
-  unset(platform_base)
-  unset(possible_paths)
-  unset(possible_versions)
-  unset(versions)
-  unset(i)
-  unset(early_terminate)
-  unset(search_paths)
-
-  if(NOT CUDAToolkit_NVCC_EXECUTABLE)
-    if(CUDAToolkit_FIND_REQUIRED)
-      message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
-    elseif(NOT CUDAToolkit_FIND_QUIETLY)
-      message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
-    endif()
-
-    set(CUDAToolkit_FOUND FALSE)
-    return()
-  endif()
-endif()
-
-if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
-  get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
-  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
-  mark_as_advanced(CUDAToolkit_BIN_DIR)
-  unset(cuda_dir)
-endif()
-
-if(CUDAToolkit_NVCC_EXECUTABLE AND CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
-  # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
-  # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
-  if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
-    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
-    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
-    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
-    set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
-  endif()
-else()
-  # Compute the version by invoking nvcc
-  execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
-  if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
-    set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
-    set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
-    set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
-    set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
-  endif()
-  unset(NVCC_OUT)
-endif()
-
-get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
-
-# Handle cross compilation
-if(CMAKE_CROSSCOMPILING)
-  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
-    # Support for NVPACK
-    set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
-  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
-    # Support for arm cross compilation
-    set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
-  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-    # Support for aarch64 cross compilation
-    if(ANDROID_ARCH_NAME STREQUAL "arm64")
-      set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
-    else()
-      set(CUDAToolkit_TARGET_NAME "aarch64-linux")
-    endif(ANDROID_ARCH_NAME STREQUAL "arm64")
-  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
-    set(CUDAToolkit_TARGET_NAME "x86_64-linux")
-  endif()
-
-  if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
-    set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
-    # add known CUDA target root path to the set of directories we search for programs, libraries and headers
-    list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
-
-    # Mark that we need to pop the root search path changes after we have
-    # found all cuda libraries so that searches for our cross-compilation
-    # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
-    # PATh
-    set(_CUDAToolkit_Pop_ROOT_PATH True)
-  endif()
-else()
-  # Not cross compiling
-  set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
-  # Now that we have the real ROOT_DIR, find components inside it.
-  list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
-
-  # Mark that we need to pop the prefix path changes after we have
-  # found the cudart library.
-  set(_CUDAToolkit_Pop_Prefix True)
-endif()
-
-# Find the include/ directory
-find_path(CUDAToolkit_INCLUDE_DIR NAMES cuda_runtime.h)
-
-# And find the CUDA Runtime Library libcudart
-find_library(CUDA_CUDART NAMES cudart PATH_SUFFIXES lib64 lib/x64)
-if(NOT CUDA_CUDART)
-  find_library(CUDA_CUDART NAMES cudart PATH_SUFFIXES lib64/stubs lib/x64/stubs)
-endif()
-
-if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
-  message(STATUS "Unable to find cudart library.")
-endif()
-
-unset(CUDAToolkit_ROOT_DIR)
-if(_CUDAToolkit_Pop_Prefix)
-  list(REMOVE_AT CMAKE_PREFIX_PATH -1)
-  unset(_CUDAToolkit_Pop_Prefix)
-endif()
-
-#-----------------------------------------------------------------------------
-# Perform version comparison and validate all required variables are set.
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(
-  CUDAToolkit REQUIRED_VARS CUDAToolkit_INCLUDE_DIR CUDA_CUDART CUDAToolkit_NVCC_EXECUTABLE
-  VERSION_VAR CUDAToolkit_VERSION
-)
-mark_as_advanced(CUDA_CUDART CUDAToolkit_INCLUDE_DIR CUDAToolkit_NVCC_EXECUTABLE)
-
-#-----------------------------------------------------------------------------
-# Construct result variables
-if(CUDAToolkit_FOUND)
-  set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
-  get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
-endif()
-
-#-----------------------------------------------------------------------------
-# Construct import targets
-if(CUDAToolkit_FOUND)
-
-  function(_CUDAToolkit_find_and_add_import_lib lib_name)
-    cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN})
-
-    set(search_names ${lib_name} ${arg_ALT})
-
-    find_library(
-      CUDA_${lib_name}_LIBRARY
-      NAMES ${search_names}
-      HINTS ${CUDAToolkit_LIBRARY_DIR} ENV CUDA_PATH
-      PATH_SUFFIXES nvidia/current lib64 lib/x64 lib ${arg_EXTRA_PATH_SUFFIXES}
-    )
-    # Don't try any stub directories intil we have exhausted all other
-    # search locations.
-    if(NOT CUDA_${lib_name}_LIBRARY)
-      find_library(
-        CUDA_${lib_name}_LIBRARY
-        NAMES ${search_names}
-        HINTS ${CUDAToolkit_LIBRARY_DIR} ENV CUDA_PATH
-        PATH_SUFFIXES lib64/stubs lib/x64/stubs lib/stubs stubs
-      )
-    endif()
-
-    mark_as_advanced(CUDA_${lib_name}_LIBRARY)
-
-    if(NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY)
-      add_library(CUDA::${lib_name} IMPORTED INTERFACE)
-      target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
-      target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}")
-      foreach(dep ${arg_DEPS})
-        if(TARGET CUDA::${dep})
-          target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep})
-        endif()
-      endforeach()
-    endif()
-  endfunction()
-
-  if(NOT TARGET CUDA::toolkit)
-    add_library(CUDA::toolkit IMPORTED INTERFACE)
-    target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
-    target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
-  endif()
-
-  _cudatoolkit_find_and_add_import_lib(cuda_driver ALT cuda)
-
-  _cudatoolkit_find_and_add_import_lib(cudart)
-  _cudatoolkit_find_and_add_import_lib(cudart_static)
-
-  # setup dependencies that are required for cudart_static when building
-  # on linux. These are generally only required when using the CUDA toolkit
-  # when CUDA language is disabled
-  if(NOT TARGET CUDA::cudart_static_deps AND TARGET CUDA::cudart_static)
-
-    add_library(CUDA::cudart_static_deps IMPORTED INTERFACE)
-    target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps)
-
-    if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER))
-      find_package(Threads REQUIRED)
-      target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS})
-    endif()
-
-    if(UNIX AND NOT APPLE)
-      # On Linux, you must link against librt when using the static cuda runtime.
-      find_library(CUDAToolkit_rt_LIBRARY rt)
-      mark_as_advanced(CUDAToolkit_rt_LIBRARY)
-      if(NOT CUDAToolkit_rt_LIBRARY)
-        message(WARNING "Could not find librt library, needed by CUDA::cudart_static")
-      else()
-        target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY})
-      endif()
-    endif()
-  endif()
-
-  _cudatoolkit_find_and_add_import_lib(culibos) # it's a static library
-  foreach(cuda_lib cublas cufft curand cusparse nppc nvjpeg)
-    _cudatoolkit_find_and_add_import_lib(${cuda_lib})
-    _cudatoolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos)
-  endforeach()
-
-  # cuFFTW depends on cuFFT
-  _cudatoolkit_find_and_add_import_lib(cufftw DEPS cufft)
-  _cudatoolkit_find_and_add_import_lib(cufftw DEPS cufft_static)
-
-  # cuSOLVER depends on cuBLAS, and cuSPARSE
-  _cudatoolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse)
-  _cudatoolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos)
-
-  # nvGRAPH depends on cuRAND, and cuSOLVER.
-  _cudatoolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver)
-  _cudatoolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static)
-
-  # Process the majority of the NPP libraries.
-  foreach(
-    cuda_lib
-    nppial
-    nppicc
-    nppidei
-    nppif
-    nppig
-    nppim
-    nppist
-    nppitc
-    npps
-    nppicom
-    nppisu
-  )
-    _cudatoolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc)
-    _cudatoolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static)
-  endforeach()
-
-  _cudatoolkit_find_and_add_import_lib(cupti EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ ../extras/CUPTI/lib/)
-  _cudatoolkit_find_and_add_import_lib(cupti_static EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ ../extras/CUPTI/lib/)
-
-  _cudatoolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver)
-
-  _cudatoolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml)
-
-  if(WIN32)
-    # nvtools can be installed outside the CUDA toolkit directory
-    # so prefer the NVTOOLSEXT_PATH windows only environment variable
-    # In addition on windows the most common name is nvToolsExt64_1
-    find_library(
-      CUDA_nvToolsExt_LIBRARY
-      NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt
-      PATHS ENV NVTOOLSEXT_PATH ENV CUDA_PATH
-      PATH_SUFFIXES lib/x64 lib
-    )
-  endif()
-  _cudatoolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64)
-
-  _cudatoolkit_find_and_add_import_lib(OpenCL)
-endif()
-
-if(_CUDAToolkit_Pop_ROOT_PATH)
-  list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0)
-  unset(_CUDAToolkit_Pop_ROOT_PATH)
-endif()
diff --git a/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake
index 89e704377f9..db4ff90e7da 100644
--- a/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake
+++ b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake
@@ -7,10 +7,6 @@ if(NOT CUDAToolkit_ROOT)
   endif()
 endif()
 
-if(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1")
-  message(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
-endif()
-
 set(TPL_CUDA_LIBRARIES "")
 if(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
   set(TPL_CUDA_LIBRARIES CUDA::cuda_driver)
@@ -18,30 +14,6 @@ else()
   set(TPL_CUDA_LIBRARIES CUDA::cuda_driver CUDA::cudart)
 endif()
 
-if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
-  find_package(CUDAToolkit REQUIRED)
-  kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
-  kokkos_export_cmake_tpl(CUDAToolkit REQUIRED)
-else()
-  include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
-
-  if(TARGET CUDA::cudart)
-    set(FOUND_CUDART TRUE)
-    kokkos_export_imported_tpl(CUDA::cudart)
-  else()
-    set(FOUND_CUDART FALSE)
-  endif()
-
-  if(TARGET CUDA::cuda_driver)
-    set(FOUND_CUDA_DRIVER TRUE)
-    kokkos_export_imported_tpl(CUDA::cuda_driver)
-  else()
-    set(FOUND_CUDA_DRIVER FALSE)
-  endif()
-
-  include(FindPackageHandleStandardArgs)
-  find_package_handle_standard_args(TPLCUDA ${DEFAULT_MSG} FOUND_CUDA_DRIVER FOUND_CUDART)
-  if(FOUND_CUDA_DRIVER AND FOUND_CUDART)
-    kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
-  endif()
-endif()
+find_package(CUDAToolkit REQUIRED)
+kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
+kokkos_export_cmake_tpl(CUDAToolkit REQUIRED)
diff --git a/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake b/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake
deleted file mode 100644
index 85ae0b82244..00000000000
--- a/lib/kokkos/cmake/Modules/FindTPLLIBDL.cmake
+++ /dev/null
@@ -1 +0,0 @@
-kokkos_find_imported(LIBDL HEADER dlfcn.h INTERFACE LIBRARIES ${CMAKE_DL_LIBS})
diff --git a/lib/kokkos/cmake/Modules/FindTPLLIBQUADMATH.cmake b/lib/kokkos/cmake/Modules/FindTPLLIBQUADMATH.cmake
index ce428b0aeec..1fa930c7a2e 100644
--- a/lib/kokkos/cmake/Modules/FindTPLLIBQUADMATH.cmake
+++ b/lib/kokkos/cmake/Modules/FindTPLLIBQUADMATH.cmake
@@ -13,6 +13,8 @@ check_cxx_source_compiles(
   }"
   KOKKOS_QUADMATH_COMPILER_SUPPORT
 )
+unset(CMAKE_REQUIRED_LIBRARIES)
+
 if(KOKKOS_QUADMATH_COMPILER_SUPPORT)
   kokkos_create_imported_tpl(LIBQUADMATH INTERFACE LINK_LIBRARIES quadmath)
 else()
diff --git a/lib/kokkos/cmake/Modules/FindTPLROCM.cmake b/lib/kokkos/cmake/Modules/FindTPLROCM.cmake
index 9673af0b9d9..3a69358bebb 100644
--- a/lib/kokkos/cmake/Modules/FindTPLROCM.cmake
+++ b/lib/kokkos/cmake/Modules/FindTPLROCM.cmake
@@ -3,29 +3,7 @@ include(FindPackageHandleStandardArgs)
 find_library(AMD_HIP_LIBRARY amdhip64 PATHS ENV ROCM_PATH PATH_SUFFIXES lib)
 find_library(HSA_RUNTIME_LIBRARY hsa-runtime64 PATHS ENV ROCM_PATH PATH_SUFFIXES lib)
 
-# FIXME_HIP Starting with ROCm 5.5 it is not necessary to link againt clang_rt.
-# We keep the code as is for now because it is hard to find the version of ROCM
-# found.
-# clang_rt.builtins is necessary to use half precision. The following code to
-# find clang_rt.buitins is based on
-# https://github.com/ROCm-Developer-Tools/hipamd/blob/d1e0ee98a0f3d79f7bf43295f82d0053a69ec742/hip-config.cmake.in#L241
-# NOTE: Per the above, we still search for the clang-rt library,
-# but use the user's specified compiler to find the library to avoid use of
-# environment variables / relative paths.
-execute_process(
-  COMMAND ${CMAKE_CXX_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt
-  OUTPUT_VARIABLE CLANG_RT_LIBRARY
-  OUTPUT_STRIP_TRAILING_WHITESPACE
-  RESULT_VARIABLE CLANG_RT_CHECK
-)
-
-if(NOT "${CLANG_RT_CHECK}" STREQUAL "0")
-  # if the above failed, we delete CLANG_RT_LIBRARY to make the args check
-  # below fail
-  unset(CLANG_RT_LIBRARY)
-endif()
-
-find_package_handle_standard_args(TPLROCM DEFAULT_MSG AMD_HIP_LIBRARY HSA_RUNTIME_LIBRARY CLANG_RT_LIBRARY)
+find_package_handle_standard_args(TPLROCM DEFAULT_MSG AMD_HIP_LIBRARY HSA_RUNTIME_LIBRARY)
 
 kokkos_create_imported_tpl(
   ROCM
@@ -33,7 +11,6 @@ kokkos_create_imported_tpl(
   LINK_LIBRARIES
   ${HSA_RUNTIME_LIBRARY}
   ${AMD_HIP_LIBRARY}
-  ${CLANG_RT_LIBRARY}
   COMPILE_DEFINITIONS
   __HIP_ROCclr__
 )
diff --git a/lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake b/lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake
index b4b905795dd..b56b36e1f7c 100644
--- a/lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake
+++ b/lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake
@@ -1,13 +1,3 @@
-# ROCm 5.6 and earlier set AMDGPU_TARGETS and GPU_TARGETS to all the supported
-# architectures. Therefore, we end up compiling Kokkos for all the supported
-# architecture. Starting with ROCm 5.7 AMDGPU_TARGETS and GPU_TARGETS are empty.
-# It is the user's job to set the variables. Since we are injecting the
-# architecture flag ourselves, we can let the variables empty. To replicate the
-# behavior of ROCm 5.7 and later for earlier version of ROCm we set
-# AMDGPU_TARGETS and GPU_TARGETS to empty and set the values in the cache. If
-# the values are not cached, FIND_PACKAGE(rocthrust) will overwrite them.
-set(AMDGPU_TARGETS "" CACHE STRING "AMD GPU targets to compile for")
-set(GPU_TARGETS "" CACHE STRING "GPU targets to compile for")
 find_package(rocthrust REQUIRED)
 kokkos_create_imported_tpl(ROCTHRUST INTERFACE LINK_LIBRARIES roc::rocthrust)
 
diff --git a/lib/kokkos/cmake/compile_tests/amd_apu.cc b/lib/kokkos/cmake/compile_tests/amd_apu.cc
index a9c1edbd57b..bc78f189c1c 100644
--- a/lib/kokkos/cmake/compile_tests/amd_apu.cc
+++ b/lib/kokkos/cmake/compile_tests/amd_apu.cc
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
diff --git a/lib/kokkos/cmake/compile_tests/clang_omp.cpp b/lib/kokkos/cmake/compile_tests/clang_omp.cpp
index 153130f968e..349b951aad1 100644
--- a/lib/kokkos/cmake/compile_tests/clang_omp.cpp
+++ b/lib/kokkos/cmake/compile_tests/clang_omp.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
diff --git a/lib/kokkos/cmake/compile_tests/cplusplus17.cpp b/lib/kokkos/cmake/compile_tests/cplusplus17.cpp
deleted file mode 100644
index b425d23adf8..00000000000
--- a/lib/kokkos/cmake/compile_tests/cplusplus17.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#include <type_traits>
-
-int main() {
-  // _v versions of type traits were added in C++17
-  if constexpr (std::is_same_v)
-    return 0;
-  else
-    return 1;
-}
diff --git a/lib/kokkos/cmake/compile_tests/cplusplus20.cpp b/lib/kokkos/cmake/compile_tests/cplusplus20.cpp
new file mode 100644
index 00000000000..80e24e89846
--- /dev/null
+++ b/lib/kokkos/cmake/compile_tests/cplusplus20.cpp
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include <concepts>
+
+// consteval specifier
+consteval int sqr(int n) { return n * n; }
+static_assert(sqr(100) == 10000);
+
+// conditional explicit
+struct S {
+  explicit(sizeof(int) > 0) S(int) {}
+};
+
+// concepts library
+constexpr std::floating_point auto x2(std::floating_point auto x) {
+  return x + x;
+}
+constexpr std::integral auto x2(std::integral auto x) { return x << 1; }
+
+int main() { return 0; }
diff --git a/lib/kokkos/cmake/compile_tests/cuda_compute_capability.cc b/lib/kokkos/cmake/compile_tests/cuda_compute_capability.cc
index f74bc0f6ca5..daa5228b264 100644
--- a/lib/kokkos/cmake/compile_tests/cuda_compute_capability.cc
+++ b/lib/kokkos/cmake/compile_tests/cuda_compute_capability.cc
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
@@ -32,10 +19,6 @@ int main() {
 #else
   switch (compute_capability) {
       // clang-format off
-    case 30:  std::cout << "Set -DKokkos_ARCH_KEPLER30=ON ." << std::endl; break;
-    case 32:  std::cout << "Set -DKokkos_ARCH_KEPLER32=ON ." << std::endl; break;
-    case 35:  std::cout << "Set -DKokkos_ARCH_KEPLER35=ON ." << std::endl; break;
-    case 37:  std::cout << "Set -DKokkos_ARCH_KEPLER37=ON ." << std::endl; break;
     case 50:  std::cout << "Set -DKokkos_ARCH_MAXWELL50=ON ." << std::endl; break;
     case 52:  std::cout << "Set -DKokkos_ARCH_MAXWELL52=ON ." << std::endl; break;
     case 53:  std::cout << "Set -DKokkos_ARCH_MAXWELL53=ON ." << std::endl; break;
diff --git a/lib/kokkos/cmake/compile_tests/get_sve_hw_vl.cpp b/lib/kokkos/cmake/compile_tests/get_sve_hw_vl.cpp
index 14ff76c2fd9..5e661c3468b 100644
--- a/lib/kokkos/cmake/compile_tests/get_sve_hw_vl.cpp
+++ b/lib/kokkos/cmake/compile_tests/get_sve_hw_vl.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
diff --git a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake
index 49eaf883a46..d7556425c1f 100644
--- a/lib/kokkos/cmake/deps/CUDA.cmake
+++ b/lib/kokkos/cmake/deps/CUDA.cmake
@@ -1,19 +1,5 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 # Check for CUDA support
 
diff --git a/lib/kokkos/cmake/deps/HWLOC.cmake b/lib/kokkos/cmake/deps/HWLOC.cmake
index 52d8368d041..71a790ead07 100644
--- a/lib/kokkos/cmake/deps/HWLOC.cmake
+++ b/lib/kokkos/cmake/deps/HWLOC.cmake
@@ -1,19 +1,5 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 #-----------------------------------------------------------------------------
 #  Hardware locality detection and control library.
diff --git a/lib/kokkos/cmake/deps/Pthread.cmake b/lib/kokkos/cmake/deps/Pthread.cmake
index b811f850841..fcfac9a0ec0 100644
--- a/lib/kokkos/cmake/deps/Pthread.cmake
+++ b/lib/kokkos/cmake/deps/Pthread.cmake
@@ -1,19 +1,5 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 set(USE_THREADS FALSE)
 
diff --git a/lib/kokkos/cmake/deps/quadmath.cmake b/lib/kokkos/cmake/deps/quadmath.cmake
index 9006d0cb9ef..faeff696930 100644
--- a/lib/kokkos/cmake/deps/quadmath.cmake
+++ b/lib/kokkos/cmake/deps/quadmath.cmake
@@ -1,18 +1,4 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 kokkos_tpl_find_include_dirs_and_libraries(quadmath REQUIRED_HEADERS quadmath.h REQUIRED_LIBS_NAMES quadmath)
diff --git a/lib/kokkos/cmake/fake_tribits.cmake b/lib/kokkos/cmake/fake_tribits.cmake
index d3fe1e6e2f6..f0323d98cad 100644
--- a/lib/kokkos/cmake/fake_tribits.cmake
+++ b/lib/kokkos/cmake/fake_tribits.cmake
@@ -84,9 +84,7 @@ function(KOKKOS_ADD_TEST)
   endif()
   if(TEST_TOOL)
     add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool
-    set_property(
-      TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>"
-    )
+    set_property(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_TOOLS_LIBS=$<TARGET_FILE:${TEST_TOOL}>")
   endif()
   verify_empty(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS})
 endfunction()
diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake
index 474160313a5..afca80b6030 100644
--- a/lib/kokkos/cmake/kokkos_arch.cmake
+++ b/lib/kokkos/cmake/kokkos_arch.cmake
@@ -83,10 +83,6 @@ if(Kokkos_ENABLE_CUDA
   set(KOKKOS_SHOW_CUDA_ARCHS ON)
 endif()
 
-kokkos_arch_option(KEPLER30 GPU "NVIDIA Kepler generation CC 3.0" "KOKKOS_SHOW_CUDA_ARCHS")
-kokkos_arch_option(KEPLER32 GPU "NVIDIA Kepler generation CC 3.2" "KOKKOS_SHOW_CUDA_ARCHS")
-kokkos_arch_option(KEPLER35 GPU "NVIDIA Kepler generation CC 3.5" "KOKKOS_SHOW_CUDA_ARCHS")
-kokkos_arch_option(KEPLER37 GPU "NVIDIA Kepler generation CC 3.7" "KOKKOS_SHOW_CUDA_ARCHS")
 kokkos_arch_option(MAXWELL50 GPU "NVIDIA Maxwell generation CC 5.0" "KOKKOS_SHOW_CUDA_ARCHS")
 kokkos_arch_option(MAXWELL52 GPU "NVIDIA Maxwell generation CC 5.2" "KOKKOS_SHOW_CUDA_ARCHS")
 kokkos_arch_option(MAXWELL53 GPU "NVIDIA Maxwell generation CC 5.3" "KOKKOS_SHOW_CUDA_ARCHS")
@@ -121,9 +117,12 @@ list(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
 list(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
 list(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
 list(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
-list(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
-list(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
-list(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)
+list(APPEND SUPPORTED_AMD_GPUS RX9070XT RX7900XTX V620/W6800 V620/W6800)
+list(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1201 AMD_GFX1100 NAVI1030 AMD_GFX1030)
+list(APPEND CORRESPONDING_AMD_FLAGS gfx1201 gfx1100 gfx1030 gfx1030)
+list(APPEND SUPPORTED_AMD_GPUS PHOENIX)
+list(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103)
+list(APPEND CORRESPONDING_AMD_FLAGS gfx1103)
 
 #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
 foreach(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
@@ -243,12 +242,12 @@ mark_as_advanced(Kokkos_IMPL_AMDGPU_LINK)
 #clear anything that might be in the cache
 global_set(KOKKOS_AMDGPU_OPTIONS)
 if(KOKKOS_ENABLE_HIP)
+  global_append(KOKKOS_AMDGPU_OPTIONS -xhip)
   set(AMDGPU_ARCH_FLAG "--offload-arch")
   if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
     if(NOT CMAKE_CXX_STANDARD)
-      message(FATAL_ERROR "Kokkos requires CMAKE_CXX_STANDARD to set to 17 or higher")
+      message(FATAL_ERROR "Kokkos requires CMAKE_CXX_STANDARD to set to 20 or higher")
     endif()
-    global_append(KOKKOS_AMDGPU_OPTIONS -xhip)
     if(DEFINED ENV{ROCM_PATH})
       global_append(KOKKOS_AMDGPU_OPTIONS --rocm-path=$ENV{ROCM_PATH})
     endif()
@@ -569,19 +568,15 @@ if(KOKKOS_ARCH_HSW)
 endif()
 
 if(KOKKOS_ARCH_RISCV_SG2042)
-  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
-     OR (KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
-  )
-    message(SEND_ERROR "Only gcc >= 12 and clang >= 14 support RISC-V.")
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12))
+    message(SEND_ERROR "Only gcc >= 12 support RISC-V.")
   endif()
   compiler_specific_flags(COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT -march=rv64imafdcv)
 endif()
 
 if(KOKKOS_ARCH_RISCV_RVA22V)
-  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
-     OR (KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
-  )
-    message(SEND_ERROR "Only gcc >= 12 and clang >= 14 support RISC-V.")
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12))
+    message(SEND_ERROR "Only gcc >= 12 support RISC-V.")
   endif()
   compiler_specific_flags(
     COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT
@@ -590,10 +585,8 @@ if(KOKKOS_ARCH_RISCV_RVA22V)
 endif()
 
 if(KOKKOS_ARCH_RISCV_U74MC)
-  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
-     OR (KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
-  )
-    message(SEND_ERROR "Only gcc >= 12 and clang >= 14 support RISC-V.")
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12))
+    message(SEND_ERROR "Only gcc >= 12 support RISC-V.")
   endif()
   compiler_specific_flags(COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT -march=rv64imafdc_zicntr_zicsr_zifencei_zihpm)
 endif()
@@ -773,7 +766,6 @@ if(KOKKOS_ARCH_NATIVE)
     check_cxx_symbol_exists(__ARM_NEON "" KOKKOS_COMPILER_HAS_ARM_NEON)
     unset(KOKKOS_COMPILER_HAS_AVX CACHE)
     check_cxx_symbol_exists(__AVX__ "" KOKKOS_COMPILER_HAS_AVX)
-    set(CMAKE_REQUIRED_FLAGS "${KOKKOS_COMPILE_OPTIONS}")
 
     unset(CMAKE_REQUIRED_QUIET)
     unset(CMAKE_REQUIRED_FLAGS)
@@ -918,6 +910,7 @@ if(KOKKOS_ENABLE_SYCL)
   endif()
 
   check_cxx_symbol_exists(SYCL_EXT_ONEAPI_GRAPH "sycl/sycl.hpp" KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_GRAPH)
+  unset(CMAKE_REQUIRED_FLAGS)
 endif()
 
 set(CUDA_ARCH_ALREADY_SPECIFIED "")
@@ -965,10 +958,6 @@ endfunction()
 
 #These will define KOKKOS_CUDA_ARCH_FLAG
 #to the corresponding flag name if ON
-check_cuda_arch(KEPLER30 sm_30)
-check_cuda_arch(KEPLER32 sm_32)
-check_cuda_arch(KEPLER35 sm_35)
-check_cuda_arch(KEPLER37 sm_37)
 check_cuda_arch(MAXWELL50 sm_50)
 check_cuda_arch(MAXWELL52 sm_52)
 check_cuda_arch(MAXWELL53 sm_53)
@@ -1265,15 +1254,6 @@ if(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
   endif()
 endif()
 
-#Regardless of version, make sure we define the general architecture name
-if(KOKKOS_ARCH_KEPLER30
-   OR KOKKOS_ARCH_KEPLER32
-   OR KOKKOS_ARCH_KEPLER35
-   OR KOKKOS_ARCH_KEPLER37
-)
-  set(KOKKOS_ARCH_KEPLER ON)
-endif()
-
 #Regardless of version, make sure we define the general architecture name
 if(KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53)
   set(KOKKOS_ARCH_MAXWELL ON)
diff --git a/lib/kokkos/cmake/kokkos_compiler_id.cmake b/lib/kokkos/cmake/kokkos_compiler_id.cmake
index 40e80a75055..dfd218fa28d 100644
--- a/lib/kokkos/cmake/kokkos_compiler_id.cmake
+++ b/lib/kokkos/cmake/kokkos_compiler_id.cmake
@@ -68,12 +68,11 @@ if(INTERNAL_HAVE_COMPILER_NVCC)
   set(KOKKOS_CXX_HOST_COMPILER_ID ${KOKKOS_CXX_COMPILER_ID})
 
   # SET the compiler id to nvcc.  We use the value used by CMake 3.8.
-  set(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE)
+  set(KOKKOS_CXX_COMPILER_ID NVIDIA)
 
   string(REGEX MATCH "V[0-9]+\\.[0-9]+\\.[0-9]+" TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE})
   string(SUBSTRING ${TEMP_CXX_COMPILER_VERSION} 1 -1 TEMP_CXX_COMPILER_VERSION)
-  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
-  message(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
+  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
   if(INTERNAL_USE_COMPILER_LAUNCHER)
     message(STATUS "kokkos_launch_compiler (${Kokkos_COMPILE_LAUNCHER}) is enabled...")
     kokkos_compilation(GLOBAL)
@@ -90,41 +89,26 @@ if(Kokkos_ENABLE_HIP)
 
   string(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "HIP version" INTERNAL_COMPILER_VERSION_CONTAINS_HIP)
   if(INTERNAL_COMPILER_VERSION_CONTAINS_HIP GREATER -1)
-    set(KOKKOS_CXX_COMPILER_ID HIPCC CACHE STRING INTERNAL FORCE)
+    set(KOKKOS_CXX_COMPILER_ID HIPCC)
   endif()
 
   string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE})
-  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
-  message(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}")
+  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
 endif()
 
 if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
-  # The Cray compiler reports as Clang to most versions of CMake
-  execute_process(
-    COMMAND ${CMAKE_CXX_COMPILER} --version
-    COMMAND grep -c Cray
-    OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-  )
-  if(INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
-    set(KOKKOS_CLANG_IS_CRAY TRUE)
-    set(KOKKOS_CXX_COMPILER_ID CrayClang)
-  endif()
-  # The clang based Intel compiler reports as Clang to most versions of CMake
-  execute_process(
-    COMMAND ${CMAKE_CXX_COMPILER} --version
-    COMMAND grep -c "DPC++\\|icpx"
-    OUTPUT_VARIABLE INTERNAL_HAVE_INTEL_COMPILER
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-  )
-  if(INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
-    set(KOKKOS_CLANG_IS_INTEL TRUE)
-    set(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE)
+  if(CMAKE_VERSION VERSION_LESS "3.28")
+    # The Cray compiler reports as Clang on older versions of CMake
     execute_process(
-      COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
+      COMMAND ${CMAKE_CXX_COMPILER} --version
+      COMMAND grep -c Cray
+      OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER
       OUTPUT_STRIP_TRAILING_WHITESPACE
     )
-    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
+    if(INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
+      set(KOKKOS_CLANG_IS_CRAY TRUE)
+      set(KOKKOS_CXX_COMPILER_ID CrayClang)
+    endif()
   endif()
 endif()
 
@@ -139,7 +123,7 @@ if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY)
   if(KOKKOS_CLANG_IS_CRAY)
     set(KOKKOS_CLANG_CRAY_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
   else()
-    set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
+    set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
   endif()
 endif()
 
@@ -151,37 +135,36 @@ if(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu)
   )
 
   string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
-  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
+  set(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
 endif()
 
-# Enforce the minimum compilers supported by Kokkos.
-if(NOT CMAKE_CXX_STANDARD)
-  set(CMAKE_CXX_STANDARD 17)
-endif()
-if(CMAKE_CXX_STANDARD EQUAL 17)
-  set(KOKKOS_CLANG_CPU_MINIMUM 8.0.0)
-  set(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
-  set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
-  set(KOKKOS_GCC_MINIMUM 8.2.0)
-  set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
-  set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2024.2.1)
-  set(KOKKOS_NVCC_MINIMUM 11.0.0)
-  set(KOKKOS_HIPCC_MINIMUM 5.2.0)
-  set(KOKKOS_NVHPC_MINIMUM 22.3)
-  set(KOKKOS_MSVC_MINIMUM 19.29)
-else()
-  set(KOKKOS_CLANG_CPU_MINIMUM 14.0.0)
-  set(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
-  set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
-  set(KOKKOS_GCC_MINIMUM 10.4.0)
-  set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
-  set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2024.2.1)
-  set(KOKKOS_NVCC_MINIMUM 12.0.0)
-  set(KOKKOS_HIPCC_MINIMUM 5.2.0)
-  set(KOKKOS_NVHPC_MINIMUM 22.3)
-  set(KOKKOS_MSVC_MINIMUM 19.30)
+set(Kokkos_LANGUAGES "${KOKKOS_COMPILE_LANGUAGE}")
+if(Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES)
+  if(Kokkos_ENABLE_HIP)
+    set(Kokkos_LANGUAGES "HIP,CXX")
+  elseif(Kokkos_ENABLE_CUDA)
+    set(Kokkos_LANGUAGES "CUDA,CXX")
+  endif()
 endif()
 
+message(
+  STATUS
+    "Kokkos is configured for CMake languages ${Kokkos_LANGUAGES} compilation (using ${KOKKOS_CXX_COMPILER_ID} version ${KOKKOS_CXX_COMPILER_VERSION})"
+)
+unset(Kokkos_LANGUAGES)
+
+# Enforce the minimum compilers supported by Kokkos.
+set(KOKKOS_CLANG_CPU_MINIMUM 14.0.0)
+set(KOKKOS_CLANG_CUDA_MINIMUM 15.0.0)
+set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
+set(KOKKOS_GCC_MINIMUM 10.4.0)
+set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
+set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2024.2.1)
+set(KOKKOS_NVCC_MINIMUM 12.2.0)
+set(KOKKOS_HIPCC_MINIMUM 6.2.0)
+set(KOKKOS_NVHPC_MINIMUM 22.3)
+set(KOKKOS_MSVC_MINIMUM 19.30)
+
 set(KOKKOS_MESSAGE_TEXT
     "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:"
 )
@@ -244,7 +227,7 @@ elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV
   # Treat PGI internally as NVHPC to simplify handling both compilers.
   # Before CMake 3.20 NVHPC was identified as PGI, nvc++ is
   # backward-compatible to pgc++.
-  set(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
+  set(KOKKOS_CXX_COMPILER_ID NVHPC)
 elseif(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
   if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM})
     message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
@@ -260,7 +243,7 @@ endif()
 if(NOT DEFINED KOKKOS_CXX_HOST_COMPILER_ID)
   set(KOKKOS_CXX_HOST_COMPILER_ID ${KOKKOS_CXX_COMPILER_ID})
 elseif(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL PGI)
-  set(KOKKOS_CXX_HOST_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
+  set(KOKKOS_CXX_HOST_COMPILER_ID NVHPC)
 endif()
 
 string(REPLACE "." ";" VERSION_LIST ${KOKKOS_CXX_COMPILER_VERSION})
diff --git a/lib/kokkos/cmake/kokkos_corner_cases.cmake b/lib/kokkos/cmake/kokkos_corner_cases.cmake
deleted file mode 100644
index 530e9e8fd8e..00000000000
--- a/lib/kokkos/cmake/kokkos_corner_cases.cmake
+++ /dev/null
@@ -1,8 +0,0 @@
-if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_ENABLE_CUDA_CONSTEXPR AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS
-                                                                               11.2
-)
-  message(
-    WARNING
-      "You have requested -DKokkos_ENABLE_CUDA_CONSTEXPR=ON for NVCC ${KOKKOS_CXX_COMPILER_VERSION} which is known to trigger compiler bugs before NVCC version 11.2. See https://github.com/kokkos/kokkos/issues/3496"
-  )
-endif()
diff --git a/lib/kokkos/cmake/kokkos_enable_devices.cmake b/lib/kokkos/cmake/kokkos_enable_devices.cmake
index 075e832984b..b1da1826771 100644
--- a/lib/kokkos/cmake/kokkos_enable_devices.cmake
+++ b/lib/kokkos/cmake/kokkos_enable_devices.cmake
@@ -112,8 +112,5 @@ kokkos_device_option(SYCL OFF DEVICE "Whether to build SYCL backend")
 
 ## SYCL has extra setup requirements, turn on Kokkos_Setup_SYCL.hpp in macros
 if(KOKKOS_ENABLE_SYCL)
-  if(KOKKOS_CXX_STANDARD LESS 17)
-    message(FATAL_ERROR "SYCL backend requires C++17 or newer!")
-  endif()
   list(APPEND DEVICE_SETUP_LIST SYCL)
 endif()
diff --git a/lib/kokkos/cmake/kokkos_enable_options.cmake b/lib/kokkos/cmake/kokkos_enable_options.cmake
index 9470bc9bb05..0280a18a8f2 100644
--- a/lib/kokkos/cmake/kokkos_enable_options.cmake
+++ b/lib/kokkos/cmake/kokkos_enable_options.cmake
@@ -39,7 +39,8 @@ kokkos_enable_option(IMPL_CUDA_MALLOC_ASYNC OFF "Whether to enable CudaMallocAsy
 kokkos_enable_option(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
 kokkos_enable_option(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
 
-kokkos_enable_option(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available")
+kokkos_enable_option(DEPRECATED_CODE_4 OFF "Whether code deprecated in major release 4 is available")
+kokkos_enable_option(DEPRECATED_CODE_5 ON "Whether code deprecated in major release 5 is available")
 kokkos_enable_option(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings")
 kokkos_enable_option(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
 
@@ -85,6 +86,18 @@ kokkos_enable_option(COMPILER_WARNINGS OFF "Whether to print all compiler warnin
 kokkos_enable_option(TUNING OFF "Whether to create bindings for tuning tools")
 kokkos_enable_option(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
 kokkos_enable_option(COMPILE_AS_CMAKE_LANGUAGE OFF "Whether to use native cmake language support")
+kokkos_enable_option(MULTIPLE_CMAKE_LANGUAGES OFF "Whether to allow Kokkos to be used with multiple CMake languages")
+if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES)
+  message(
+    FATAL_ERROR
+      "Using both Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE and Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES is not allowed."
+  )
+endif()
+if((Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE OR Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES) AND Kokkos_ENABLE_CUDA)
+  if(CMAKE_VERSION VERSION_LESS "3.25.2")
+    message(FATAL_ERROR "Building Kokkos with CUDA as language and c++20 requires CMake version 3.25.2 or higher.")
+  endif()
+endif()
 kokkos_enable_option(
   HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF
   "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time"
@@ -119,7 +132,7 @@ if(Kokkos_ENABLE_EXPERIMENTAL_CXX20_MODULES)
   endif()
 endif()
 
-kokkos_enable_option(IMPL_MDSPAN ON "Whether to enable experimental mdspan support")
+kokkos_enable_option(IMPL_MDSPAN ON "Whether to enable mdspan support (internal use only)")
 kokkos_enable_option(MDSPAN_EXTERNAL OFF "Whether to use an external version of mdspan")
 kokkos_enable_option(
   IMPL_CHECK_POSSIBLY_BREAKING_LAYOUTS
@@ -130,7 +143,20 @@ mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
 mark_as_advanced(Kokkos_ENABLE_MDSPAN_EXTERNAL)
 mark_as_advanced(IMPL_CHECK_POSSIBLY_BREAKING_LAYOUTS)
 
-kokkos_enable_option(IMPL_VIEW_LEGACY ON "Whether to use the legacy implementation of View")
+if(Kokkos_ENABLE_IMPL_MDSPAN)
+  # CUDA 12.9 has a bug that causes it to segfault when mdspan-based view is used:
+  #   see https://github.com/kokkos/kokkos/issues/8126
+  if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9
+     AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 13
+  )
+    set(VIEW_LEGACY_DEFAULT ON)
+  else()
+    set(VIEW_LEGACY_DEFAULT OFF)
+  endif()
+else()
+  set(VIEW_LEGACY_DEFAULT ON)
+endif()
+kokkos_enable_option(IMPL_VIEW_LEGACY ${VIEW_LEGACY_DEFAULT} "Whether to use the legacy implementation of View")
 mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_LEGACY)
 if(NOT Kokkos_ENABLE_IMPL_VIEW_LEGACY AND NOT Kokkos_ENABLE_IMPL_MDSPAN)
   message(FATAL_ERROR "Kokkos_ENABLE_IMPL_MDSPAN must be set to use the new View implementation")
diff --git a/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake b/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake
index 0d31e6d131f..a5a07e4f81d 100644
--- a/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake
+++ b/lib/kokkos/cmake/kokkos_pick_cxx_std.cmake
@@ -5,11 +5,10 @@ kokkos_option(
   CXX_STANDARD
   ""
   STRING
-  "[[DEPRECATED - USE CMAKE_CXX_STANDARD INSTEAD]] The C++ standard for Kokkos to use: 17 or 20. If empty, this will default to CMAKE_CXX_STANDARD. If both CMAKE_CXX_STANDARD and Kokkos_CXX_STANDARD are empty, this will default to 17"
+  "[[DEPRECATED - USE CMAKE_CXX_STANDARD INSTEAD]] The C++ standard for Kokkos to use: 20, 23, and 26. If empty, this will default to CMAKE_CXX_STANDARD. If both CMAKE_CXX_STANDARD and Kokkos_CXX_STANDARD are empty, this will default to 20"
 )
 
 # Set CXX standard flags
-set(KOKKOS_ENABLE_CXX17 OFF)
 set(KOKKOS_ENABLE_CXX20 OFF)
 set(KOKKOS_ENABLE_CXX23 OFF)
 set(KOKKOS_ENABLE_CXX26 OFF)
@@ -21,7 +20,7 @@ if(KOKKOS_CXX_STANDARD)
 endif()
 
 if(NOT CMAKE_CXX_STANDARD)
-  set(KOKKOS_CXX_STANDARD "17")
+  set(KOKKOS_CXX_STANDARD "20")
 else()
   set(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD})
 endif()
diff --git a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake
index 9d1e2e723dd..3a10ff3f6ea 100644
--- a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake
+++ b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake
@@ -31,10 +31,10 @@ function(kokkos_set_cxx_standard_feature standard)
     if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL GNU
                                                    OR KOKKOS_CXX_HOST_COMPILER_ID STREQUAL Clang)
     )
-      if(${KOKKOS_CXX_COMPILER_VERSION} VERSION_LESS 12.0.0)
-        set(SUPPORTED_NVCC_FLAGS "-std=c++17")
+      if(${KOKKOS_CXX_COMPILER_VERSION} VERSION_LESS 13.0.0)
+        set(SUPPORTED_NVCC_FLAGS "-std=c++20")
       else()
-        set(SUPPORTED_NVCC_FLAGS "-std=c++17" "-std=c++20")
+        set(SUPPORTED_NVCC_FLAGS "-std=c++20")
       endif()
       if(NOT ${${STANDARD_NAME}} IN_LIST SUPPORTED_NVCC_FLAGS)
         message(
@@ -73,11 +73,7 @@ function(kokkos_set_cxx_standard_feature standard)
   endif()
 endfunction()
 
-if(KOKKOS_CXX_STANDARD STREQUAL "17")
-  kokkos_set_cxx_standard_feature(17)
-  set(KOKKOS_CXX_INTERMEDIATE_STANDARD "1Z")
-  set(KOKKOS_ENABLE_CXX17 ON)
-elseif(KOKKOS_CXX_STANDARD STREQUAL "20")
+if(KOKKOS_CXX_STANDARD STREQUAL "20")
   kokkos_set_cxx_standard_feature(20)
   set(KOKKOS_CXX_INTERMEDIATE_STANDARD "2A")
   set(KOKKOS_ENABLE_CXX20 ON)
@@ -90,31 +86,31 @@ elseif(KOKKOS_CXX_STANDARD STREQUAL "26")
   set(KOKKOS_CXX_INTERMEDIATE_STANDARD "2C")
   set(KOKKOS_ENABLE_CXX26 ON)
 else()
-  message(FATAL_ERROR "Kokkos requires C++17 or newer but requested ${KOKKOS_CXX_STANDARD}!")
+  message(FATAL_ERROR "Kokkos requires C++20 or newer but requested ${KOKKOS_CXX_STANDARD}!")
 endif()
 
-# Enforce that we can compile a simple C++17 program
+# Enforce that we can compile a simple C++20 program
 
 try_compile(
-  CAN_COMPILE_CPP17 ${KOKKOS_TOP_BUILD_DIR}/corner_cases ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/cplusplus17.cpp
-  OUTPUT_VARIABLE ERROR_MESSAGE CXX_STANDARD 17
+  CAN_COMPILE_CPP20 ${KOKKOS_TOP_BUILD_DIR}/corner_cases ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/cplusplus20.cpp
+  OUTPUT_VARIABLE ERROR_MESSAGE CXX_STANDARD 20
 )
-if(NOT CAN_COMPILE_CPP17)
-  unset(CAN_COMPILE_CPP17 CACHE) #make sure CMake always re-runs this
+if(NOT CAN_COMPILE_CPP20)
+  unset(CAN_COMPILE_CPP20 CACHE) #make sure CMake always re-runs this
   message(
     FATAL_ERROR
-      "C++${KOKKOS_CXX_STANDARD}-compliant compiler detected, but unable to compile C++17 or later program. Verify that ${CMAKE_CXX_COMPILER_ID}:${CMAKE_CXX_COMPILER_VERSION} is set up correctly (e.g., check that correct library headers are being used).\nFailing output:\n ${ERROR_MESSAGE}"
+      "C++${KOKKOS_CXX_STANDARD}-compliant compiler detected, but unable to compile C++20 or later program. Verify that ${CMAKE_CXX_COMPILER_ID}:${CMAKE_CXX_COMPILER_VERSION} is set up correctly (e.g., check that correct library headers are being used).\nFailing output:\n ${ERROR_MESSAGE}"
   )
 endif()
-unset(CAN_COMPILE_CPP17 CACHE) #make sure CMake always re-runs this
+unset(CAN_COMPILE_CPP20 CACHE) #make sure CMake always re-runs this
 
 # Enforce that extensions are turned off for nvcc_wrapper.
 # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's
-# flags for turning on C++17.  Since for compiler ID and versioning purposes
+# flags for turning on C++20.  Since for compiler ID and versioning purposes
 # CMake recognizes the host compiler when calling nvcc_wrapper, this just
-# works.  Both NVCC and nvcc_wrapper only recognize '-std=c++17' which means
+# works.  Both NVCC and nvcc_wrapper only recognize '-std=c++20' which means
 # that we can only use host compilers for CUDA builds that use those flags.
-# It also means that extensions (gnu++17) can't be turned on for CUDA builds.
+# It also means that extensions (gnu++20) can't be turned on for CUDA builds.
 
 if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
   if(NOT DEFINED CMAKE_CXX_EXTENSIONS)
@@ -127,9 +123,6 @@ endif()
 if(KOKKOS_ENABLE_CUDA)
   # ENFORCE that the compiler can compile CUDA code.
   if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
-    if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
-      message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
-    endif()
     if(NOT DEFINED CMAKE_CXX_EXTENSIONS)
       set(CMAKE_CXX_EXTENSIONS OFF)
     elseif(CMAKE_CXX_EXTENSIONS)
diff --git a/lib/kokkos/cmake/kokkos_tpls.cmake b/lib/kokkos/cmake/kokkos_tpls.cmake
index f43aff4d1f0..e811a5b4c4e 100644
--- a/lib/kokkos/cmake/kokkos_tpls.cmake
+++ b/lib/kokkos/cmake/kokkos_tpls.cmake
@@ -33,19 +33,6 @@ else()
 endif()
 kokkos_tpl_option(ROCM ${ROCM_DEFAULT})
 kokkos_tpl_option(ROCTHRUST ${ROCTHRUST_DEFAULT})
-if(Kokkos_ENABLE_ROCTHRUST)
-  include(CheckCXXSourceCompiles)
-  check_cxx_source_compiles(
-    "
-    #include 
-    int main() {
-      static_assert(_GLIBCXX_RELEASE < 9);
-      return 0;
-    }
-    "
-    Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG
-  )
-endif()
 
 if(KOKKOS_ENABLE_SYCL)
   set(ONEDPL_DEFAULT ON)
@@ -59,7 +46,7 @@ if(WIN32)
 else()
   set(LIBDL_DEFAULT On)
 endif()
-kokkos_tpl_option(LIBDL ${LIBDL_DEFAULT} TRIBITS DLlib)
+kokkos_enable_option(LIBDL ${LIBDL_DEFAULT} "Whether to enable the LIBDL library")
 
 if(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)
   set(HPX_DEFAULT ON)
@@ -81,7 +68,6 @@ kokkos_tpl_option(LIBQUADMATH ${LIBQUADMATH_DEFAULT} TRIBITS quadmath)
 kokkos_import_tpl(HPX INTERFACE)
 kokkos_import_tpl(CUDA INTERFACE)
 kokkos_import_tpl(HWLOC)
-kokkos_import_tpl(LIBDL)
 if(NOT WIN32)
   kokkos_import_tpl(THREADS INTERFACE)
 endif()
diff --git a/lib/kokkos/cmake/kokkos_tribits.cmake b/lib/kokkos/cmake/kokkos_tribits.cmake
index 83fbc964cf6..c22b56ef3e4 100644
--- a/lib/kokkos/cmake/kokkos_tribits.cmake
+++ b/lib/kokkos/cmake/kokkos_tribits.cmake
@@ -109,8 +109,6 @@ function(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
       OR Kokkos_ENABLE_EXPERIMENTAL_CXX20_MODULES
       OR Kokkos_ENABLE_SYCL
       OR Kokkos_ENABLE_HPX
-      OR Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG
-      OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.3.0)
       OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_HOST_COMPILER_ID STREQUAL "MSVC"))
   )
     if(MSVC)
@@ -163,7 +161,6 @@ macro(KOKKOS_SETUP_BUILD_ENVIRONMENT)
     include(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake)
     set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
     include(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake)
-    include(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
   endif()
 endmacro()
 
@@ -198,6 +195,9 @@ macro(KOKKOS_CONFIGURE_CORE)
     KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare"
     "${KOKKOS_ENABLED_DEVICES}"
   )
+  if(BUILD_SHARED_LIBS)
+    set(KOKKOS_IMPL_BUILD_SHARED_LIBS ON)
+  endif()
   configure_file(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
 endmacro()
 
@@ -235,44 +235,47 @@ endmacro()
 function(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
   cmake_parse_arguments(PARSE "PLAIN_STYLE" "" "" ${ARGN})
 
-  if((NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18"))
+  #allow multiple languages to be used downstream
+  set(Kokkos_LANGUAGES "${KOKKOS_COMPILE_LANGUAGE}")
+  if(Kokkos_ENABLE_MULTIPLE_CMAKE_LANGUAGES)
+    if(Kokkos_ENABLE_HIP)
+      set(Kokkos_LANGUAGES "HIP,CXX")
+    elseif(Kokkos_ENABLE_CUDA)
+      set(Kokkos_LANGUAGES "CUDA,CXX")
+    endif()
+  endif()
+
+  if(NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
     #I can use link options
     #check for CXX linkage using the simple 3.18 way
-    target_link_options(${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_LINK_OPTIONS}>)
+    target_link_options(${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_LINK_OPTIONS}>)
   else()
     #I can use link options
     #just assume CXX linkage
     target_link_options(${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS})
   endif()
 
-  #required for check_linker_flag
-  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
-    #exclude case of compiler_launcher. The launcher forwards to nvcc_wrapper and shadow the CXX compiler that CMake sees (compiler_launcher changes the compiler).
-    #The CXX compiler CMake will invoke for the check is not able to consume the cuda flags if it is not nvcc_wrapper or clang+cuda.
-    #FIXME_NVHPC nvc++ is failing the check spuriously with various version numbers.
-    if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
-       AND (NOT (KOKKOS_ENABLE_CUDA) OR ("${CMAKE_CXX_COMPILER}" MATCHES "nvcc_wrapper") OR (${KOKKOS_CXX_COMPILER_ID}
-                                                                                             STREQUAL Clang))
-    )
-      kokkos_check_flags(LINKER LANGUAGE ${KOKKOS_COMPILE_LANGUAGE} FLAGS ${KOKKOS_LINK_OPTIONS})
-    endif()
+  #exclude case of compiler_launcher. The launcher forwards to nvcc_wrapper and shadow the CXX compiler that CMake sees (compiler_launcher changes the compiler).
+  #The CXX compiler CMake will invoke for the check is not able to consume the cuda flags if it is not nvcc_wrapper or clang+cuda.
+  #FIXME_NVHPC nvc++ is failing the check spuriously with various version numbers.
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
+     AND (NOT (KOKKOS_ENABLE_CUDA) OR ("${CMAKE_CXX_COMPILER}" MATCHES "nvcc_wrapper") OR (${KOKKOS_CXX_COMPILER_ID}
+                                                                                           STREQUAL Clang))
+  )
+    kokkos_check_flags(LINKER LANGUAGE ${KOKKOS_COMPILE_LANGUAGE} FLAGS ${KOKKOS_LINK_OPTIONS})
   endif()
 
   list(APPEND ALL_KOKKOS_COMPILER_FLAGS ${KOKKOS_COMPILE_OPTIONS})
-  target_compile_options(
-    ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_COMPILE_OPTIONS}>
-  )
+  target_compile_options(${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_COMPILE_OPTIONS}>)
 
   target_compile_definitions(
-    ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_COMPILE_DEFINITIONS}>
+    ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_COMPILE_DEFINITIONS}>
   )
 
   target_link_libraries(${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_LIBRARIES})
 
   if(KOKKOS_ENABLE_CUDA)
-    target_compile_options(
-      ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_CUDA_OPTIONS}>
-    )
+    target_compile_options(${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_CUDA_OPTIONS}>)
     set(NODEDUP_CUDAFE_OPTIONS)
     foreach(OPT ${KOKKOS_CUDAFE_OPTIONS})
       list(APPEND NODEDUP_CUDAFE_OPTIONS -Xcudafe ${OPT})
@@ -286,9 +289,7 @@ function(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
   endif()
 
   if(KOKKOS_ENABLE_HIP)
-    target_compile_options(
-      ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_AMDGPU_OPTIONS}>
-    )
+    target_compile_options(${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_AMDGPU_OPTIONS}>)
     list(APPEND ALL_KOKKOS_COMPILER_FLAGS ${KOKKOS_AMDGPU_OPTIONS})
   endif()
 
@@ -313,25 +314,23 @@ function(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
     list(APPEND ALL_KOKKOS_COMPILER_FLAGS ${NODEDUP_XCOMPILER_OPTIONS})
   endif()
 
-  #required for check_compiler_flag
-  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
-    #exclude case of compiler_launcher. The launcher forwards to nvcc_wrapper and shadow the CXX compiler that CMake sees (compiler_launcher changes the compiler).
-    #The CXX compiler CMake will invoke for the check is not able to consume the cuda flags if it is not nvcc_wrapper or clang+cuda.
-    #FIXME_NVHPC nvc++ is failing the check spuriously with various version numbers.
-    if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
-       AND (NOT (KOKKOS_ENABLE_CUDA) OR ("${CMAKE_CXX_COMPILER}" MATCHES "nvcc_wrapper") OR (${KOKKOS_CXX_COMPILER_ID}
-                                                                                             STREQUAL Clang))
+  #exclude case of compiler_launcher. The launcher forwards to nvcc_wrapper and shadow the CXX compiler that CMake sees (compiler_launcher changes the compiler).
+  #The CXX compiler CMake will invoke for the check is not able to consume the cuda flags if it is not nvcc_wrapper or clang+cuda.
+  #FIXME_NVHPC nvc++ is failing the check spuriously with various version numbers.
+  #FIXME CLANG+RDC with std=c++20 and CMake 3.22+ the runtime is not linked in the CheckCompilerFlag leading to false positives
+  if(NOT (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
+     AND (NOT (KOKKOS_ENABLE_CUDA) OR ("${CMAKE_CXX_COMPILER}" MATCHES "nvcc_wrapper")
+          OR (${KOKKOS_CXX_COMPILER_ID} STREQUAL Clang AND NOT KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE))
+  )
+    kokkos_check_flags(
+      COMPILER
+      LANGUAGE
+      ${KOKKOS_COMPILE_LANGUAGE}
+      FLAGS
+      ${ALL_KOKKOS_COMPILER_FLAGS}
+      LINKER_FLAGS
+      ${KOKKOS_LINK_OPTIONS}
     )
-      kokkos_check_flags(
-        COMPILER
-        LANGUAGE
-        ${KOKKOS_COMPILE_LANGUAGE}
-        FLAGS
-        ${ALL_KOKKOS_COMPILER_FLAGS}
-        LINKER_FLAGS
-        ${KOKKOS_LINK_OPTIONS}
-      )
-    endif()
   endif()
 
   if(KOKKOS_CXX_STANDARD_FEATURE)
@@ -384,7 +383,9 @@ function(KOKKOS_ADD_LIBRARY LIBRARY_NAME)
   if(PARSE_SHARED OR BUILD_SHARED_LIBS)
     set_target_properties(
       ${LIBRARY_NAME} PROPERTIES VERSION ${Kokkos_VERSION} SOVERSION ${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}
+                                 WINDOWS_EXPORT_ALL_SYMBOLS ON
     )
+    target_compile_definitions(${LIBRARY_NAME} PRIVATE KOKKOS_IMPL_EXPORT_SYMBOLS)
   endif()
 
   kokkos_internal_add_library_install(${LIBRARY_NAME})
diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake
index 52d8368d041..71a790ead07 100644
--- a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake
+++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake
@@ -1,19 +1,5 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 #-----------------------------------------------------------------------------
 #  Hardware locality detection and control library.
diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake
index f51bce5d64d..f27b0a0e106 100644
--- a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake
+++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake
@@ -1,19 +1,5 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 set(USE_THREADS FALSE)
 
diff --git a/lib/kokkos/cmake/tpls/FindTPLquadmath.cmake b/lib/kokkos/cmake/tpls/FindTPLquadmath.cmake
index b449f45135a..a14048cc9f0 100644
--- a/lib/kokkos/cmake/tpls/FindTPLquadmath.cmake
+++ b/lib/kokkos/cmake/tpls/FindTPLquadmath.cmake
@@ -1,18 +1,4 @@
-#@HEADER
-# ************************************************************************
-#
-#                        Kokkos v. 4.0
-#       Copyright (2022) National Technology & Engineering
-#               Solutions of Sandia, LLC (NTESS).
-#
-# Under the terms of Contract DE-NA0003525 with NTESS,
-# the U.S. Government retains certain rights in this software.
-#
-# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-#
+# SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-# ************************************************************************
-# @HEADER
 
 tribits_tpl_find_include_dirs_and_libraries(quadmath REQUIRED_HEADERS quadmath.h REQUIRED_LIBS_NAMES quadmath)
diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile
deleted file mode 100644
index 914193da764..00000000000
--- a/lib/kokkos/containers/performance_tests/Makefile
+++ /dev/null
@@ -1,103 +0,0 @@
-KOKKOS_PATH = ../..
-
-GTEST_PATH = ../../TPL/gtest
-
-vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests
-
-default: build_all
-	echo "End Build"
-
-ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
-else
-  CXX = g++
-endif
-
-CXXFLAGS = -O3
-LINK ?= $(CXX)
-LDFLAGS ?=
-override LDFLAGS += -lpthread
-
-KOKKOS_USE_DEPRECATED_MAKEFILES=1
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
-
-TEST_TARGETS =
-TARGETS =
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
-	TARGETS += KokkosContainers_PerformanceTest_Cuda
-	TEST_TARGETS += test-cuda
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
-	OBJ_HIP = TestHIP.o TestMain.o gtest-all.o
-	TARGETS += KokkosContainers_PerformanceTest_HIP
-	TEST_TARGETS += test-hip
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
-	OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
-	TARGETS += KokkosContainers_PerformanceTest_Threads
-	TEST_TARGETS += test-threads
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-	OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
-	TARGETS += KokkosContainers_PerformanceTest_OpenMP
-	TEST_TARGETS += test-openmp
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
-	OBJ_HPX = TestHPX.o TestMain.o gtest-all.o
-	TARGETS += KokkosContainers_PerformanceTest_HPX
-	TEST_TARGETS += test-hpx
-endif
-
-KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda
-
-KokkosContainers_PerformanceTest_HIP: $(OBJ_HIP) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HIP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_HIP
-
-KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads
-
-KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP
-
-KokkosContainers_PerformanceTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_HPX
-
-test-cuda: KokkosContainers_PerformanceTest_Cuda
-	./KokkosContainers_PerformanceTest_Cuda
-
-test-hip: KokkosContainers_PerformanceTest_HIP
-	./KokkosContainers_PerformanceTest_HIP
-
-test-threads: KokkosContainers_PerformanceTest_Threads
-	./KokkosContainers_PerformanceTest_Threads
-
-test-openmp: KokkosContainers_PerformanceTest_OpenMP
-	./KokkosContainers_PerformanceTest_OpenMP
-
-test-hpx: KokkosContainers_PerformanceTest_HPX
-	./KokkosContainers_PerformanceTest_HPX
-
-build_all: $(TARGETS)
-
-test: $(TEST_TARGETS)
-
-clean: kokkos-clean
-	rm -f *.o $(TARGETS)
-
-# Compilation rules
-
-%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
-
-gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp
index 71b1c1d4ee8..fa7e13b102a 100644
--- a/lib/kokkos/containers/performance_tests/TestCuda.cpp
+++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
@@ -25,14 +12,17 @@
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_impl;
+#else
 #include 
-
-#include 
-
 #include 
+#endif
 
+#include 
 #include 
-
 #include 
 
 namespace Performance {
diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
index 18de6c9723e..674610a4974 100644
--- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
+++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp
@@ -1,24 +1,17 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_DYNRANKVIEW_HPP
 #define KOKKOS_TEST_DYNRANKVIEW_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dyn_rank_view;
+#else
 #include 
 #include 
+#endif
 #include 
 
 #include 
diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp
index ac11d2493ff..05df2590cfa 100644
--- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp
+++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp
@@ -1,24 +1,17 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
 #define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_map;
+#else
 #include 
 #include 
+#endif
 #include 
 #include 
 
diff --git a/lib/kokkos/containers/performance_tests/TestHIP.cpp b/lib/kokkos/containers/performance_tests/TestHIP.cpp
index 56680f7d648..64f3c4f11ae 100644
--- a/lib/kokkos/containers/performance_tests/TestHIP.cpp
+++ b/lib/kokkos/containers/performance_tests/TestHIP.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
@@ -25,14 +12,17 @@
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_map;
+#else
 #include 
-
-#include 
-
 #include 
+#endif
 
+#include 
 #include 
-
 #include 
 
 namespace Performance {
diff --git a/lib/kokkos/containers/performance_tests/TestHPX.cpp b/lib/kokkos/containers/performance_tests/TestHPX.cpp
index b3bfb2140b2..b1bdcc22da2 100644
--- a/lib/kokkos/containers/performance_tests/TestHPX.cpp
+++ b/lib/kokkos/containers/performance_tests/TestHPX.cpp
@@ -1,26 +1,18 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_map;
+#else
 #include 
-
 #include 
+#endif
 
 #include 
 #include 
diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp
index 0bf647012ac..d05f0e5deba 100644
--- a/lib/kokkos/containers/performance_tests/TestMain.cpp
+++ b/lib/kokkos/containers/performance_tests/TestMain.cpp
@@ -1,23 +1,15 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 int main(int argc, char *argv[]) {
   Kokkos::initialize(argc, argv);
diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
index 0e16200961d..db4f4ec7aac 100644
--- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
+++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
@@ -1,26 +1,18 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_map;
+#else
 #include 
-
 #include 
+#endif
 
 #include 
 #include 
diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp
index 953b8bff6e5..777565c51a2 100644
--- a/lib/kokkos/containers/performance_tests/TestScatterView.hpp
+++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp
@@ -1,23 +1,15 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
 #define KOKKOS_TEST_SCATTER_VIEW_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.scatter_view;
+#else
 #include 
+#endif
 #include 
 
 namespace Perf {
diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp
index d32b09702af..ccb6488a998 100644
--- a/lib/kokkos/containers/performance_tests/TestThreads.cpp
+++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp
@@ -1,26 +1,18 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.unordered_map;
+#else
 #include 
-
 #include 
+#endif
 
 #include 
 
diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp
index 6d3764a989d..ada0f659e21 100644
--- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp
+++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
 #define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt
index b386fbe6750..2ea2f6a2c64 100644
--- a/lib/kokkos/containers/src/CMakeLists.txt
+++ b/lib/kokkos/containers/src/CMakeLists.txt
@@ -10,6 +10,21 @@ set(KOKKOS_CONTAINER_HEADERS)
 append_glob(KOKKOS_CONTAINERS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
 append_glob(KOKKOS_CONTAINERS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
 
+set(KOKKOS_CONTAINERS_MODULES
+    Kokkos_Bitset.cppm
+    Kokkos_DualView.cppm
+    Kokkos_DynamicView.cppm
+    Kokkos_DynRankView.cppm
+    Kokkos_DynRankView_Impl.cppm
+    Kokkos_ErrorReporter.cppm
+    Kokkos_Functional.cppm
+    Kokkos_OffsetView.cppm
+    Kokkos_ScatterView.cppm
+    Kokkos_ScatterView_Impl.cppm
+    Kokkos_UnorderedMap.cppm
+    Kokkos_UnorderedMap_Impl.cppm
+)
+
 install(
   DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
   DESTINATION ${KOKKOS_HEADER_DIR}
@@ -17,7 +32,15 @@ install(
   PATTERN "*.hpp"
 )
 
-kokkos_add_library(kokkoscontainers SOURCES ${KOKKOS_CONTAINERS_SRCS} HEADERS ${KOKKOS_CONTAINERS_HEADERS})
+kokkos_add_library(
+  kokkoscontainers
+  SOURCES
+  ${KOKKOS_CONTAINERS_SRCS}
+  HEADERS
+  ${KOKKOS_CONTAINERS_HEADERS}
+  MODULE_INTERFACE
+  ${KOKKOS_CONTAINERS_MODULES}
+)
 
 kokkos_lib_include_directories(
   kokkoscontainers ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.cppm b/lib/kokkos/containers/src/Kokkos_Bitset.cppm
new file mode 100644
index 00000000000..c5d7d2ee051
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_Bitset.cppm
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.bitset;
+
+export {
+  namespace Kokkos {
+  using ::Kokkos::Bitset;
+  using ::Kokkos::ConstBitset;
+
+  using ::Kokkos::deep_copy;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp
index ed1936db1b3..6d2f739d7fc 100644
--- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp
+++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_BITSET_HPP
 #define KOKKOS_BITSET_HPP
@@ -21,9 +8,14 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
-#include 
 
 #include 
 
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.cppm b/lib/kokkos/containers/src/Kokkos_DualView.cppm
new file mode 100644
index 00000000000..dc8a2abfffe
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_DualView.cppm
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.dual_view;
+
+export {
+  namespace Kokkos {
+  using ::Kokkos::DualView;
+
+  using ::Kokkos::is_dual_view;
+  using ::Kokkos::is_dual_view_v;
+
+  using ::Kokkos::deep_copy;
+
+  using ::Kokkos::realloc;
+  using ::Kokkos::resize;
+
+  using ::Kokkos::subview;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp
index b902bbcb34e..fe0726df0f8 100644
--- a/lib/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /// \file Kokkos_DualView.hpp
 /// \brief Declaration and definition of Kokkos::DualView.
@@ -27,9 +14,17 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DUALVIEW
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
 
+#include 
+
 namespace Kokkos {
 
 /* \class DualView
@@ -140,7 +135,7 @@ class DualView : public ViewTraits {
 
   /// \typedef t_host
   /// \brief The type of a Kokkos::View host mirror of \c t_dev.
-  using t_host = typename t_dev::HostMirror;
+  using t_host = typename t_dev::host_mirror_type;
 
   //! The type of a const View on the device.
   //! The type of a Kokkos::View on the device.
@@ -153,7 +148,7 @@ class DualView : public ViewTraits {
 
   /// \typedef t_host_const
   /// \brief The type of a const View host mirror of \c t_dev_const.
-  using t_host_const = typename t_dev_const::HostMirror;
+  using t_host_const = typename t_dev_const::host_mirror_type;
 
   //! The type of a const, random-access View on the device.
   using t_dev_const_randomread =
@@ -164,7 +159,8 @@ class DualView : public ViewTraits {
   /// \typedef t_host_const_randomread
   /// \brief The type of a const, random-access View host mirror of
   ///   \c t_dev_const_randomread.
-  using t_host_const_randomread = typename t_dev_const_randomread::HostMirror;
+  using t_host_const_randomread =
+      typename t_dev_const_randomread::host_mirror_type;
 
   //! The type of an unmanaged View on the device.
   using t_dev_um =
@@ -196,7 +192,7 @@ class DualView : public ViewTraits {
   /// \brief The type of a const, random-access View host mirror of
   ///   \c t_dev_const_randomread.
   using t_host_const_randomread_um =
-      typename t_dev_const_randomread_um::HostMirror;
+      typename t_dev_const_randomread_um::host_mirror_type;
 
   //@}
   //! \name Counters to keep track of changes ("modified" flags)
@@ -332,7 +328,7 @@ class DualView : public ViewTraits {
   /// modify() methods to ensure synchronization of the View objects.
   ///
   /// \param d_view_ Device View
-  /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
+  /// \param h_view_ Host View (must have type t_host = t_dev::host_mirror_type)
   DualView(const t_dev& d_view_, const t_host& h_view_)
       : modified_flags(t_modified_flags("DualView::modified_flags")),
         d_view(d_view_),
@@ -1009,8 +1005,7 @@ class DualView : public ViewTraits {
         resync_host(properties);
 
         /* Mark Device copy as modified */
-        if constexpr (!impl_dualview_is_single_device)
-          ++modified_flags(1);
+        if constexpr (!impl_dualview_is_single_device) ++modified_flags(1);
       }
     };
 
@@ -1024,8 +1019,7 @@ class DualView : public ViewTraits {
         resync_device(properties);
 
         /* Mark Host copy as modified */
-        if constexpr (!impl_dualview_is_single_device)
-          ++modified_flags(0);
+        if constexpr (!impl_dualview_is_single_device) ++modified_flags(0);
       }
     };
 
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.cppm b/lib/kokkos/containers/src/Kokkos_DynRankView.cppm
new file mode 100644
index 00000000000..488f05f235d
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView.cppm
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.dyn_rank_view;
+
+export {
+  namespace Kokkos {
+  using ::Kokkos::DynRankView;
+
+  using ::Kokkos::is_dyn_rank_view;
+  using ::Kokkos::is_dyn_rank_view_v;
+
+  using ::Kokkos::Subdynrankview;
+  using ::Kokkos::subdynrankview;
+  using ::Kokkos::subview;
+
+  using ::Kokkos::rank;
+
+  using ::Kokkos::deep_copy;
+  using ::Kokkos::realloc;
+  using ::Kokkos::resize;
+
+  using ::Kokkos::create_mirror;
+  using ::Kokkos::create_mirror_view;
+  using ::Kokkos::create_mirror_view_and_copy;
+
+  using ::Kokkos::operator!=;
+  using ::Kokkos::operator==;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
index db75076addf..4586b5ad097 100644
--- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /// \file Kokkos_DynRankView.hpp
 /// \brief Declaration and definition of Kokkos::DynRankView.
@@ -27,7 +14,13 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNRANKVIEW
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
 #include 
 #include 
@@ -65,31 +58,10 @@ struct DynRankDimTraits {
   KOKKOS_INLINE_FUNCTION
   static size_t computeRank(const size_t N0, const size_t N1, const size_t N2,
                             const size_t N3, const size_t N4, const size_t N5,
-                            const size_t N6, const size_t /* N7 */) {
-    return (
-        (N6 == unspecified && N5 == unspecified && N4 == unspecified &&
-         N3 == unspecified && N2 == unspecified && N1 == unspecified &&
-         N0 == unspecified)
-            ? 0
-            : ((N6 == unspecified && N5 == unspecified && N4 == unspecified &&
-                N3 == unspecified && N2 == unspecified && N1 == unspecified)
-                   ? 1
-                   : ((N6 == unspecified && N5 == unspecified &&
-                       N4 == unspecified && N3 == unspecified &&
-                       N2 == unspecified)
-                          ? 2
-                          : ((N6 == unspecified && N5 == unspecified &&
-                              N4 == unspecified && N3 == unspecified)
-                                 ? 3
-                                 : ((N6 == unspecified && N5 == unspecified &&
-                                     N4 == unspecified)
-                                        ? 4
-                                        : ((N6 == unspecified &&
-                                            N5 == unspecified)
-                                               ? 5
-                                               : ((N6 == unspecified)
-                                                      ? 6
-                                                      : 7)))))));
+                            const size_t N6, const size_t N7) {
+    return (N0 != unspecified) + (N1 != unspecified) + (N2 != unspecified) +
+           (N3 != unspecified) + (N4 != unspecified) + (N5 != unspecified) +
+           (N6 != unspecified) + (N7 != unspecified);
   }
 
   // Compute the rank of the view from the nonzero layout arguments.
@@ -119,7 +91,8 @@ struct DynRankDimTraits {
       (std::is_same_v ||
        std::is_same_v),
       Layout>
-  createLayout(const Layout& layout) {
+  createLayout(const Layout& layout,
+               [[maybe_unused]] size_t new_rank = unspecified) {
     Layout new_layout(
         layout.dimension[0] != unspecified ? layout.dimension[0] : 1,
         layout.dimension[1] != unspecified ? layout.dimension[1] : 1,
@@ -146,6 +119,11 @@ struct DynRankDimTraits {
     } else
 #endif
       new_layout.stride = layout.stride;
+    if constexpr (std::is_same_v) {
+      if (new_rank != unspecified && new_rank > 0 &&
+          layout.dimension[new_rank - 1] == layout.stride)
+        new_layout.stride = unspecified;
+    }
     return new_layout;
   }
 
@@ -153,7 +131,8 @@ struct DynRankDimTraits {
   template 
   KOKKOS_INLINE_FUNCTION static std::enable_if_t<
       (std::is_same_v), Layout>
-  createLayout(const Layout& layout) {
+  createLayout(const Layout& layout,
+               [[maybe_unused]] size_t new_rank = unspecified) {
     return Layout(
         layout.dimension[0] != unspecified ? layout.dimension[0] : 1,
         layout.stride[0],
@@ -394,7 +373,8 @@ class ViewMapping<
         src.layout());  // Check this for integer input1 for padding, etc
     dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle::assign(
         src.m_map.m_impl_handle, src.m_track.m_tracker);
-    dst.m_track.m_tracker.assign(src.m_track.m_tracker, DstTraits::is_managed);
+    dst.m_track.m_tracker.assign(src.m_track.m_tracker,
+                                 !DstTraits::memory_traits::is_unmanaged);
     dst.m_rank = Kokkos::View::rank();
   }
 };
@@ -479,9 +459,14 @@ class DynRankView : private View {
   using reference_type = typename view_type::reference_type;
   using pointer_type   = typename view_type::pointer_type;
 
-  using scalar_array_type           = value_type;
-  using const_scalar_array_type     = const_value_type;
-  using non_const_scalar_array_type = non_const_value_type;
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
+  using scalar_array_type KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use data_type instead.") = data_type;
+  using const_scalar_array_type KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use const_data_type instead.") = const_data_type;
+  using non_const_scalar_array_type KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use non_const_data_type instead.") = non_const_data_type;
+#endif
 #ifndef KOKKOS_ENABLE_IMPL_VIEW_LEGACY
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
   using specialize KOKKOS_DEPRECATED = void;
@@ -499,6 +484,11 @@ class DynRankView : private View {
   using reference        = reference_type;
   using data_handle_type = pointer_type;
 
+#ifndef KOKKOS_ENABLE_IMPL_VIEW_LEGACY
+  using accessor_type = typename view_type::accessor_type;
+  using mapping_type  = typename view_type::mapping_type;
+#endif
+
   KOKKOS_FUNCTION
   view_type& DownCast() const { return (view_type&)(*this); }
 
@@ -511,14 +501,19 @@ class DynRankView : private View {
   // KOKKOS_FUNCTION
   // view_type to_view() const { return *this; }
 
-  // Types below - at least the HostMirror requires the value_type, NOT the rank
-  // 7 data_type of the traits
+  // Types below - at least host_mirror_type requires the value_type, NOT the
+  // rank 7 data_type of the traits
 
   /** \brief  Compatible view of array of scalar types */
-  using array_type = DynRankView<
-      typename drvtraits::scalar_array_type, typename drvtraits::array_layout,
+  using type = DynRankView<
+      typename drvtraits::data_type, typename drvtraits::array_layout,
       typename drvtraits::device_type, typename drvtraits::memory_traits>;
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
+  /** \brief  Compatible view of array of scalar types */
+  using array_type KOKKOS_DEPRECATED_WITH_COMMENT("Use type instead.") = type;
+#endif
+
   /** \brief  Compatible view of const data type */
   using const_type = DynRankView<
       typename drvtraits::const_data_type, typename drvtraits::array_layout,
@@ -529,12 +524,16 @@ class DynRankView : private View {
       typename drvtraits::non_const_data_type, typename drvtraits::array_layout,
       typename drvtraits::device_type, typename drvtraits::memory_traits>;
 
-  /** \brief  Compatible HostMirror view */
-  using HostMirror = DynRankView;
+  /** \brief  Compatible host mirror view */
+  using host_mirror_type = DynRankView;
 
-  using host_mirror_type = HostMirror;
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  /** \brief  Compatible HostMirror view */
+  using HostMirror KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use host_mirror_type instead.") = host_mirror_type;
+#endif
   //----------------------------------------
   // Domain rank and extents
 
@@ -607,14 +606,16 @@ class DynRankView : private View {
   using view_type::span;
   using view_type::span_is_contiguous;  // FIXME: not tested
   using view_type::stride;              // FIXME: not tested
-  using view_type::stride_0;            // FIXME: not tested
-  using view_type::stride_1;            // FIXME: not tested
-  using view_type::stride_2;            // FIXME: not tested
-  using view_type::stride_3;            // FIXME: not tested
-  using view_type::stride_4;            // FIXME: not tested
-  using view_type::stride_5;            // FIXME: not tested
-  using view_type::stride_6;            // FIXME: not tested
-  using view_type::stride_7;            // FIXME: not tested
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  using view_type::stride_0;  // FIXME: not tested
+  using view_type::stride_1;  // FIXME: not tested
+  using view_type::stride_2;  // FIXME: not tested
+  using view_type::stride_3;  // FIXME: not tested
+  using view_type::stride_4;  // FIXME: not tested
+  using view_type::stride_5;  // FIXME: not tested
+  using view_type::stride_6;  // FIXME: not tested
+  using view_type::stride_7;  // FIXME: not tested
+#endif
   using view_type::use_count;
 
 #ifdef KOKKOS_ENABLE_CUDA
@@ -756,7 +757,7 @@ class DynRankView : private View {
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
   KOKKOS_FUNCTION reference_type operator[](index_type i0) const {
     if constexpr (std::is_same_v) {
+                                 typename drvtraits::data_type>) {
       return view_type::data()[i0];
     } else {
       const size_t dim_scalar = view_type::impl_map().dimension_scalar();
@@ -848,7 +849,7 @@ class DynRankView : private View {
       : view_type(rhs.data_handle(),
                   Impl::mapping_from_array_layout<
                       typename view_type::mdspan_type::mapping_type>(
-                      drdtraits::createLayout(rhs.layout())),
+                      drdtraits::createLayout(rhs.layout(), new_rank)),
                   rhs.accessor()),
         m_rank(new_rank) {
     if (new_rank > View::rank())
@@ -863,7 +864,7 @@ class DynRankView : private View {
         view_type(rhs.data_handle(),
                   Impl::mapping_from_array_layout<
                       typename view_type::mdspan_type::mapping_type>(
-                      drdtraits::createLayout(rhs.layout())),
+                      drdtraits::createLayout(rhs.layout(), rhs.rank())),
                   rhs.accessor()));
     m_rank = rhs.rank();
     return *this;
@@ -922,9 +923,10 @@ class DynRankView : private View {
       std::enable_if_t<((!std::is_same_v)&&...),
                        const typename traits::array_layout&>
           layout) {
-    if constexpr (traits::impl_is_customized) {
+    if constexpr (traits::impl_is_customized &&
+                  !Impl::ViewCtorProp::has_accessor_arg) {
       int r = 0;
-      while (r < 7 && layout.dimension[r] != KOKKOS_INVALID_INDEX) r++;
+      while (r < 8 && layout.dimension[r] != KOKKOS_INVALID_INDEX) r++;
 
       // Can't use with_properties_if_unset since its a host only function!
       return view_wrap(
@@ -943,7 +945,7 @@ class DynRankView : private View {
     if constexpr (traits::impl_is_customized &&
                   !Impl::ViewCtorProp::has_accessor_arg) {
       int r = 0;
-      while (r < 7 && layout.dimension[r] != KOKKOS_INVALID_INDEX) r++;
+      while (r < 8 && layout.dimension[r] != KOKKOS_INVALID_INDEX) r++;
       // Could use with_properties_if_unset, but rather keep same as above.
       return view_alloc(
           static_cast&>(arg_prop).value...,
@@ -956,23 +958,6 @@ class DynRankView : private View {
  public:
 #endif
 
-  // With NVCC 11.0 and 11.2 (and others likely) using GCC 8.5 a DynRankView
-  // test fails at runtime where construction from layout drops some extents.
-  // The bug goes away with O1.
-  // FIXME: NVCC GCC8 optimization bug DynRankView
-#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_GNU)
-#if KOKKOS_COMPILER_GNU < 900
-#define KOKKOS_IMPL_SKIP_OPTIMIZATION
-#endif
-#endif
-
-#ifdef KOKKOS_IMPL_SKIP_OPTIMIZATION
-// Also need to suppress warning about unrecognized GCC optimize pragma
-#pragma push
-#pragma diag_suppress = unrecognized_gcc_pragma
-#pragma GCC push_options
-#pragma GCC optimize("O1")
-#endif
   template 
   explicit KOKKOS_FUNCTION DynRankView(
       const Kokkos::Impl::ViewCtorProp& arg_prop,
@@ -1014,12 +999,6 @@ class DynRankView : private View {
   }
 #endif
 
-#ifdef KOKKOS_IMPL_SKIP_OPTIMIZATION
-#pragma GCC pop_options
-#pragma pop
-#undef KOKKOS_IMPL_SKIP_OPTIMIZATION
-#endif
-
         //----------------------------------------
         // Constructor(s)
 
@@ -1619,23 +1598,16 @@ namespace Impl {
 template 
 inline auto create_mirror(const DynRankView& src,
                           const Impl::ViewCtorProp& arg_prop) {
-  check_view_ctor_args_create_mirror();
-
-  auto prop_copy = Impl::with_properties_if_unset(
-      arg_prop, std::string(src.label()).append("_mirror"));
-
   if constexpr (Impl::ViewCtorProp::has_memory_space) {
     using dst_type = typename Impl::MirrorDRViewType<
         typename Impl::ViewCtorProp::memory_space, T,
         P...>::dest_view_type;
-    return dst_type(prop_copy,
-                    Impl::reconstructLayout(src.layout(), src.rank()));
+    return dst_type(create_mirror(arg_prop, src.DownCast()), src.rank());
   } else {
     using src_type = DynRankView;
-    using dst_type = typename src_type::HostMirror;
+    using dst_type = typename src_type::host_mirror_type;
 
-    return dst_type(prop_copy,
-                    Impl::reconstructLayout(src.layout(), src.rank()));
+    return dst_type(create_mirror(arg_prop, src.DownCast()), src.rank());
   }
 #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
     !defined(KOKKOS_COMPILER_MSVC)
@@ -1706,12 +1678,12 @@ inline auto create_mirror_view(
         arg_prop) {
   if constexpr (!Impl::ViewCtorProp::has_memory_space) {
     if constexpr (std::is_same_v::memory_space,
+                                 typename DynRankView::
+                                     host_mirror_type::memory_space> &&
+                  std::is_same_v::data_type,
                                  typename DynRankView<
-                                     T, P...>::HostMirror::memory_space> &&
-                  std::is_same_v<
-                      typename DynRankView::data_type,
-                      typename DynRankView::HostMirror::data_type>) {
-      return typename DynRankView::HostMirror(src);
+                                     T, P...>::host_mirror_type::data_type>) {
+      return typename DynRankView::host_mirror_type(src);
     } else {
       return Kokkos::Impl::choose_create_mirror(src, arg_prop);
     }
@@ -1847,7 +1819,7 @@ inline void impl_resize(const Impl::ViewCtorProp& arg_prop,
   using drview_type      = DynRankView;
   using alloc_prop_input = Impl::ViewCtorProp;
 
-  static_assert(Kokkos::ViewTraits::is_managed,
+  static_assert(!Kokkos::ViewTraits::memory_traits::is_unmanaged,
                 "Can only resize managed views");
   static_assert(!alloc_prop_input::has_label,
                 "The view constructor arguments passed to Kokkos::resize "
@@ -1927,7 +1899,7 @@ inline void impl_realloc(DynRankView& v, const size_t n0,
   using drview_type      = DynRankView;
   using alloc_prop_input = Impl::ViewCtorProp;
 
-  static_assert(Kokkos::ViewTraits::is_managed,
+  static_assert(!Kokkos::ViewTraits::memory_traits::is_unmanaged,
                 "Can only realloc managed views");
   static_assert(!alloc_prop_input::has_label,
                 "The view constructor arguments passed to Kokkos::realloc must "
@@ -1990,7 +1962,7 @@ namespace Experimental {
 template 
 struct python_view_type> {
   using type = Kokkos::Impl::python_view_type_impl_t<
-      typename DynRankView::array_type>;
+      typename DynRankView::type>;
 };
 }  // namespace Experimental
 
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView_Impl.cppm b/lib/kokkos/containers/src/Kokkos_DynRankView_Impl.cppm
new file mode 100644
index 00000000000..9308420fd8a
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView_Impl.cppm
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.dyn_rank_view_impl;
+
+export {
+  namespace Kokkos::Impl {
+  using ::Kokkos::Impl::ApplyToViewOfStaticRank;
+  using ::Kokkos::Impl::as_view_of_rank_n;
+  }  // namespace Kokkos::Impl
+}
diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.cppm b/lib/kokkos/containers/src/Kokkos_DynamicView.cppm
new file mode 100644
index 00000000000..e0259b49a31
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_DynamicView.cppm
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.dynamic_view;
+
+export {
+  namespace Kokkos {
+  namespace Experimental {
+  using ::Kokkos::Experimental::DynamicView;
+  }
+
+  using ::Kokkos::is_dynamic_view;
+  using ::Kokkos::is_dynamic_view_v;
+
+  using ::Kokkos::create_mirror;
+  using ::Kokkos::create_mirror_view;
+  using ::Kokkos::create_mirror_view_and_copy;
+
+  using ::Kokkos::deep_copy;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
index e436e8f9f28..acc75990097 100644
--- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_DYNAMIC_VIEW_HPP
 #define KOKKOS_DYNAMIC_VIEW_HPP
@@ -23,9 +10,17 @@
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
 
+#include 
+
 namespace Kokkos {
 namespace Experimental {
 
@@ -267,10 +262,15 @@ class DynamicView : public Kokkos::ViewTraits {
 
  public:
   //----------------------------------------------------------------------
+  /** \brief  Compatible view of data type */
+  using uniform_type =
+      DynamicView;
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
   /** \brief  Compatible view of array of scalar types */
-  using array_type =
-      DynamicView;
+  using array_type KOKKOS_DEPRECATED_WITH_COMMENT("Use uniform_type instead.") =
+      uniform_type;
+#endif
 
   /** \brief  Compatible view of const data type */
   using const_type = DynamicView {
                                      typename traits::device_type>;
 
   /** \brief  Must be accessible everywhere */
-  using HostMirror = DynamicView;
+  using host_mirror_type = DynamicView;
 
   /** \brief Unified types */
   using uniform_device =
       Kokkos::Device;
-  using uniform_type               = array_type;
   using uniform_const_type         = const_type;
-  using uniform_runtime_type       = array_type;
+  using uniform_runtime_type       = uniform_type;
   using uniform_runtime_const_type = const_type;
   using uniform_nomemspace_type =
       DynamicView;
@@ -334,14 +333,18 @@ class DynamicView : public Kokkos::ViewTraits {
     return r == 0 ? size() : 1;
   }
 
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return 0; }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return 0; }
+  // clang-format off
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(0) instead") KOKKOS_FUNCTION constexpr size_t stride_0() const { return stride(0); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(1) instead") KOKKOS_FUNCTION constexpr size_t stride_1() const { return stride(1); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(2) instead") KOKKOS_FUNCTION constexpr size_t stride_2() const { return stride(2); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(3) instead") KOKKOS_FUNCTION constexpr size_t stride_3() const { return stride(3); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(4) instead") KOKKOS_FUNCTION constexpr size_t stride_4() const { return stride(4); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(5) instead") KOKKOS_FUNCTION constexpr size_t stride_5() const { return stride(5); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(6) instead") KOKKOS_FUNCTION constexpr size_t stride_6() const { return stride(6); }
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use stride(7) instead") KOKKOS_FUNCTION constexpr size_t stride_7() const { return stride(7); }
+#endif
+  // clang-format on
 
   template 
   KOKKOS_INLINE_FUNCTION void stride(iType* const s) const {
@@ -617,8 +620,9 @@ inline auto create_mirror(const Kokkos::Experimental::DynamicView& src,
 
     return ret;
   } else {
-    auto ret = typename Kokkos::Experimental::DynamicView::HostMirror(
-        prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
+    auto ret =
+        typename Kokkos::Experimental::DynamicView::host_mirror_type(
+            prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
 
     ret.resize_serial(src.extent(0));
 
@@ -695,16 +699,18 @@ inline auto create_mirror_view(
     const Kokkos::Experimental::DynamicView& src,
     [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) {
   if constexpr (!Impl::ViewCtorProp::has_memory_space) {
-    if constexpr (std::is_same_v::memory_space,
-                                 typename Kokkos::Experimental::DynamicView<
-                                     T, P...>::HostMirror::memory_space> &&
+    if constexpr (std::is_same_v<
+                      typename Kokkos::Experimental::DynamicView<
+                          T, P...>::memory_space,
+                      typename Kokkos::Experimental::DynamicView<
+                          T, P...>::host_mirror_type::memory_space> &&
                   std::is_same_v::data_type,
                                  typename Kokkos::Experimental::DynamicView<
-                                     T, P...>::HostMirror::data_type>) {
+                                     T, P...>::host_mirror_type::data_type>) {
       return
-          typename Kokkos::Experimental::DynamicView::HostMirror(src);
+          typename Kokkos::Experimental::DynamicView::host_mirror_type(
+              src);
     } else {
       return Kokkos::Impl::choose_create_mirror(src, arg_prop);
     }
diff --git a/lib/kokkos/containers/src/Kokkos_ErrorReporter.cppm b/lib/kokkos/containers/src/Kokkos_ErrorReporter.cppm
new file mode 100644
index 00000000000..5bbbe3426ca
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_ErrorReporter.cppm
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.error_reporter;
+
+export {
+  namespace Kokkos {
+  namespace Experimental {
+  using ::Kokkos::Experimental::ErrorReporter;
+  }
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
index 5d5d0965a0a..7f08725ff05 100644
--- a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
+++ b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
 #define KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
@@ -21,58 +8,116 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ERRORREPORTER
 #endif
 
-#include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
-#include 
-#include 
+#endif
+
+#include 
+#include 
+#include 
+#include 
 
 namespace Kokkos {
 namespace Experimental {
-
-template 
+template 
 class ErrorReporter {
  public:
   using report_type     = ReportType;
   using device_type     = DeviceType;
   using execution_space = typename device_type::execution_space;
 
-  ErrorReporter(int max_results)
-      : m_numReportsAttempted(""),
-        m_reports("", max_results),
-        m_reporters("", max_results) {
+  ErrorReporter(const std::string &label, int max_results)
+      : m_numReportsAttempted(label + "::m_numReportsAttempted"),
+        m_reports(label + "::m_reports", max_results),
+        m_reporters(label + "::m_reporters", max_results) {
     clear();
   }
 
-  int getCapacity() const { return m_reports.view_host().extent(0); }
+  ErrorReporter(int max_results)
+      : ErrorReporter("ErrorReporter", max_results) {}
 
-  int getNumReports();
+  int capacity() const { return m_reports.extent(0); }
 
-  int getNumReportAttempts();
+  int num_reports() const {
+    return std::clamp(num_report_attempts(), 0, capacity());
+  }
 
+  int num_report_attempts() const {
+    int value;
+    Kokkos::deep_copy(value, m_numReportsAttempted);
+    return value;
+  }
+
+  auto get_reports() const {
+    int num_reps = num_reports();
+    std::vector reporters_out(num_reps);
+    std::vector reports_out(num_reps);
+
+    if (num_reps > 0) {
+      Kokkos::View h_reporters(reporters_out.data(),
+                                                         num_reps);
+      Kokkos::View h_reports(
+          reports_out.data(), num_reps);
+
+      Kokkos::deep_copy(
+          h_reporters, Kokkos::subview(m_reporters, Kokkos::pair{0, num_reps}));
+      Kokkos::deep_copy(h_reports,
+                        Kokkos::subview(m_reports, Kokkos::pair{0, num_reps}));
+    }
+    return std::pair{std::move(reporters_out), std::move(reports_out)};
+  }
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use capacity() instead")
+  int getCapacity() const { return capacity(); }
+
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use num_reports() instead")
+  int getNumReports() const { return num_reports(); }
+
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use num_report_attempts() instead")
+  int getNumReportAttempts() const { return num_report_attempts(); }
+
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use get_reports() instead")
   void getReports(std::vector &reporters_out,
                   std::vector &reports_out);
+  KOKKOS_DEPRECATED_WITH_COMMENT("Use get_reports() instead")
   void getReports(
-      typename Kokkos::View::HostMirror
-          &reporters_out,
-      typename Kokkos::View::HostMirror
+      typename Kokkos::View::
+          host_mirror_type &reporters_out,
+      typename Kokkos::View<
+          report_type *, typename DeviceType::execution_space>::host_mirror_type
           &reports_out);
+#endif
+
+  bool full() const { return (num_report_attempts() >= capacity()); }
 
-  void clear();
+  void clear() const { Kokkos::deep_copy(m_numReportsAttempted, 0); }
 
-  void resize(const size_t new_size);
+  // This function keeps reports up to new_size alive
+  // It may lose the information on attempted reports
+  void resize(const size_t new_size) {
+    // We have to reset the attempts so we don't accidentally
+    // report more stored reports than there actually are
+    // after growing capacity.
+    int num_reps = num_report_attempts();
+    if (new_size > static_cast(capacity()) && num_reps > capacity())
+      Kokkos::deep_copy(m_numReportsAttempted, num_reports());
 
-  bool full() { return (getNumReportAttempts() >= getCapacity()); }
+    Kokkos::resize(m_reports, new_size);
+    Kokkos::resize(m_reporters, new_size);
+  }
 
   KOKKOS_INLINE_FUNCTION
   bool add_report(int reporter_id, report_type report) const {
-    int idx = Kokkos::atomic_fetch_add(&m_numReportsAttempted(), 1);
+    int idx = Kokkos::atomic_fetch_inc(&m_numReportsAttempted());
 
-    if (idx >= 0 &&
-        (idx < static_cast(m_reports.view_device().extent(0)))) {
-      m_reporters.view_device()(idx) = reporter_id;
-      m_reports.view_device()(idx)   = report;
+    if (idx >= 0 && (idx < m_reports.extent_int(0))) {
+      m_reporters(idx) = reporter_id;
+      m_reports(idx)   = report;
       return true;
     } else {
       return false;
@@ -80,91 +125,56 @@ class ErrorReporter {
   }
 
  private:
-  using reports_view_t     = Kokkos::View;
-  using reports_dualview_t = Kokkos::DualView;
-
-  using host_mirror_space = typename reports_dualview_t::host_mirror_space;
   Kokkos::View m_numReportsAttempted;
-  reports_dualview_t m_reports;
-  Kokkos::DualView m_reporters;
+  Kokkos::View m_reports;
+  Kokkos::View m_reporters;
 };
 
-template 
-inline int ErrorReporter::getNumReports() {
-  int num_reports = 0;
-  Kokkos::deep_copy(num_reports, m_numReportsAttempted);
-  if (num_reports > static_cast(m_reports.view_host().extent(0))) {
-    num_reports = m_reports.view_host().extent(0);
-  }
-  return num_reports;
-}
-
-template 
-inline int ErrorReporter::getNumReportAttempts() {
-  int num_reports = 0;
-  Kokkos::deep_copy(num_reports, m_numReportsAttempted);
-  return num_reports;
-}
-
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
 template 
 void ErrorReporter::getReports(
     std::vector &reporters_out, std::vector &reports_out) {
-  int num_reports = getNumReports();
   reporters_out.clear();
-  reporters_out.reserve(num_reports);
   reports_out.clear();
-  reports_out.reserve(num_reports);
+  int num_reps = num_reports();
 
-  if (num_reports > 0) {
-    m_reports.template sync();
-    m_reporters.template sync();
+  if (num_reps > 0) {
+    reporters_out.resize(num_reps);
+    reports_out.resize(num_reps);
 
-    for (int i = 0; i < num_reports; ++i) {
-      reporters_out.push_back(m_reporters.view_host()(i));
-      reports_out.push_back(m_reports.view_host()(i));
-    }
+    Kokkos::View h_reporters(reporters_out.data(),
+                                                       num_reps);
+    Kokkos::View h_reports(reports_out.data(),
+                                                             num_reps);
+
+    Kokkos::deep_copy(h_reporters,
+                      Kokkos::subview(m_reporters, Kokkos::pair{0, num_reps}));
+    Kokkos::deep_copy(h_reports,
+                      Kokkos::subview(m_reports, Kokkos::pair{0, num_reps}));
   }
 }
 
 template 
 void ErrorReporter::getReports(
-    typename Kokkos::View<
-        int *, typename DeviceType::execution_space>::HostMirror &reporters_out,
-    typename Kokkos::View::HostMirror
-        &reports_out) {
-  int num_reports = getNumReports();
-  reporters_out   = typename Kokkos::View::HostMirror(
-      "ErrorReport::reporters_out", num_reports);
-  reports_out = typename Kokkos::View::HostMirror(
-      "ErrorReport::reports_out", num_reports);
-
-  if (num_reports > 0) {
-    m_reports.template sync();
-    m_reporters.template sync();
-
-    for (int i = 0; i < num_reports; ++i) {
-      reporters_out(i) = m_reporters.view_host()(i);
-      reports_out(i)   = m_reports.view_host()(i);
-    }
+    typename Kokkos::View::
+        host_mirror_type &reporters_out,
+    typename Kokkos::View::
+        host_mirror_type &reports_out) {
+  int num_reps  = num_reports();
+  reporters_out = typename Kokkos::View::host_mirror_type(
+      "ErrorReport::reporters_out", num_reps);
+  reports_out =
+      typename Kokkos::View::host_mirror_type(
+          "ErrorReport::reports_out", num_reps);
+
+  if (num_reps > 0) {
+    Kokkos::deep_copy(reporters_out,
+                      Kokkos::subview(m_reporters, Kokkos::pair{0, num_reps}));
+    Kokkos::deep_copy(reports_out,
+                      Kokkos::subview(m_reports, Kokkos::pair{0, num_reps}));
   }
 }
-
-template 
-void ErrorReporter::clear() {
-  int num_reports = 0;
-  Kokkos::deep_copy(m_numReportsAttempted, num_reports);
-  m_reports.template modify();
-  m_reporters.template modify();
-}
-
-template 
-void ErrorReporter::resize(const size_t new_size) {
-  m_reports.resize(new_size);
-  m_reporters.resize(new_size);
-  typename DeviceType::execution_space().fence(
-      "Kokkos::Experimental::ErrorReporter::resize: fence after resizing");
-}
+#endif
 
 }  // namespace Experimental
 }  // namespace Kokkos
diff --git a/lib/kokkos/containers/src/Kokkos_Functional.cppm b/lib/kokkos/containers/src/Kokkos_Functional.cppm
new file mode 100644
index 00000000000..9a6039fd0ce
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_Functional.cppm
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.functional;
+
+export {
+  namespace Kokkos {
+  using ::Kokkos::equal_to;
+  using ::Kokkos::greater;
+  using ::Kokkos::greater_equal;
+  using ::Kokkos::less;
+  using ::Kokkos::less_equal;
+  using ::Kokkos::not_equal_to;
+  using ::Kokkos::pod_equal_to;
+  using ::Kokkos::pod_hash;
+  using ::Kokkos::pod_not_equal_to;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_Functional.hpp b/lib/kokkos/containers/src/Kokkos_Functional.hpp
index 4d39716cc3f..2064d604566 100644
--- a/lib/kokkos/containers/src/Kokkos_Functional.hpp
+++ b/lib/kokkos/containers/src/Kokkos_Functional.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_FUNCTIONAL_HPP
 #define KOKKOS_FUNCTIONAL_HPP
diff --git a/lib/kokkos/containers/src/Kokkos_OffsetView.cppm b/lib/kokkos/containers/src/Kokkos_OffsetView.cppm
new file mode 100644
index 00000000000..9df204b0b73
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_OffsetView.cppm
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.offset_view;
+
+export {
+  namespace Kokkos {
+  namespace Experimental {
+  using ::Kokkos::Experimental::OffsetView;
+
+  using ::Kokkos::Experimental::is_offset_view;
+  using ::Kokkos::Experimental::is_offset_view_v;
+
+  using ::Kokkos::Experimental::index_list_type;
+  using ::Kokkos::Experimental::IndexRange;
+
+  using ::Kokkos::Experimental::operator==;
+  using ::Kokkos::Experimental::operator!=;
+  }  // namespace Experimental
+
+  using ::Kokkos::create_mirror;
+  using ::Kokkos::create_mirror_view;
+  using ::Kokkos::create_mirror_view_and_copy;
+
+  using ::Kokkos::deep_copy;
+
+  using ::Kokkos::subview;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp
index 03af9fe7fbd..3182ff8caa1 100644
--- a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_OFFSETVIEW_HPP_
 #define KOKKOS_OFFSETVIEW_HPP_
@@ -21,7 +8,13 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_OFFSETVIEW
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 
 #include 
 
@@ -218,11 +211,15 @@ class OffsetView : public View {
 
  public:
   //----------------------------------------
+  /** \brief  Compatible view of data type */
+  using type =
+      OffsetView;
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
   /** \brief  Compatible view of array of scalar types */
-  using array_type =
-      OffsetView;
+  using array_type KOKKOS_DEPRECATED_WITH_COMMENT("Use type instead.") = type;
+#endif
 
   /** \brief  Compatible view of const data type */
   using const_type =
@@ -236,10 +233,16 @@ class OffsetView : public View {
                  typename traits::array_layout, typename traits::device_type,
                  typename traits::memory_traits>;
 
+  /** \brief  Compatible host mirror view */
+  using host_mirror_type = OffsetView;
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
   /** \brief  Compatible HostMirror view */
-  using HostMirror = OffsetView;
+  using HostMirror KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use host_mirror_type instead.") = host_mirror_type;
+#endif
 
   template 
   KOKKOS_FUNCTION typename base_t::reference_type offset_operator(
@@ -248,36 +251,21 @@ class OffsetView : public View {
   }
 
   template 
-#ifndef KOKKOS_ENABLE_CXX17
     requires(std::is_convertible_v &&
              std::is_nothrow_constructible_v &&
              (base_t::rank() == 1))
-#endif
   KOKKOS_FUNCTION constexpr typename base_t::reference_type operator[](
       const OtherIndexType& idx) const {
-#ifdef KOKKOS_ENABLE_CXX17
-    static_assert(std::is_convertible_v &&
-                  std::is_nothrow_constructible_v &&
-                  (base_t::rank() == 1));
-#endif
     return base_t::operator[](idx - m_begins[0]);
   }
 
   template 
-#ifndef KOKKOS_ENABLE_CXX17
     requires((std::is_convertible_v && ...) &&
              (std::is_nothrow_constructible_v &&
               ...) &&
              (sizeof...(OtherIndexTypes) == base_t::rank()))
-#endif
   KOKKOS_FUNCTION constexpr typename base_t::reference_type operator()(
       OtherIndexTypes... indices) const {
-#ifdef KOKKOS_ENABLE_CXX17
-    static_assert(
-        (std::is_convertible_v && ...) &&
-        (std::is_nothrow_constructible_v && ...) &&
-        (sizeof...(OtherIndexTypes) == base_t::rank()));
-#endif
     return offset_operator(std::make_index_sequence(),
                            indices...);
   }
@@ -300,7 +288,7 @@ class OffsetView : public View {
   // interoperability with View
  private:
   using view_type =
-      View;
 
  public:
@@ -1109,6 +1097,7 @@ KOKKOS_INLINE_FUNCTION
   return offsetView;
 }
 }  // namespace Impl
+}  // namespace Experimental
 
 template 
 KOKKOS_INLINE_FUNCTION
@@ -1117,15 +1106,32 @@ KOKKOS_INLINE_FUNCTION
             void /* deduce subview type from source view traits */
             ,
             ViewTraits, Args...>::type>::type
-    subview(const OffsetView& src, Args... args) {
+    subview(const Kokkos::Experimental::OffsetView& src,
+            Args... args) {
   static_assert(
-      OffsetView::rank() == sizeof...(Args),
+      Kokkos::Experimental::OffsetView::rank() == sizeof...(Args),
       "subview requires one argument for each source OffsetView rank");
 
   return Kokkos::Experimental::Impl::subview_offset(src, args...);
 }
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+namespace Experimental {
+template 
+KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::subview instead")
+KOKKOS_INLINE_FUNCTION
+    typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<
+        typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            ,
+            ViewTraits, Args...>::type>::type
+    subview(const Kokkos::Experimental::OffsetView& src,
+            Args... args) {
+  return Kokkos::subview(src, args...);
+}
 }  // namespace Experimental
+#endif
+
 }  // namespace Kokkos
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -1309,7 +1315,7 @@ inline auto create_mirror(const Kokkos::Experimental::OffsetView& src,
                                          src.begin(4), src.begin(5),
                                          src.begin(6), src.begin(7)});
   } else {
-    return typename Kokkos::Experimental::OffsetView::HostMirror(
+    return typename Kokkos::Experimental::OffsetView::host_mirror_type(
         Kokkos::create_mirror(arg_prop, src.view()), src.begins());
   }
 #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
@@ -1383,16 +1389,18 @@ inline auto create_mirror_view(
     const Kokkos::Experimental::OffsetView& src,
     [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) {
   if constexpr (!Impl::ViewCtorProp::has_memory_space) {
-    if constexpr (std::is_same_v::memory_space,
-                                 typename Kokkos::Experimental::OffsetView<
-                                     T, P...>::HostMirror::memory_space> &&
+    if constexpr (std::is_same_v<
+                      typename Kokkos::Experimental::OffsetView<
+                          T, P...>::memory_space,
+                      typename Kokkos::Experimental::OffsetView<
+                          T, P...>::host_mirror_type::memory_space> &&
                   std::is_same_v::data_type,
                                  typename Kokkos::Experimental::OffsetView<
-                                     T, P...>::HostMirror::data_type>) {
+                                     T, P...>::host_mirror_type::data_type>) {
       return
-          typename Kokkos::Experimental::OffsetView::HostMirror(src);
+          typename Kokkos::Experimental::OffsetView::host_mirror_type(
+              src);
     } else {
       return Kokkos::Impl::choose_create_mirror(src, arg_prop);
     }
diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView.cppm b/lib/kokkos/containers/src/Kokkos_ScatterView.cppm
new file mode 100644
index 00000000000..8d90c9a0eea
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_ScatterView.cppm
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.scatter_view;
+
+export {
+  namespace Kokkos {
+
+  namespace Experimental {
+  using ::Kokkos::Experimental::ScatterView;
+
+  using ::Kokkos::Experimental::contribute;
+  using ::Kokkos::Experimental::create_scatter_view;
+
+  using ::Kokkos::Experimental::is_scatter_view;
+  using ::Kokkos::Experimental::is_scatter_view_v;
+
+  using ::Kokkos::Experimental::ScatterDuplicated;
+  using ::Kokkos::Experimental::ScatterNonDuplicated;
+
+  using ::Kokkos::Experimental::ScatterAccess;
+
+  using ::Kokkos::Experimental::ScatterAtomic;
+  using ::Kokkos::Experimental::ScatterNonAtomic;
+
+  using ::Kokkos::Experimental::ScatterMax;
+  using ::Kokkos::Experimental::ScatterMin;
+  using ::Kokkos::Experimental::ScatterProd;
+  using ::Kokkos::Experimental::ScatterSum;
+  }  // namespace Experimental
+
+  using ::Kokkos::realloc;
+  using ::Kokkos::resize;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp
index e68efe56782..5a7b76df370 100644
--- a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /// \file Kokkos_ScatterView.hpp
 /// \brief Declaration and definition of Kokkos::ScatterView.
@@ -27,8 +14,16 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SCATTERVIEW
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
+#include 
+#include 
 
 namespace Kokkos {
 namespace Experimental {
diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView_Impl.cppm b/lib/kokkos/containers/src/Kokkos_ScatterView_Impl.cppm
new file mode 100644
index 00000000000..6fe02d068d2
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_ScatterView_Impl.cppm
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.scatter_view_impl;
+
+export {
+  namespace Kokkos::Impl::Experimental {
+  using ::Kokkos::Impl::Experimental::DefaultContribution;
+  using ::Kokkos::Impl::Experimental::DefaultDuplication;
+  }  // namespace Kokkos::Impl::Experimental
+}
diff --git a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
index e1f31c6f910..a3b6b871782 100644
--- a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
+++ b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_STATICCRSGRAPH_HPP
 #define KOKKOS_STATICCRSGRAPH_HPP
@@ -288,9 +275,15 @@ class StaticCrsGraph {
 
   using staticcrsgraph_type =
       StaticCrsGraph;
-  using HostMirror = StaticCrsGraph;
+
+  using host_mirror_type = StaticCrsGraph;
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  using HostMirror KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use host_mirror_type instead.") = host_mirror_type;
+#endif
 
   using row_map_type =
       View;
@@ -401,14 +394,14 @@ typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
 template 
 typename StaticCrsGraph::HostMirror
+                        SizeType>::host_mirror_type
 create_mirror_view(const StaticCrsGraph& input);
 
 template 
 typename StaticCrsGraph::HostMirror
+                        SizeType>::host_mirror_type
 create_mirror(const StaticCrsGraph& input);
 
diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.cppm b/lib/kokkos/containers/src/Kokkos_UnorderedMap.cppm
new file mode 100644
index 00000000000..e7e3264ef26
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.cppm
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.unordered_map;
+
+export {
+  namespace Kokkos {
+  using ::Kokkos::UnorderedMap;
+
+  using ::Kokkos::UnorderedMapInsertOpTypes;
+  using ::Kokkos::UnorderedMapInsertResult;
+
+  using ::Kokkos::create_mirror;
+
+  using ::Kokkos::deep_copy;
+  }  // namespace Kokkos
+}
diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
index 12030ba11f1..73121c5d1ae 100644
--- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
+++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /// \file Kokkos_UnorderedMap.hpp
 /// \brief Declaration and definition of Kokkos::UnorderedMap.
@@ -27,11 +14,17 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_UNORDEREDMAP
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.bitset;
+import kokkos.functional;
+#else
 #include 
-#include 
-
 #include 
-
+#include 
+#endif
+#include 
 #include 
 #include 
 #include 
@@ -62,6 +55,39 @@
 
 namespace Kokkos {
 
+namespace Impl {
+
+template 
+auto allocate_without_initializing_if_possible(
+    const Impl::ViewCtorProp &alloc_prop, Args &&...args) {
+  using alloc_prop_t = std::remove_cvref_t;
+
+  // if incompatible we don't add the property
+  if constexpr (alloc_prop_t::sequential_host_init)
+    return ViewType(alloc_prop, std::forward(args)...);
+  // otherwise we add it if unset
+  else
+    return ViewType(
+        Impl::with_properties_if_unset(alloc_prop, WithoutInitializing),
+        std::forward(args)...);
+}
+
+template 
+auto allocate_with_sequential_host_init_if_possible(
+    const Impl::ViewCtorProp &alloc_prop, Args &&...args) {
+  // if incompatible we don't add the property
+  if constexpr (!SpaceAccessibility<
+                    typename ViewType::execution_space::memory_space,
+                    HostSpace>::accessible)
+    return ViewType(alloc_prop, std::forward(args)...);
+  // otherwise we add it if unset
+  else
+    return ViewType(
+        Impl::with_properties_if_unset(alloc_prop, SequentialHostInit),
+        std::forward(args)...);
+}
+}  // namespace Impl
+
 enum : unsigned { UnorderedMapInvalidIndex = ~0u };
 
 /// \brief First element of the return value of UnorderedMap::insert().
@@ -278,9 +304,14 @@ class UnorderedMap {
 
   using insert_result = UnorderedMapInsertResult;
 
-  using HostMirror =
+  using host_mirror_type =
       UnorderedMap;
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  using HostMirror KOKKOS_DEPRECATED_WITH_COMMENT(
+      "Use host_mirror_type instead.") = host_mirror_type;
+#endif
+
   using histogram_type = Impl::UnorderedMapHistogram;
   //@}
 
@@ -331,7 +362,11 @@ class UnorderedMap {
   UnorderedMap(const Impl::ViewCtorProp &arg_prop,
                size_type capacity_hint = 0, hasher_type hasher = hasher_type(),
                equal_to_type equal_to = equal_to_type())
-      : m_bounded_insert(true), m_hasher(hasher), m_equal_to(equal_to) {
+      : m_bounded_insert(true),
+        m_hasher(hasher),
+        m_equal_to(equal_to),
+        m_sequential_host_init(
+            std::remove_cvref_t::sequential_host_init) {
     if (!is_insertable_map) {
       Kokkos::Impl::throw_runtime_exception(
           "Cannot construct a non-insertable (i.e. const key_type) "
@@ -339,7 +374,7 @@ class UnorderedMap {
     }
 
     //! Ensure that allocation properties are consistent.
-    using alloc_prop_t = std::decay_t;
+    using alloc_prop_t = std::remove_cvref_t;
     static_assert(alloc_prop_t::initialize,
                   "Allocation property 'initialize' should be true.");
     static_assert(
@@ -350,8 +385,6 @@ class UnorderedMap {
     /// properties.
     const auto prop_copy =
         Impl::with_properties_if_unset(arg_prop, std::string("UnorderedMap"));
-    const auto prop_copy_noinit =
-        Impl::with_properties_if_unset(prop_copy, Kokkos::WithoutInitializing);
 
     //! Initialize member views.
     m_size = shared_size_t(Kokkos::view_alloc(
@@ -362,14 +395,16 @@ class UnorderedMap {
         bitset_type(Kokkos::Impl::append_to_label(prop_copy, " - bitset"),
                     calculate_capacity(capacity_hint));
 
-    m_hash_lists = size_type_view(
-        Kokkos::Impl::append_to_label(prop_copy_noinit, " - hash list"),
-        Impl::find_hash_size(capacity()));
+    m_hash_lists =
+        Impl::allocate_without_initializing_if_possible(
+            Kokkos::Impl::append_to_label(prop_copy, " - hash list"),
+            Impl::find_hash_size(capacity()));
 
-    m_next_index = size_type_view(
-        Kokkos::Impl::append_to_label(prop_copy_noinit, " - next index"),
-        capacity() + 1);  // +1 so that the *_at functions can always return a
-                          // valid reference
+    m_next_index =
+        Impl::allocate_without_initializing_if_possible(
+            Kokkos::Impl::append_to_label(prop_copy, " - next index"),
+            capacity() + 1);  // +1 so that the *_at functions can always return
+                              // a valid reference
 
     m_keys = key_type_view(Kokkos::Impl::append_to_label(prop_copy, " - keys"),
                            capacity());
@@ -446,7 +481,12 @@ class UnorderedMap {
     requested_capacity =
         (requested_capacity < curr_size) ? curr_size : requested_capacity;
 
-    insertable_map_type tmp(requested_capacity, m_hasher, m_equal_to);
+    auto tmp =
+        m_sequential_host_init
+            ? Impl::allocate_with_sequential_host_init_if_possible<
+                  insertable_map_type>(view_alloc(), requested_capacity,
+                                       m_hasher, m_equal_to)
+            : insertable_map_type(requested_capacity, m_hasher, m_equal_to);
 
     if (curr_size) {
       tmp.m_bounded_insert = false;
@@ -807,7 +847,8 @@ class UnorderedMap {
         m_next_index(src.m_next_index),
         m_keys(src.m_keys),
         m_values(src.m_values),
-        m_scalars(src.m_scalars) {}
+        m_scalars(src.m_scalars),
+        m_sequential_host_init(src.m_sequential_host_init) {}
 
   template 
   std::enable_if_t<
@@ -815,16 +856,17 @@ class UnorderedMap {
                                   SValue>::value,
       declared_map_type &>
   operator=(UnorderedMap const &src) {
-    m_bounded_insert    = src.m_bounded_insert;
-    m_hasher            = src.m_hasher;
-    m_equal_to          = src.m_equal_to;
-    m_size              = src.m_size;
-    m_available_indexes = src.m_available_indexes;
-    m_hash_lists        = src.m_hash_lists;
-    m_next_index        = src.m_next_index;
-    m_keys              = src.m_keys;
-    m_values            = src.m_values;
-    m_scalars           = src.m_scalars;
+    m_bounded_insert       = src.m_bounded_insert;
+    m_hasher               = src.m_hasher;
+    m_equal_to             = src.m_equal_to;
+    m_size                 = src.m_size;
+    m_available_indexes    = src.m_available_indexes;
+    m_hash_lists           = src.m_hash_lists;
+    m_next_index           = src.m_next_index;
+    m_keys                 = src.m_keys;
+    m_values               = src.m_values;
+    m_scalars              = src.m_scalars;
+    m_sequential_host_init = src.m_sequential_host_init;
     return *this;
   }
 
@@ -850,12 +892,14 @@ class UnorderedMap {
       UnorderedMap const &src) {
     insertable_map_type tmp;
 
-    tmp.m_bounded_insert    = src.m_bounded_insert;
-    tmp.m_hasher            = src.m_hasher;
-    tmp.m_equal_to          = src.m_equal_to;
-    tmp.m_size()            = src.m_size();
-    tmp.m_available_indexes = bitset_type(src.capacity());
-    tmp.m_hash_lists        = size_type_view(
+    tmp.m_bounded_insert       = src.m_bounded_insert;
+    tmp.m_hasher               = src.m_hasher;
+    tmp.m_equal_to             = src.m_equal_to;
+    tmp.m_size()               = src.m_size();
+    tmp.m_sequential_host_init = src.m_sequential_host_init;
+    tmp.m_available_indexes    = bitset_type(src.capacity());
+
+    tmp.m_hash_lists = size_type_view(
         view_alloc(WithoutInitializing, "UnorderedMap hash list"),
         src.m_hash_lists.extent(0));
     tmp.m_next_index = size_type_view(
@@ -865,8 +909,13 @@ class UnorderedMap {
         key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"),
                       src.m_keys.extent(0));
     tmp.m_values =
-        value_type_view(view_alloc(WithoutInitializing, "UnorderedMap values"),
-                        src.m_values.extent(0));
+        src.m_sequential_host_init
+            ? Impl::allocate_with_sequential_host_init_if_possible<
+                  value_type_view>(view_alloc("UnorderedMap values"),
+                                   src.m_values.extent(0))
+            : Impl::allocate_without_initializing_if_possible(
+                  view_alloc("UnorderedMap values"), src.m_values.extent(0));
+
     tmp.m_scalars = scalars_view("UnorderedMap scalars");
 
     *this = tmp;
@@ -966,6 +1015,7 @@ class UnorderedMap {
   key_type_view m_keys;
   value_type_view m_values;
   scalars_view m_scalars;
+  bool m_sequential_host_init = false;
 
   template 
@@ -993,11 +1043,11 @@ inline void deep_copy(
 // Specialization of create_mirror() for an UnorderedMap object.
 template 
-typename UnorderedMap::HostMirror
+typename UnorderedMap::host_mirror_type
 create_mirror(
     const UnorderedMap &src) {
-  typename UnorderedMap::HostMirror
-      dst;
+  typename UnorderedMap::host_mirror_type dst;
   dst.allocate_view(src);
   return dst;
 }
diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap_Impl.cppm b/lib/kokkos/containers/src/Kokkos_UnorderedMap_Impl.cppm
new file mode 100644
index 00000000000..f365bd191a1
--- /dev/null
+++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap_Impl.cppm
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+module;
+
+#include 
+
+export module kokkos.unordered_map_impl;
+
+export {
+  namespace Kokkos::Impl {
+  using ::Kokkos::Impl::UnorderedMapPrint;
+  }  // namespace Kokkos::Impl
+}
diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp
index 32b1c36c9bc..63eca15c3db 100644
--- a/lib/kokkos/containers/src/Kokkos_Vector.hpp
+++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_VECTOR_HPP
 #define KOKKOS_VECTOR_HPP
diff --git a/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp
index bc0937fd978..dbc1dac6c16 100644
--- a/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp
+++ b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_BITSET_IMPL_HPP
 #define KOKKOS_BITSET_IMPL_HPP
diff --git a/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
index 76d57ab2aa3..d2e5987085a 100644
--- a/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
+++ b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
 #define KOKKOS_FUNCTIONAL_IMPL_HPP
diff --git a/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
index 468e617d506..557bea04f7f 100644
--- a/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
+++ b/lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
@@ -1,25 +1,17 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
 #define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 namespace Kokkos {
@@ -27,7 +19,7 @@ namespace Kokkos {
 template 
 inline typename StaticCrsGraph::HostMirror
+                               SizeType>::host_mirror_type
 create_mirror_view(const StaticCrsGraph& view,
                    std::enable_if_t
 inline typename StaticCrsGraph::HostMirror
+                               SizeType>::host_mirror_type
 create_mirror(const StaticCrsGraph& view) {
   // Force copy:
@@ -46,10 +38,10 @@ create_mirror(const StaticCrsGraph;
 
-  typename staticcrsgraph_type::HostMirror tmp;
-  typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map =
+  typename staticcrsgraph_type::host_mirror_type tmp;
+  typename staticcrsgraph_type::row_map_type::host_mirror_type tmp_row_map =
       create_mirror(view.row_map);
-  typename staticcrsgraph_type::row_block_type::HostMirror
+  typename staticcrsgraph_type::row_block_type::host_mirror_type
       tmp_row_block_offsets = create_mirror(view.row_block_offsets);
 
   // Allocation to match:
@@ -69,7 +61,7 @@ create_mirror(const StaticCrsGraph
 inline typename StaticCrsGraph::HostMirror
+                               SizeType>::host_mirror_type
 create_mirror_view(const StaticCrsGraph& view,
                    std::enable_if_t
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.unordered_map;
+#else
 #include 
+#endif
+#include 
 
 namespace Kokkos {
 namespace Impl {
diff --git a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
index a979ee40d8c..c5c5f1a0b48 100644
--- a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
+++ b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
@@ -1,23 +1,16 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
 #define KOKKOS_UNORDERED_MAP_IMPL_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.core_impl;
+#else
 #include 
+#endif
 #include 
 
 #include 
@@ -132,7 +125,7 @@ struct UnorderedMapHistogram {
   using size_type       = typename map_type::size_type;
 
   using histogram_view      = View;
-  using host_histogram_view = typename histogram_view::HostMirror;
+  using host_histogram_view = typename histogram_view::host_mirror_type;
 
   map_type m_map;
   histogram_view m_length;
diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt
index a2a699d7718..ad64811b34d 100644
--- a/lib/kokkos/containers/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt
@@ -21,6 +21,7 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL)
       DynViewAPI_rank12345
       DynViewAPI_rank67
       DynRankView_Ctors
+      DynRankView_LayoutMember
       DynRankView_TeamScratch
       DynRankView_ViewCustomization
       ErrorReporter
diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile
deleted file mode 100644
index 73da3c4e2fa..00000000000
--- a/lib/kokkos/containers/unit_tests/Makefile
+++ /dev/null
@@ -1,174 +0,0 @@
-KOKKOS_PATH = ../..
-
-GTEST_PATH = ../../TPL/gtest
-
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/openmp
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/hpx
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/hip
-vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/cuda
-vpath %.cpp ${CURDIR}
-default: build_all
-	echo "End Build"
-
-ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
-else
-  CXX = g++
-endif
-
-CXXFLAGS = -O3
-LINK ?= $(CXX)
-LDFLAGS ?=
-override LDFLAGS += -lpthread
-
-KOKKOS_USE_DEPRECATED_MAKEFILES=1
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files
-
-TEST_TARGETS =
-TARGETS =
-
-TESTS = Bitset DualView DynamicView DynViewAPI_generic DynViewAPI_rank12345 DynViewAPI_rank67 ErrorReporter OffsetView ScatterView UnorderedMap ViewCtorPropEmbeddedDim
-tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
-  tmp2 := $(foreach test, $(TESTS), \
-    $(if $(filter Test$(device)_$(test).cpp, $(shell ls Test$(device)_$(test).cpp 2>/dev/null)),,\
-      $(shell echo "$(H)include" > Test$(device)_$(test).cpp); \
-      $(shell echo "$(H)include" >> Test$(device)_$(test).cpp); \
-     )\
-  ) \
-)
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	OBJ_CUDA = UnitTestMain.o gtest-all.o
-	OBJ_CUDA += TestCuda_Bitset.o
-	OBJ_CUDA += TestCuda_DualView.o
-	OBJ_CUDA += TestCuda_DynamicView.o
-	OBJ_CUDA += TestCuda_DynViewAPI_generic.o
-	OBJ_CUDA += TestCuda_DynViewAPI_rank12345.o
-	OBJ_CUDA += TestCuda_DynViewAPI_rank67.o
-	OBJ_CUDA += TestCuda_ErrorReporter.o
-	OBJ_CUDA += TestCuda_OffsetView.o
-	OBJ_CUDA += TestCuda_ScatterView.o
-	OBJ_CUDA += TestCuda_UnorderedMap.o
-	OBJ_CUDA += TestCuda_ViewCtorPropEmbeddedDim.o
-	TARGETS += KokkosContainers_UnitTest_Cuda
-	TEST_TARGETS += test-cuda
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
-	OBJ_THREADS = UnitTestMain.o gtest-all.o
-	OBJ_THREADS += TestThreads_Bitset.o
-	OBJ_THREADS += TestThreads_DualView.o
-	OBJ_THREADS += TestThreads_DynamicView.o
-	OBJ_THREADS += TestThreads_DynViewAPI_generic.o
-	OBJ_THREADS += TestThreads_DynViewAPI_rank12345.o
-	OBJ_THREADS += TestThreads_DynViewAPI_rank67.o
-	OBJ_THREADS += TestThreads_ErrorReporter.o
-	OBJ_THREADS += TestThreads_OffsetView.o
-	OBJ_THREADS += TestThreads_ScatterView.o
-	OBJ_THREADS += TestThreads_UnorderedMap.o
-	OBJ_THREADS += TestThreads_ViewCtorPropEmbeddedDim.o
-	TARGETS += KokkosContainers_UnitTest_Threads
-	TEST_TARGETS += test-threads
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-	OBJ_OPENMP = UnitTestMain.o gtest-all.o
-	OBJ_OPENMP += TestOpenMP_Bitset.o
-	OBJ_OPENMP += TestOpenMP_DualView.o
-	OBJ_OPENMP += TestOpenMP_DynamicView.o
-	OBJ_OPENMP += TestOpenMP_DynViewAPI_generic.o
-	OBJ_OPENMP += TestOpenMP_DynViewAPI_rank12345.o
-	OBJ_OPENMP += TestOpenMP_DynViewAPI_rank67.o
-	OBJ_OPENMP += TestOpenMP_ErrorReporter.o
-	OBJ_OPENMP += TestOpenMP_OffsetView.o
-	OBJ_OPENMP += TestOpenMP_ScatterView.o
-	OBJ_OPENMP += TestOpenMP_UnorderedMap.o
-	OBJ_OPENMP += TestOpenMP_ViewCtorPropEmbeddedDim.o
-	TARGETS += KokkosContainers_UnitTest_OpenMP
-	TEST_TARGETS += test-openmp
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
-	OBJ_HPX = UnitTestMain.o gtest-all.o
-	OBJ_HPX += TestHPX_Bitset.o
-	OBJ_HPX += TestHPX_DualView.o
-	OBJ_HPX += TestHPX_DynamicView.o
-	OBJ_HPX += TestHPX_DynViewAPI_generic.o
-	OBJ_HPX += TestHPX_DynViewAPI_rank12345.o
-	OBJ_HPX += TestHPX_DynViewAPI_rank67.o
-	OBJ_HPX += TestHPX_ErrorReporter.o
-	OBJ_HPX += TestHPX_OffsetView.o
-	OBJ_HPX += TestHPX_ScatterView.o
-	OBJ_HPX += TestHPX_UnorderedMap.o
-	OBJ_HPX += TestHPX_ViewCtorPropEmbeddedDim.o
-	TARGETS += KokkosContainers_UnitTest_HPX
-	TEST_TARGETS += test-hpx
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-	OBJ_SERIAL = UnitTestMain.o gtest-all.o
-	OBJ_SERIAL += TestSerial_Bitset.o
-	OBJ_SERIAL += TestSerial_DualView.o
-	OBJ_SERIAL += TestSerial_DynamicView.o
-	OBJ_SERIAL += TestSerial_DynViewAPI_generic.o
-	OBJ_SERIAL += TestSerial_DynViewAPI_rank12345.o
-	OBJ_SERIAL += TestSerial_DynViewAPI_rank67.o
-	OBJ_SERIAL += TestSerial_ErrorReporter.o
-	OBJ_SERIAL += TestSerial_OffsetView.o
-	OBJ_SERIAL += TestSerial_ScatterView.o
-	OBJ_SERIAL += TestSerial_UnorderedMap.o
-	OBJ_SERIAL += TestSerial_ViewCtorPropEmbeddedDim.o
-	TARGETS += KokkosContainers_UnitTest_Serial
-	TEST_TARGETS += test-serial
-endif
-
-KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda
-
-KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads
-
-KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_OpenMP
-
-KokkosContainers_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_HPX
-
-KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Serial
-
-test-cuda: KokkosContainers_UnitTest_Cuda
-	./KokkosContainers_UnitTest_Cuda
-
-test-threads: KokkosContainers_UnitTest_Threads
-	./KokkosContainers_UnitTest_Threads
-
-test-openmp: KokkosContainers_UnitTest_OpenMP
-	./KokkosContainers_UnitTest_OpenMP
-
-test-hpx: KokkosContainers_UnitTest_HPX
-	./KokkosContainers_UnitTest_HPX
-
-test-serial: KokkosContainers_UnitTest_Serial
-	./KokkosContainers_UnitTest_Serial
-
-build_all: $(TARGETS)
-
-test: $(TEST_TARGETS)
-
-clean: kokkos-clean
-	rm -f *.o $(TARGETS) *.cpp
-
-# Compilation rules
-
-%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
-
-gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/containers/unit_tests/TestBitset.hpp b/lib/kokkos/containers/unit_tests/TestBitset.hpp
index 91dc1710e5f..fc0c1e90b47 100644
--- a/lib/kokkos/containers/unit_tests/TestBitset.hpp
+++ b/lib/kokkos/containers/unit_tests/TestBitset.hpp
@@ -1,26 +1,19 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_BITSET_HPP
 #define KOKKOS_TEST_BITSET_HPP
 
 #include 
 #include 
-#include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.bitset;
+import kokkos.core;
+#else
 #include 
+#include 
+#endif
 #include 
 
 #include <../../core/unit_test/tools/include/ToolTestingUtilities.hpp>
diff --git a/lib/kokkos/containers/unit_tests/TestCompileMain.cpp b/lib/kokkos/containers/unit_tests/TestCompileMain.cpp
index fe164b5ba81..ea9d8eb1805 100644
--- a/lib/kokkos/containers/unit_tests/TestCompileMain.cpp
+++ b/lib/kokkos/containers/unit_tests/TestCompileMain.cpp
@@ -1,17 +1,4 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 int main() {}
diff --git a/lib/kokkos/containers/unit_tests/TestCreateMirror.cpp b/lib/kokkos/containers/unit_tests/TestCreateMirror.cpp
index 8556f756b0b..e40fca4a50d 100644
--- a/lib/kokkos/containers/unit_tests/TestCreateMirror.cpp
+++ b/lib/kokkos/containers/unit_tests/TestCreateMirror.cpp
@@ -1,23 +1,20 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dynamic_view;
+import kokkos.dyn_rank_view;
+import kokkos.offset_view;
+#else
 #include 
 #include 
 #include 
 #include 
+#endif
+
+#include 
 
 template 
 void check_memory_space(TestView, MemorySpace) {
@@ -40,7 +37,7 @@ void test_create_mirror_properties(const View& view) {
   // clang-format off
   
   // create_mirror
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror(WithoutInitializing,                        view), host_mirror_test_space(view));
     check_memory_space(create_mirror(                                            view), host_mirror_test_space(view));
@@ -49,7 +46,7 @@ void test_create_mirror_properties(const View& view) {
   check_memory_space(create_mirror(                     DefaultExecutionSpace{}, view), DeviceMemorySpace{});
 
   // create_mirror_view
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror_view(WithoutInitializing,                        view), host_mirror_test_space(view));
     check_memory_space(create_mirror_view(                                            view), host_mirror_test_space(view));
@@ -58,7 +55,7 @@ void test_create_mirror_properties(const View& view) {
   check_memory_space(create_mirror_view(                     DefaultExecutionSpace{}, view), DeviceMemorySpace{});
 
   // create_mirror view_alloc
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror(view_alloc(WithoutInitializing),                    view), host_mirror_test_space(view));
     check_memory_space(create_mirror(view_alloc(),                                       view), host_mirror_test_space(view));
@@ -67,7 +64,7 @@ void test_create_mirror_properties(const View& view) {
   check_memory_space(create_mirror(view_alloc(                     DeviceMemorySpace{}), view), DeviceMemorySpace{});
 
   // create_mirror_view view_alloc
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror_view(view_alloc(WithoutInitializing),                    view), host_mirror_test_space(view));
     check_memory_space(create_mirror_view(view_alloc(),                                       view), host_mirror_test_space(view));
@@ -76,7 +73,7 @@ void test_create_mirror_properties(const View& view) {
   check_memory_space(create_mirror_view(view_alloc(                     DeviceMemorySpace{}), view), DeviceMemorySpace{});
 
   // create_mirror view_alloc + execution space
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror(view_alloc(DefaultHostExecutionSpace{}, WithoutInitializing),                      view), host_mirror_test_space(view));
     check_memory_space(create_mirror(view_alloc(DefaultHostExecutionSpace{}),                                           view), host_mirror_test_space(view));
@@ -85,7 +82,7 @@ void test_create_mirror_properties(const View& view) {
   check_memory_space(create_mirror(view_alloc(DefaultExecutionSpace{},                            DeviceMemorySpace{}), view), DeviceMemorySpace{});
 
   // create_mirror_view view_alloc + execution space
-  // FIXME DynamicView: HostMirror is the same type
+  // FIXME DynamicView: host_mirror_type is the same type
   if constexpr (!is_dynamic_view::value) {
     check_memory_space(create_mirror_view(view_alloc(DefaultHostExecutionSpace{}, WithoutInitializing),                      view), host_mirror_test_space(view));
     check_memory_space(create_mirror_view(view_alloc(DefaultHostExecutionSpace{}),                                           view), host_mirror_test_space(view));
diff --git a/lib/kokkos/containers/unit_tests/TestDualView.hpp b/lib/kokkos/containers/unit_tests/TestDualView.hpp
index c4fe9c4fa03..9046236dba2 100644
--- a/lib/kokkos/containers/unit_tests/TestDualView.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDualView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_DUALVIEW_HPP
 #define KOKKOS_TEST_DUALVIEW_HPP
@@ -21,8 +8,15 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dual_view;
+#else
+#include 
 #include 
+#endif
+#include 
 
 namespace Test {
 
@@ -775,6 +769,24 @@ TEST(TEST_CATEGORY, dualview_default_constructed) {
   ASSERT_FALSE(dv.need_sync_device());
   dv.sync_device();
 }
+
+TEST(TEST_CATEGORY, dualview_resize_single_device) {
+  using dv_t = Kokkos::DualView;
+  dv_t dv("DV", 10);
+  bool is_same_device = std::is_same_v;
+
+  dv.resize(20);
+  ASSERT_EQ(!is_same_device, dv.need_sync_host());
+  ASSERT_FALSE(dv.need_sync_device());
+
+  dv.sync_host();
+  dv.modify_host();
+  dv.resize(30);
+  ASSERT_FALSE(dv.need_sync_host());
+  ASSERT_EQ(!is_same_device, dv.need_sync_device());
+}
+
 }  // anonymous namespace
 }  // namespace Test
 
diff --git a/lib/kokkos/containers/unit_tests/TestDualViewParameterPack.cpp b/lib/kokkos/containers/unit_tests/TestDualViewParameterPack.cpp
index 861eba2b3e4..e25ca9f079b 100644
--- a/lib/kokkos/containers/unit_tests/TestDualViewParameterPack.cpp
+++ b/lib/kokkos/containers/unit_tests/TestDualViewParameterPack.cpp
@@ -1,21 +1,16 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dual_view;
+#else
 #include 
 #include 
+#endif
+
+#include 
 
 namespace {
 
diff --git a/lib/kokkos/containers/unit_tests/TestDynRankViewTypedefs.cpp b/lib/kokkos/containers/unit_tests/TestDynRankViewTypedefs.cpp
index 46bf9c7ce26..68326391b76 100644
--- a/lib/kokkos/containers/unit_tests/TestDynRankViewTypedefs.cpp
+++ b/lib/kokkos/containers/unit_tests/TestDynRankViewTypedefs.cpp
@@ -1,21 +1,19 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dyn_rank_view;
+#else
 #include 
 #include 
+#endif
+
+#include 
+
+#include 
+#include 
 
 namespace {
 
@@ -57,15 +55,19 @@ constexpr bool test_view_typedefs_impl() {
   static_assert(std::is_same_v::const_data_type>);
   static_assert(std::is_same_v::non_const_data_type>);
   
+  #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
   // FIXME: these should be deprecated and for proper testing (I.e. where this is different from data_type)
   // we would need ensemble types which use the hidden View dimension facility of View (i.e. which make
   // "specialize" not void)
+  KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
   static_assert(std::is_same_v);
   static_assert(std::is_same_v::const_data_type>);
   static_assert(std::is_same_v::non_const_data_type>);
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+  KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
+  #endif
+  #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
   static_assert(std::is_same_v);
-#endif
+  #endif
 
   // FIXME: value_type definition conflicts with mdspan value_type
   static_assert(std::is_same_v);
@@ -84,19 +86,27 @@ constexpr bool test_view_typedefs_impl() {
   static_assert(std::is_same_v);
   static_assert(std::is_same_v);
   static_assert(std::is_same_v);
- 
+
   // FIXME: should be deprecated in favor of reference
   static_assert(std::is_same_v);
   // FIXME: should be deprecated in favor of data_handle_type
   static_assert(std::is_same_v);
- 
+
   // =========================================
   // in Legacy View: some helper View variants
   // =========================================
 
   // FIXME: in contrast to View, hooks_policy is not propagated
   static_assert(std::is_same_v);
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_5
+KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
   static_assert(std::is_same_v>);
+KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
+#endif
+  static_assert(std::is_same_v>);
@@ -108,7 +118,7 @@ constexpr bool test_view_typedefs_impl() {
                                Kokkos::DynRankView>);
-  static_assert(std::is_same_v>);
@@ -173,6 +183,14 @@ constexpr bool test_view_typedefs_impl() {
   // FIXME: should come from accessor_type
   static_assert(std::is_same_v);
   static_assert(std::is_same_v);
+
+  #ifndef KOKKOS_ENABLE_IMPL_VIEW_LEGACY
+  using base_view_type = typename ViewType::view_type;
+  static_assert(
+      std::is_same_v);
+  static_assert(
+      std::is_same_v);
+  #endif
   return true;
 }
 
diff --git a/lib/kokkos/containers/unit_tests/TestDynRankView_Ctors.hpp b/lib/kokkos/containers/unit_tests/TestDynRankView_Ctors.hpp
index 47436bb6a90..445557e88dd 100644
--- a/lib/kokkos/containers/unit_tests/TestDynRankView_Ctors.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynRankView_Ctors.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
diff --git a/lib/kokkos/containers/unit_tests/TestDynRankView_LayoutMember.hpp b/lib/kokkos/containers/unit_tests/TestDynRankView_LayoutMember.hpp
new file mode 100644
index 00000000000..86caca28c2d
--- /dev/null
+++ b/lib/kokkos/containers/unit_tests/TestDynRankView_LayoutMember.hpp
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include 
+
+#include 
+
+namespace {
+
+// Checks that DynRankView::layout() reports the extents of the View it was
+// built from, and that its stride is the expected one or KOKKOS_INVALID_INDEX.
+template 
+void test_dyn_rank_view_layout_member() {
+  bool is_ll = std::is_same_v;
+  {
+    Kokkos::DynRankView a(
+        Kokkos::View("A", 11, 7, 5));
+    auto l = a.layout();
+    ASSERT_EQ(l.dimension[0], 11lu);
+    ASSERT_EQ(l.dimension[1], 7lu);
+    ASSERT_EQ(l.dimension[2], 5lu);
+    // The conditional must be parenthesized: `==` binds tighter than `?:`, so
+    // the unparenthesized form was always truthy and checked nothing.
+    ASSERT_TRUE(
+        (l.stride == (is_ll ? 11lu : 5lu) || l.stride == KOKKOS_INVALID_INDEX));
+  }
+  {
+    Kokkos::DynRankView a(Kokkos::View("A", 7, 5));
+    auto l = a.layout();
+    ASSERT_EQ(l.dimension[0], 7lu);
+    ASSERT_EQ(l.dimension[1], 5lu);
+    // Parenthesized conditional: `==` binds tighter than `?:`.
+    ASSERT_TRUE(
+        (l.stride == (is_ll ? 7lu : 5lu) || l.stride == KOKKOS_INVALID_INDEX));
+  }
+}
+
+}  // namespace
+
+TEST(TEST_CATEGORY, dyn_rank_view_layout_member) {
+  test_dyn_rank_view_layout_member();
+  test_dyn_rank_view_layout_member();
+}
diff --git a/lib/kokkos/containers/unit_tests/TestDynRankView_TeamScratch.hpp b/lib/kokkos/containers/unit_tests/TestDynRankView_TeamScratch.hpp
index d03bf0960c1..96eb83af90f 100644
--- a/lib/kokkos/containers/unit_tests/TestDynRankView_TeamScratch.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynRankView_TeamScratch.hpp
@@ -1,22 +1,16 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dyn_rank_view;
+#else
+#include 
 #include 
+#endif
 
 namespace {
 
diff --git a/lib/kokkos/containers/unit_tests/TestDynRankView_ViewCustomization.hpp b/lib/kokkos/containers/unit_tests/TestDynRankView_ViewCustomization.hpp
index 0b8c6a341dc..7bb538b4498 100644
--- a/lib/kokkos/containers/unit_tests/TestDynRankView_ViewCustomization.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynRankView_ViewCustomization.hpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 // Duplicate from
@@ -55,13 +47,14 @@ struct TestAccessorStrided {
   KOKKOS_DEFAULTED_FUNCTION
   constexpr TestAccessorStrided() = default;
 
-  template ,
                                  Kokkos::default_accessor>,
                              int> = 0>
   KOKKOS_FUNCTION constexpr TestAccessorStrided(
-      const TestAccessorStrided& other) noexcept
+      const TestAccessorStrided&
+          other) noexcept
       : size(other.size), stride(other.stride) {}
 
   KOKKOS_FUNCTION
@@ -203,4 +196,75 @@ TEST(TEST_CATEGORY, view_customization_extra_int_arg) {
         sizeof(double);
     ASSERT_EQ(shmem, expected_shmem_size);
   }
+  // Rank 7 (seven extents followed by the accessor size)
+  {
+    view_t a("A", 2, 3, 2, 7, 2, 11, 2, 5);
+    ASSERT_EQ(a.rank(), 7lu);
+    ASSERT_EQ(a.extent(0), 2lu);
+    ASSERT_EQ(a.extent(1), 3lu);
+    ASSERT_EQ(a.extent(2), 2lu);
+    ASSERT_EQ(a.extent(3), 7lu);
+    ASSERT_EQ(a.extent(4), 2lu);
+    ASSERT_EQ(a.extent(5), 11lu);
+    ASSERT_EQ(a.extent(6), 2lu);
+    ASSERT_EQ(a.accessor().size, 5lu);
+    ASSERT_EQ(a.accessor().stride, size_t(16 * 3 * 7 * 11));
+    view_t b(a.data(), 2, 3, 2, 7, 2, 11, 2, 5);
+    ASSERT_EQ(b.rank(), 7lu);
+    ASSERT_EQ(b.extent(0), 2lu);
+    ASSERT_EQ(b.extent(1), 3lu);
+    ASSERT_EQ(b.extent(2), 2lu);
+    ASSERT_EQ(b.extent(3), 7lu);
+    ASSERT_EQ(b.extent(4), 2lu);
+    ASSERT_EQ(b.extent(5), 11lu);
+    ASSERT_EQ(b.extent(6), 2lu);
+    ASSERT_EQ(b.accessor().size, 5lu);
+    ASSERT_EQ(b.accessor().stride, size_t(16 * 3 * 7 * 11));
+    size_t shmem = view_t::shmem_size(2, 3, 2, 7, 2, 11, 2, 5);
+    size_t expected_shmem_size =
+        2lu * 3lu * 2lu * 7lu * 2lu * 11lu * 2lu * 5lu * sizeof(double) +
+        sizeof(double);
+    ASSERT_EQ(shmem, expected_shmem_size);
+  }
+  // With accessor arg and no label
+  {
+    // This should not interpret the last argument (11) as an accessor arg since
+    // we are providing AccessorArg_t explicitly
+    // Note that with and without
+    // labels are two separate cases
+    view_t a(Kokkos::view_alloc(Kokkos::Impl::AccessorArg_t{5ul}), 3, 7, 11);
+    ASSERT_EQ(a.rank(), 3lu);
+    ASSERT_EQ(a.extent(0), 3lu);
+    ASSERT_EQ(a.extent(1), 7lu);
+    ASSERT_EQ(a.extent(2), 11lu);
+    ASSERT_EQ(a.accessor().size, 5lu);
+    ASSERT_EQ(a.accessor().stride, size_t(3 * 7 * 11));
+  }
+  // With accessor arg and label
+  {
+    // This should not interpret the last argument (11) as an accessor arg since
+    // we are providing AccessorArg_t explicitly
+    // Note that with and without
+    // labels are two separate cases
+    view_t a(Kokkos::view_alloc("A", Kokkos::Impl::AccessorArg_t{5ul}), 3, 7,
+             11);
+    ASSERT_EQ(a.rank(), 3lu);
+    ASSERT_EQ(a.extent(0), 3lu);
+    ASSERT_EQ(a.extent(1), 7lu);
+    ASSERT_EQ(a.extent(2), 11lu);
+    ASSERT_EQ(a.accessor().size, 5lu);
+    ASSERT_EQ(a.accessor().stride, size_t(3 * 7 * 11));
+  }
+  // Create mirror
+  {
+    view_t a("A", 3, 7, 11, 5);
+
+    auto b = Kokkos::create_mirror(a);
+    ASSERT_EQ(b.rank(), 3lu);
+    ASSERT_EQ(b.extent(0), 3lu);
+    ASSERT_EQ(b.extent(1), 7lu);
+    ASSERT_EQ(b.extent(2), 11lu);
+    ASSERT_EQ(b.accessor().size, 5lu);
+    ASSERT_EQ(b.accessor().stride, size_t(3 * 7 * 11));
+  }
 }
diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
index b26b0b2ff29..7adbd021ac4 100644
--- a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp
@@ -1,25 +1,19 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dyn_rank_view;
+import kokkos.dyn_rank_view_impl;
+#else
 #include 
+#include 
+#endif
 #include 
 #include 
-#include 
 
 /*--------------------------------------------------------------------------*/
 
@@ -731,7 +725,7 @@ class TestDynViewAPI {
 
   static void run_test_mirror() {
     using view_type   = Kokkos::DynRankView;
-    using mirror_type = typename view_type::HostMirror;
+    using mirror_type = typename view_type::host_mirror_type;
     view_type a("a");
     mirror_type am = Kokkos::create_mirror_view(a);
     mirror_type ax = Kokkos::create_mirror(a);
@@ -1117,8 +1111,9 @@ class TestDynViewAPI {
   }
 
   static void run_test_scalar() {
-    using hView0 = typename dView0::HostMirror;  // HostMirror of DynRankView is
-                                                 // a DynRankView
+    using hView0 =
+        typename dView0::host_mirror_type;  // host_mirror_type of DynRankView
+                                            // is a DynRankView
 
     dView0 dx, dy;
     hView0 hx, hy;
@@ -1216,7 +1211,7 @@ class TestDynViewAPI {
     // usual "(void)" marker to avoid compiler warnings for unused
     // variables.
 
-    using hView0 = typename dView0::HostMirror;
+    using hView0 = typename dView0::host_mirror_type;
 
     {
       hView0 thing;
diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI_generic.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI_generic.hpp
index d4747538d9b..b71fb2d0ede 100644
--- a/lib/kokkos/containers/unit_tests/TestDynViewAPI_generic.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI_generic.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 namespace Test {
diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank12345.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank12345.hpp
index 9cefad190a5..a32a4758c93 100644
--- a/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank12345.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank12345.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank67.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank67.hpp
index 703811e0f4b..71a920bd78c 100644
--- a/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank67.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI_rank67.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 namespace Test {
diff --git a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp
index 803650ffc6a..a78ab39ea27 100644
--- a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp
+++ b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_DYNAMICVIEW_HPP
 #define KOKKOS_TEST_DYNAMICVIEW_HPP
@@ -21,9 +8,14 @@
 #include 
 #include 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dynamic_view;
+#else
 #include 
-
 #include 
+#endif
 #include 
 
 namespace Test {
@@ -220,7 +212,8 @@ struct TestDynamicView {
     //   Case 4:
     {
       using device_view_type = Kokkos::View;
-      using host_view_type = typename Kokkos::View::HostMirror;
+      using host_view_type =
+          typename Kokkos::View::host_mirror_type;
 
       view_type device_dynamic_view("on-device DynamicView", 1024,
                                     arg_total_size);
diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
index 4ebab889c78..4ce301e6986 100644
--- a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
+++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
@@ -1,26 +1,19 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
 #define KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
 
 #include 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.error_reporter;
+#else
 #include 
 #include 
+#endif
 
 namespace Test {
 
@@ -55,6 +48,7 @@ struct ErrorReporterDriverBase {
   using report_type = ThreeValReport;
   using error_reporter_type =
       Kokkos::Experimental::ErrorReporter;
+
   error_reporter_type m_errorReporter;
 
   ErrorReporterDriverBase(int reporter_capacity, int /*test_size*/)
@@ -66,8 +60,18 @@ struct ErrorReporterDriverBase {
 
   void check_expectations(int reporter_capacity, int test_size) {
     using namespace std;
-    int num_reported = m_errorReporter.getNumReports();
-    int num_attempts = m_errorReporter.getNumReportAttempts();
+    int num_reported = m_errorReporter.num_reports();
+    int num_attempts = m_errorReporter.num_report_attempts();
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
+    KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
+#endif
+    EXPECT_EQ(num_reported, m_errorReporter.getNumReports());
+    EXPECT_EQ(num_attempts, m_errorReporter.getNumReportAttempts());
+#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
+    KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
+#endif
+#endif
 
     int expected_num_reports = min(reporter_capacity, test_size / 2);
     EXPECT_EQ(expected_num_reports, num_reported);
@@ -82,23 +86,32 @@ struct ErrorReporterDriverBase {
 template 
 void TestErrorReporter() {
   using tester_type = ErrorReporterDriverType;
+
   std::vector reporters;
   std::vector reports;
 
   tester_type test1(100, 10);
-  test1.m_errorReporter.getReports(reporters, reports);
+
+  std::tie(reporters, reports) = test1.m_errorReporter.get_reports();
   checkReportersAndReportsAgree(reporters, reports);
 
   tester_type test2(10, 100);
+  auto [reporters2, reports2] = test2.m_errorReporter.get_reports();
+  checkReportersAndReportsAgree(reporters2, reports2);
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
+#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
+  KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
+#endif
   test2.m_errorReporter.getReports(reporters, reports);
   checkReportersAndReportsAgree(reporters, reports);
 
-  typename Kokkos::View<
-      int *, typename ErrorReporterDriverType::execution_space>::HostMirror
-      view_reporters;
+  typename Kokkos::View::
+      host_mirror_type view_reporters;
   typename Kokkos::View::
-      HostMirror view_reports;
+      host_mirror_type view_reports;
   test2.m_errorReporter.getReports(view_reporters, view_reports);
 
   int num_reports = view_reporters.extent(0);
@@ -112,6 +125,10 @@ void TestErrorReporter() {
     reports.push_back(view_reports(i));
   }
   checkReportersAndReportsAgree(reporters, reports);
+#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
+  KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
+#endif
+#endif
 }
 
 template 
@@ -122,12 +139,23 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase {
 
   ErrorReporterDriver(int reporter_capacity, int test_size)
       : driver_base(reporter_capacity, test_size) {
+    EXPECT_EQ(driver_base::m_errorReporter.capacity(), reporter_capacity);
+    EXPECT_EQ(driver_base::m_errorReporter.num_reports(), 0);
+    EXPECT_EQ(driver_base::m_errorReporter.num_report_attempts(), 0);
+
     execute(reporter_capacity, test_size);
 
     // Test that clear() and resize() work across memory spaces.
     if (reporter_capacity < test_size) {
       driver_base::m_errorReporter.clear();
+      EXPECT_EQ(driver_base::m_errorReporter.capacity(), reporter_capacity);
+      EXPECT_EQ(driver_base::m_errorReporter.num_reports(), 0);
+      EXPECT_EQ(driver_base::m_errorReporter.num_report_attempts(), 0);
+
       driver_base::m_errorReporter.resize(test_size);
+      EXPECT_EQ(driver_base::m_errorReporter.capacity(), test_size);
+      EXPECT_EQ(driver_base::m_errorReporter.num_reports(), 0);
+      EXPECT_EQ(driver_base::m_errorReporter.num_report_attempts(), 0);
       execute(test_size, test_size);
     }
   }
@@ -213,5 +241,33 @@ TEST(TEST_CATEGORY, ErrorReporter) {
   TestErrorReporter>();
 }
 
+TEST(TEST_CATEGORY, ErrorReporter_label_ctor) {
+  Kokkos::Experimental::ErrorReporter logger("Reporter",
+                                                                  10);
+}
+
+void ErrorReporter_test_resize() {
+  Kokkos::Experimental::ErrorReporter logger("Reporter",
+                                                                  10);
+
+  // produce more errors than we can store
+  Kokkos::parallel_for(
+      "TestErrorReporter_resize", Kokkos::RangePolicy(0, 20),
+      KOKKOS_LAMBDA(int i) { logger.add_report(i, 0); });
+
+  ASSERT_EQ(logger.num_reports(), 10);
+  ASSERT_EQ(logger.num_report_attempts(), 20);
+
+  logger.resize(15);
+  ASSERT_EQ(logger.num_reports(), 10);
+  ASSERT_EQ(logger.num_report_attempts(), 10);
+
+  logger.resize(5);
+  ASSERT_EQ(logger.num_reports(), 5);
+  ASSERT_EQ(logger.num_report_attempts(), 10);
+}
+
+TEST(TEST_CATEGORY, ErrorReporter_resize) { ErrorReporter_test_resize(); }
+
 }  // namespace Test
 #endif  // #ifndef KOKKOS_TEST_ERROR_REPORTING_HPP
diff --git a/lib/kokkos/containers/unit_tests/TestIsViewTrait.cpp b/lib/kokkos/containers/unit_tests/TestIsViewTrait.cpp
index 994ba7ca0cb..17b90ea6ab7 100644
--- a/lib/kokkos/containers/unit_tests/TestIsViewTrait.cpp
+++ b/lib/kokkos/containers/unit_tests/TestIsViewTrait.cpp
@@ -1,25 +1,22 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dual_view;
+import kokkos.dyn_rank_view;
+import kokkos.dynamic_view;
+import kokkos.offset_view;
+import kokkos.scatter_view;
+#else
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#endif
 
 namespace {
 
diff --git a/lib/kokkos/containers/unit_tests/TestOffsetView.hpp b/lib/kokkos/containers/unit_tests/TestOffsetView.hpp
index 4bd98117b1a..9fe74ba33b5 100644
--- a/lib/kokkos/containers/unit_tests/TestOffsetView.hpp
+++ b/lib/kokkos/containers/unit_tests/TestOffsetView.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*
  * FIXME the OffsetView class is really not very well tested.
@@ -24,11 +11,15 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.offset_view;
+#else
+#include 
 #include 
-
-using std::cout;
-using std::endl;
+#endif
+#include 
 
 namespace Test {
 
@@ -100,7 +91,7 @@ void test_offsetview_construction() {
   }
   {  // test deep copy of scalar const value into mirro
     const int constVal = 6;
-    typename offset_view_type::HostMirror hostOffsetView =
+    typename offset_view_type::host_mirror_type hostOffsetView =
         Kokkos::create_mirror_view(ov);
 
     Kokkos::deep_copy(hostOffsetView, constVal);
@@ -131,7 +122,7 @@ void test_offsetview_construction() {
       KOKKOS_LAMBDA(const int i, const int j) { ov(i, j) = constValue; });
 
   // test offsetview to offsetviewmirror deep copy
-  typename offset_view_type::HostMirror hostOffsetView =
+  typename offset_view_type::host_mirror_type hostOffsetView =
       Kokkos::create_mirror_view(ov);
 
   Kokkos::deep_copy(hostOffsetView, ov);
@@ -468,7 +459,7 @@ void test_offsetview_subview() {
     Kokkos::Experimental::OffsetView sliceMe("offsetToSlice",
                                                               {-10, 20});
     {
-      auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, 0);
       ASSERT_EQ(offsetSubview.rank(), 0u) << "subview of offset is broken.";
     }
   }
@@ -476,14 +467,12 @@ void test_offsetview_subview() {
     Kokkos::Experimental::OffsetView sliceMe(
         "offsetToSlice", {-10, 20}, {-20, 30});
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), -2);
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(), -2);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
 
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
   }
@@ -495,24 +484,24 @@ void test_offsetview_subview() {
 
     // slice 1
     {
-      auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),
-                                                         Kokkos::ALL(), 0);
+      auto offsetSubview =
+          Kokkos::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0);
       ASSERT_EQ(offsetSubview.rank(), 2u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),
-                                                         0, Kokkos::ALL());
+      auto offsetSubview =
+          Kokkos::subview(sliceMe, Kokkos::ALL(), 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 2u) << "subview of offset is broken.";
     }
 
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, 0, Kokkos::ALL(), Kokkos::ALL());
+      auto offsetSubview =
+          Kokkos::subview(sliceMe, 0, Kokkos::ALL(), Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 2u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, 0, Kokkos::ALL(), Kokkos::make_pair(-30, -21));
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, Kokkos::ALL(),
+                                           Kokkos::make_pair(-30, -21));
       ASSERT_EQ(offsetSubview.rank(), 2u) << "subview of offset is broken.";
 
       ASSERT_EQ(offsetSubview.begin(0), -20);
@@ -549,19 +538,16 @@ void test_offsetview_subview() {
 
     // slice 2
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(), 0, 0);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
 
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, Kokkos::ALL(), 0);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
   }
@@ -573,69 +559,65 @@ void test_offsetview_subview() {
 
     // slice 1
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(),
+                                           Kokkos::ALL(), Kokkos::ALL(), 0);
       ASSERT_EQ(offsetSubview.rank(), 3u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(),
+                                           Kokkos::ALL(), 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 3u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, Kokkos::ALL(), 0, Kokkos::ALL(), Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(), 0,
+                                           Kokkos::ALL(), Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 3u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview = Kokkos::Experimental::subview(
-          sliceMe, 0, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, Kokkos::ALL(),
+                                           Kokkos::ALL(), Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 3u) << "subview of offset is broken.";
     }
 
     // slice 2
-    auto offsetSubview2a = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),
-                                                         Kokkos::ALL(), 0, 0);
+    auto offsetSubview2a =
+        Kokkos::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, 0);
     ASSERT_EQ(offsetSubview2a.rank(), 2u) << "subview of offset is broken.";
     {
-      auto offsetSubview2b = Kokkos::Experimental::subview(
-          sliceMe, Kokkos::ALL(), 0, Kokkos::ALL(), 0);
+      auto offsetSubview2b =
+          Kokkos::subview(sliceMe, Kokkos::ALL(), 0, Kokkos::ALL(), 0);
       ASSERT_EQ(offsetSubview2b.rank(), 2u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview2b = Kokkos::Experimental::subview(
-          sliceMe, Kokkos::ALL(), 0, 0, Kokkos::ALL());
+      auto offsetSubview2b =
+          Kokkos::subview(sliceMe, Kokkos::ALL(), 0, 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview2b.rank(), 2u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview2b = Kokkos::Experimental::subview(
-          sliceMe, 0, Kokkos::ALL(), 0, Kokkos::ALL());
+      auto offsetSubview2b =
+          Kokkos::subview(sliceMe, 0, Kokkos::ALL(), 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview2b.rank(), 2u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview2b = Kokkos::Experimental::subview(
-          sliceMe, 0, 0, Kokkos::ALL(), Kokkos::ALL());
+      auto offsetSubview2b =
+          Kokkos::subview(sliceMe, 0, 0, Kokkos::ALL(), Kokkos::ALL());
       ASSERT_EQ(offsetSubview2b.rank(), 2u) << "subview of offset is broken.";
     }
     // slice 3
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0, 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, Kokkos::ALL(), 0, 0, 0);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0, 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, Kokkos::ALL(), 0, 0);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL(), 0);
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, 0, Kokkos::ALL(), 0);
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
     {
-      auto offsetSubview =
-          Kokkos::Experimental::subview(sliceMe, 0, 0, 0, Kokkos::ALL());
+      auto offsetSubview = Kokkos::subview(sliceMe, 0, 0, 0, Kokkos::ALL());
       ASSERT_EQ(offsetSubview.rank(), 1u) << "subview of offset is broken.";
     }
   }
diff --git a/lib/kokkos/containers/unit_tests/TestScatterView.hpp b/lib/kokkos/containers/unit_tests/TestScatterView.hpp
index 791271bfcac..d0fa56d4422 100644
--- a/lib/kokkos/containers/unit_tests/TestScatterView.hpp
+++ b/lib/kokkos/containers/unit_tests/TestScatterView.hpp
@@ -1,23 +1,18 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
 #define KOKKOS_TEST_SCATTER_VIEW_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.scatter_view;
+import kokkos.scatter_view_impl;
+#else
+#include 
 #include 
+#endif
 #include 
 
 namespace Test {
diff --git a/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
index 6e595d58c47..bc45aa881e9 100644
--- a/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
+++ b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
@@ -21,7 +8,12 @@
 #define KOKKOS_IMPL_DO_NOT_WARN_INCLUDE_STATIC_CRS_GRAPH
 #include 
 #undef KOKKOS_IMPL_DO_NOT_WARN_INCLUDE_STATIC_CRS_GRAPH
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 /*--------------------------------------------------------------------------*/
 namespace Test {
@@ -30,7 +22,7 @@ namespace TestStaticCrsGraph {
 template 
 void run_test_graph() {
   using dView = Kokkos::StaticCrsGraph;
-  using hView = typename dView::HostMirror;
+  using hView = typename dView::host_mirror_type;
 
   const unsigned LENGTH = 1000;
 
@@ -88,7 +80,7 @@ void run_test_graph() {
 template 
 void run_test_graph2() {
   using dView = Kokkos::StaticCrsGraph;
-  using hView = typename dView::HostMirror;
+  using hView = typename dView::host_mirror_type;
 
   const unsigned LENGTH = 10;
 
@@ -148,7 +140,7 @@ void run_test_graph3(size_t B, size_t N) {
   srand(10310);
 
   using dView = Kokkos::StaticCrsGraph;
-  using hView = typename dView::HostMirror;
+  using hView = typename dView::host_mirror_type;
 
   const unsigned LENGTH = 2000;
 
@@ -185,7 +177,7 @@ void run_test_graph4() {
   using memory_traits_type = Kokkos::MemoryUnmanaged;
   using dView = Kokkos::StaticCrsGraph;
-  using hView = typename dView::HostMirror;
+  using hView = typename dView::host_mirror_type;
 
   dView dx;
 
diff --git a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp
index fc7435a75e5..0408b35fb27 100644
--- a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp
+++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp
@@ -1,25 +1,22 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_UNORDERED_MAP_HPP
 #define KOKKOS_TEST_UNORDERED_MAP_HPP
 
 #include 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.functional;
+import kokkos.unordered_map;
+import kokkos.unordered_map_impl;
+#else
+#include 
+#include 
 #include 
+#endif
 
 namespace Test {
 
@@ -75,7 +72,7 @@ struct TestInsert {
     ASSERT_EQ(map_h.size(), map.size());
 
     if (!rehash_on_fail && CheckValues) {
-      typename expected_values_type::HostMirror expected_values_h =
+      typename expected_values_type::host_mirror_type expected_values_h =
           create_mirror_view(expected_values);
       Kokkos::deep_copy(expected_values_h, expected_values);
       for (unsigned i = 0; i < map_h.size(); i++) {
@@ -349,7 +346,7 @@ void test_deep_copy(uint32_t num_nodes) {
   using const_map_type =
       Kokkos::UnorderedMap;
 
-  using host_map_type = typename map_type::HostMirror;
+  using host_map_type = typename map_type::host_mirror_type;
 
   map_type map;
   map.rehash(num_nodes, false);
@@ -560,6 +557,48 @@ TEST(TEST_CATEGORY, UnorderedMap_constructor_view_alloc) {
   ASSERT_TRUE(map.is_allocated());
 }
 
+////////////////////////// Tests for UnorderedMap with View as value_type
+
+/**
+ * @test This test ensures that an @ref UnorderedMap with View as value_type can
+ * be built with a SequentialHostInit instance (using @ref view_alloc).
+ */
+TEST(TEST_CATEGORY, UnorderedMap_View_as_value) {
+  using value_type = Kokkos::View;
+  using map_type   = Kokkos::UnorderedMap;
+  map_type map(Kokkos::view_alloc(Kokkos::SequentialHostInit, "test view umap"),
+               150);
+  // creation: the new map is empty, allocated, and holds >= 150 entries
+  ASSERT_EQ(map.size(), 0u);
+  ASSERT_GE(map.capacity(), 150u);
+  ASSERT_TRUE(map.is_allocated());
+
+  // insert: two distinct keys, each mapped to a newly constructed View
+  ASSERT_TRUE(map.insert(1, Kokkos::View(
+                                "UnorderedMap inserted View one", 10))
+                  .success());
+  ASSERT_TRUE(map.insert(2, Kokkos::View(
+                                "UnorderedMap inserted View two", 20))
+                  .success());
+  ASSERT_EQ(map.size(), 2u);
+
+  // copy: a copy-constructed map reports the same size and capacity bound
+  map_type map_copy(map);
+  ASSERT_EQ(map_copy.size(), 2u);
+  ASSERT_GE(map_copy.capacity(), 150u);
+  ASSERT_TRUE(map_copy.is_allocated());
+
+  // rehash: growing the original must succeed and give capacity >= 200
+  ASSERT_TRUE(map.rehash(200u));
+  ASSERT_GE(map.capacity(), 200u);
+  ASSERT_TRUE(map.is_allocated());
+
+  // assign: copy assignment from the rehashed map keeps size, capacity >= 200
+  map_copy = map;
+  ASSERT_EQ(map_copy.size(), 2u);
+  ASSERT_GE(map_copy.capacity(), 200u);
+  ASSERT_TRUE(map_copy.is_allocated());
+}
 }  // namespace Test
 
 #endif  // KOKKOS_TEST_UNORDERED_MAP_HPP
diff --git a/lib/kokkos/containers/unit_tests/TestVector.hpp b/lib/kokkos/containers/unit_tests/TestVector.hpp
index abed2676d76..d5a420e99a1 100644
--- a/lib/kokkos/containers/unit_tests/TestVector.hpp
+++ b/lib/kokkos/containers/unit_tests/TestVector.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_VECTOR_HPP
 #define KOKKOS_TEST_VECTOR_HPP
diff --git a/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp b/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp
index 2edddcce34f..0482a19e789 100644
--- a/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp
+++ b/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp
@@ -1,25 +1,18 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dyn_rank_view;
+#else
 #include 
 #include 
+#endif
 
 #include 
 #include 
@@ -66,7 +59,7 @@ struct TestViewCtorProp_EmbeddedDim {
         using CommonViewValueType =
             typename decltype(view_alloc_arg)::value_type;
         using CVT     = typename Kokkos::View;
-        using HostCVT = typename CVT::HostMirror;
+        using HostCVT = typename CVT::host_mirror_type;
 
         // Construct View using the common type; for case of specialization, an
         // 'embedded_dim' would be stored by view_alloc_arg
@@ -103,7 +96,7 @@ struct TestViewCtorProp_EmbeddedDim {
         using CommonViewValueType =
             typename decltype(view_alloc_arg)::value_type;
         using CVT     = typename Kokkos::View;
-        using HostCVT = typename CVT::HostMirror;
+        using HostCVT = typename CVT::host_mirror_type;
 
         // Construct View using the common type; for case of specialization, an
         // 'embedded_dim' would be stored by view_alloc_arg
@@ -136,7 +129,7 @@ struct TestViewCtorProp_EmbeddedDim {
         using CommonViewValueType =
             typename decltype(view_alloc_arg)::value_type;
         using CVT     = typename Kokkos::View;
-        using HostCVT = typename CVT::HostMirror;
+        using HostCVT = typename CVT::host_mirror_type;
 
         // Construct View using the common type; for case of specialization, an
         // 'embedded_dim' would be stored by view_alloc_arg
@@ -157,7 +150,7 @@ struct TestViewCtorProp_EmbeddedDim {
         using CommonViewValueType =
             typename decltype(view_alloc_arg)::value_type;
         using CVT     = typename Kokkos::View;
-        using HostCVT = typename CVT::HostMirror;
+        using HostCVT = typename CVT::host_mirror_type;
 
         // Construct View using the common type; for case of specialization, an
         // 'embedded_dim' would be stored by view_alloc_arg
diff --git a/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp b/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp
index 2932898554c..77d1074174a 100644
--- a/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp
+++ b/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp
@@ -1,26 +1,23 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+import kokkos.dual_view;
+import kokkos.dyn_rank_view;
+import kokkos.dynamic_view;
+import kokkos.offset_view;
+import kokkos.scatter_view;
+#else
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#endif
 
 #include <../../core/unit_test/tools/include/ToolTestingUtilities.hpp>
 
diff --git a/lib/kokkos/containers/unit_tests/UnitTestMain.cpp b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
index 11a1cb717a3..cc9e6719470 100644
--- a/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
+++ b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
@@ -1,21 +1,13 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 int main(int argc, char *argv[]) {
   Kokkos::initialize(argc, argv);
diff --git a/lib/kokkos/core/perf_test/BenchmarkMain.cpp b/lib/kokkos/core/perf_test/BenchmarkMain.cpp
index 3524d58c0c9..4c90afc30af 100644
--- a/lib/kokkos/core/perf_test/BenchmarkMain.cpp
+++ b/lib/kokkos/core/perf_test/BenchmarkMain.cpp
@@ -1,23 +1,15 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 
 #include "Benchmark_Context.hpp"
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #include "PerfTest_Category.hpp"
 
diff --git a/lib/kokkos/core/perf_test/Benchmark_Context.cpp b/lib/kokkos/core/perf_test/Benchmark_Context.cpp
index 9aa63cc34f3..30647eae2bc 100644
--- a/lib/kokkos/core/perf_test/Benchmark_Context.cpp
+++ b/lib/kokkos/core/perf_test/Benchmark_Context.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "Benchmark_Context.hpp"
 
@@ -45,7 +32,7 @@ void add_kokkos_configuration(bool verbose) {
   // Iterate over lines returned from kokkos and extract key:value pairs
   std::stringstream ss{msg.str()};
   for (std::string line; std::getline(ss, line, '\n');) {
-    auto found = line.find_first_of(':');
+    auto found = line.find_last_of(':');
     if (found != std::string::npos) {
       auto val = remove_unwanted_characters(line.substr(found + 1));
       // Ignore line without value, for example a category name
diff --git a/lib/kokkos/core/perf_test/Benchmark_Context.hpp b/lib/kokkos/core/perf_test/Benchmark_Context.hpp
index f9f20c64094..ff5583981f2 100644
--- a/lib/kokkos/core/perf_test/Benchmark_Context.hpp
+++ b/lib/kokkos/core/perf_test/Benchmark_Context.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CORE_PERFTEST_BENCHMARK_CONTEXT_HPP
 #define KOKKOS_CORE_PERFTEST_BENCHMARK_CONTEXT_HPP
@@ -21,7 +8,12 @@
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 namespace KokkosBenchmark {
diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt
index d48bdcd26d2..d8f72825e78 100644
--- a/lib/kokkos/core/perf_test/CMakeLists.txt
+++ b/lib/kokkos/core/perf_test/CMakeLists.txt
@@ -15,8 +15,6 @@ if(KOKKOS_ENABLE_TESTS)
   endif()
 
   kokkos_include_directories(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
-
-  kokkos_add_executable_and_test(PerformanceTest_TaskDag SOURCES test_taskdag.cpp CATEGORIES PERFORMANCE)
 endif()
 
 if(NOT Kokkos_ENABLE_BENCHMARKS)
@@ -24,7 +22,7 @@ if(NOT Kokkos_ENABLE_BENCHMARKS)
 endif()
 
 # Find or download google/benchmark library
-find_package(benchmark QUIET 1.5.6)
+find_package(benchmark QUIET 1.8.3)
 if(benchmark_FOUND)
   message(STATUS "Using google benchmark found in ${benchmark_DIR}")
 else()
@@ -36,8 +34,8 @@ else()
   FetchContent_Declare(
     googlebenchmark
     DOWNLOAD_EXTRACT_TIMESTAMP FALSE
-    URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz
-    URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7
+    URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.tar.gz
+    URL_HASH MD5=7b93dd03670665684f1b2e9b70ad17fe
   )
   FetchContent_MakeAvailable(googlebenchmark)
   list(POP_BACK CMAKE_MESSAGE_INDENT)
@@ -78,8 +76,10 @@ set(BENCHMARK_SOURCES
     PerfTestGramSchmidt.cpp
     PerfTest_CustomReduction.cpp
     PerfTest_ExecSpacePartitioning.cpp
+    PerfTest_Gemv.cpp
     PerfTestHexGrad.cpp
     PerfTest_MallocFree.cpp
+    PerfTest_Stream.cpp
     PerfTest_ViewAllocate.cpp
     PerfTest_ViewCopy_a123.cpp
     PerfTest_ViewCopy_b123.cpp
@@ -127,6 +127,11 @@ kokkos_add_benchmark(PerformanceTest_Benchmark SOURCES ${BENCHMARK_SOURCES})
 
 kokkos_add_benchmark(Benchmark_Atomic_MinMax SOURCES test_atomic_minmax_simple.cpp)
 
+kokkos_add_benchmark(
+  PerformanceTest_ViewFirstTouch SOURCES PerfTest_ViewFirstTouch_Initialize.cpp PerfTest_ViewFirstTouch_ParallelFor.cpp
+  PerfTest_ViewFirstTouch_DeepCopy.cpp
+)
+
 # FIXME_NVHPC
 if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
   kokkos_add_benchmark(PerformanceTest_Mempool SOURCES test_mempool.cpp)
diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile
deleted file mode 100644
index 4c0172c53b0..00000000000
--- a/lib/kokkos/core/perf_test/Makefile
+++ /dev/null
@@ -1,84 +0,0 @@
-KOKKOS_PATH = ../..
-
-GTEST_PATH = ../../tpls/gtest
-
-vpath %.cpp ${KOKKOS_PATH}/core/perf_test
-
-default: build_all
-	echo "End Build"
-
-ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
-  KOKKOS_CUDA_OPTIONS=enable_lambda
-else
-  CXX = g++
-endif
-
-CXXFLAGS = -O3
-#CXXFLAGS += -DGENERIC_REDUCER
-LINK ?= $(CXX)
-LDFLAGS ?=
-override LDFLAGS += -lpthread
-
-KOKKOS_USE_DEPRECATED_MAKEFILES=1
-
-include $(KOKKOS_PATH)/Makefile.kokkos
-
-KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test
-
-TEST_TARGETS =
-TARGETS =
-
-#
-
-OBJ_TASKDAG = test_taskdag.o
-TARGETS += KokkosCore_PerformanceTest_TaskDAG
-TEST_TARGETS += test-taskdag
-
-#
-
-KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest
-
-KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest_Atomics
-
-KokkosCore_PerformanceTest_Mempool: $(OBJ_MEMPOOL) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_MEMPOOL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Mempool
-
-KokkosCore_PerformanceTest_TaskDAG: $(OBJ_TASKDAG) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_TASKDAG) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_TaskDAG
-
-KokkosCore_PerformanceTest_Atomics_MinMax: $(OBJ_ATOMICS_MINMAX) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(EXTRA_PATH) $(OBJ_ATOMICS_MINMAX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest_Atomics_MinMax
-
-test-performance: KokkosCore_PerformanceTest
-	./KokkosCore_PerformanceTest
-
-test-atomic: KokkosCore_PerformanceTest_Atomics
-	./KokkosCore_PerformanceTest_Atomics
-
-test-mempool: KokkosCore_PerformanceTest_Mempool
-	./KokkosCore_PerformanceTest_Mempool
-
-test-taskdag: KokkosCore_PerformanceTest_TaskDAG
-	./KokkosCore_PerformanceTest_TaskDAG
-
-test-atomic-minmax: KokkosCore_PerformanceTest_Atomics_MinMax
-	./KokkosCore_PerformanceTest_Atomics_MinMax
-
-build_all: $(TARGETS)
-
-test: $(TEST_TARGETS)
-
-clean: kokkos-clean
-	rm -f *.o $(TARGETS)
-
-# Compilation rules
-
-%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
-
-gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
-
diff --git a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp
index 5e6e52f1153..40487fd5802 100644
--- a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp
+++ b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_BLAS_KERNELS_HPP
 #define KOKKOS_BLAS_KERNELS_HPP
diff --git a/lib/kokkos/core/perf_test/PerfTestDriver.hpp b/lib/kokkos/core/perf_test/PerfTestDriver.hpp
index ea54f31902b..025b8f61823 100644
--- a/lib/kokkos/core/perf_test/PerfTestDriver.hpp
+++ b/lib/kokkos/core/perf_test/PerfTestDriver.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
diff --git a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp
index ddfa73d4ba0..759fb354131 100644
--- a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include "PerfTest_Category.hpp"
 
@@ -141,7 +133,7 @@ struct ModifiedGramSchmidt {
     multivector_type Q_("Q", length, count);
     multivector_type R_("R", count, count);
 
-    typename multivector_type::HostMirror A = Kokkos::create_mirror(Q_);
+    typename multivector_type::host_mirror_type A = Kokkos::create_mirror(Q_);
 
     // Create and fill A on the host
     for (size_type j = 0; j < count; ++j) {
diff --git a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp
index 1ebe750f216..df9074cfbdf 100644
--- a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include "Benchmark_Context.hpp"
 #include "PerfTest_Category.hpp"
diff --git a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp
index 7ec1cd6996a..b1d3722877e 100644
--- a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp
+++ b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 namespace Test {
 template ;
-  using host_view_type = typename view_type::HostMirror;
+  using host_view_type = typename view_type::host_mirror_type;
 
   view_type A;
   view_type B;
@@ -285,7 +272,7 @@ struct RangePolicyCollapseTwo {
   using iterate_type = Kokkos::Iterate;
 
   using view_type      = Kokkos::View;
-  using host_view_type = typename view_type::HostMirror;
+  using host_view_type = typename view_type::host_mirror_type;
 
   view_type A;
   view_type B;
@@ -457,7 +444,7 @@ struct RangePolicyCollapseAll {
   using layout          = TestLayout;
 
   using view_type      = Kokkos::View;
-  using host_view_type = typename view_type::HostMirror;
+  using host_view_type = typename view_type::host_mirror_type;
 
   view_type A;
   view_type B;
diff --git a/lib/kokkos/core/perf_test/PerfTestMain.cpp b/lib/kokkos/core/perf_test/PerfTestMain.cpp
index 7315f26e5c1..e740a587491 100644
--- a/lib/kokkos/core/perf_test/PerfTestMain.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestMain.cpp
@@ -1,23 +1,15 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 int main(int argc, char* argv[]) {
diff --git a/lib/kokkos/core/perf_test/PerfTest_Category.hpp b/lib/kokkos/core/perf_test/PerfTest_Category.hpp
index 60f76ea8f56..3bcae22d4a6 100644
--- a/lib/kokkos/core/perf_test/PerfTest_Category.hpp
+++ b/lib/kokkos/core/perf_test/PerfTest_Category.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_TEST_PERFTEST_CAT_HPP
 #define KOKKOS_TEST_PERFTEST_CAT_HPP
diff --git a/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp b/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp
index deb139a5212..2d0f65d2c7b 100644
--- a/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp
@@ -1,29 +1,17 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
-#include 
-#include 
-#include "Benchmark_Context.hpp"
-#include "PerfTest_Category.hpp"
 #include 
 #ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
 import kokkos.random;
 #else
+#include 
 #include 
 #endif
+#include 
+#include "Benchmark_Context.hpp"
+#include "PerfTest_Category.hpp"
 #include 
 
 namespace Test {
diff --git a/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
index e966eaa45dd..fbf651edc31 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
@@ -1,20 +1,14 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
+#include 
+
 #include 
 #include "PerfTest_Category.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_Gemv.cpp b/lib/kokkos/core/perf_test/PerfTest_Gemv.cpp
new file mode 100644
index 00000000000..509208ee59a
--- /dev/null
+++ b/lib/kokkos/core/perf_test/PerfTest_Gemv.cpp
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+/**
+ * @file PerfTest_Gemv.cpp
+ *
+ * This file implements a performance test of a naive GEMV implementation.
+ * This was created to reproduce a performance problem observed on MI300A using
+ * the HIP backend. With a relatively small number of rows (4000), the HIP
+ * backend was choosing large block sizes, which led to very few active EUs on
+ * the GPU.
+ */
+
+#include 
+#include 
+#include "Benchmark_Context.hpp"
+
+namespace Benchmark {
+
+template 
+void impl(benchmark::State& state) {  // times a naive GEMV, y = A * x
+  const size_t M = state.range(0);  // rows of A and length of y
+  const size_t N = state.range(1);  // columns of A and length of x
+
+  Kokkos::View A("A", M, N);
+  Kokkos::View y("y", M);
+  Kokkos::View x("x", N);
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;  // fresh timer for every benchmark iteration
+    Kokkos::parallel_for(
+        P(0, y.extent(0)), KOKKOS_LAMBDA(const int i) {  // one work item per row
+          int sum = 0;
+          for (int j = 0; j < x.extent_int(0); j++) {
+            sum += A(i, j) * x(j);  // dot product of row i with x
+          }
+          y(i) = sum;
+        });
+    Kokkos::fence();  // wait for the kernel before reading the timer
+    state.SetIterationTime(timer.seconds());  // manual timing (UseManualTime)
+  }
+}
+
+template 
+static void GemvDefault(benchmark::State& state) {  // GEMV with the default policy
+  using P = Kokkos::RangePolicy<>;  // range policy with no explicit arguments
+  impl(state);
+}
+
+template 
+static void Gemv(benchmark::State& state) {  // GEMV with a customised policy
+  using P = Kokkos::RangePolicy>;  // NOTE(review): policy arguments not visible here - confirm
+  impl(state);
+}
+
+#define COMMON_ARGS()                 \
+  ArgNames({"M", "N"})                \
+      ->UseManualTime()               \
+      ->Unit(benchmark::kMicrosecond) \
+      ->Args({4'000, 10'000})         \
+      ->Args({400'000, 100})
+
+BENCHMARK(GemvDefault)->COMMON_ARGS();
+BENCHMARK(GemvDefault)->COMMON_ARGS();
+
+#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
+BENCHMARK(Gemv<64, Kokkos::LayoutLeft>)->COMMON_ARGS();
+BENCHMARK(Gemv<64, Kokkos::LayoutRight>)->COMMON_ARGS();
+BENCHMARK(Gemv<256, Kokkos::LayoutLeft>)->COMMON_ARGS();
+BENCHMARK(Gemv<256, Kokkos::LayoutRight>)->COMMON_ARGS();
+BENCHMARK(Gemv<1024, Kokkos::LayoutLeft>)->COMMON_ARGS();
+BENCHMARK(Gemv<1024, Kokkos::LayoutRight>)->COMMON_ARGS();
+#endif  // KOKKOS_ENABLE_CUDA || KOKKOS_ENABLE_HIP
+
+#undef COMMON_ARGS  // the helper macro is only used by the registrations above
+
+}  // namespace Benchmark
diff --git a/lib/kokkos/core/perf_test/PerfTest_MallocFree.cpp b/lib/kokkos/core/perf_test/PerfTest_MallocFree.cpp
index 80736d99152..729f401e3a4 100644
--- a/lib/kokkos/core/perf_test/PerfTest_MallocFree.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_MallocFree.cpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include "Benchmark_Context.hpp"
 
@@ -69,31 +61,41 @@ static void MallocTouchFree(benchmark::State& state) {
   Impl(state, true, When::after_free);
 }
 
+#ifdef KOKKOS_IMPL_32BIT
+constexpr int test_range = 30;  // 32-bit builds: Range upper bound is 2^30
+#else
+#ifndef KOKKOS_ENABLE_LARGE_MEM_TESTS
+constexpr int test_range = 31;  // default: Range upper bound is 2^31
+#else
+constexpr int test_range = 32;  // large-memory tests: upper bound is 2^32
+#endif  // KOKKOS_ENABLE_LARGE_MEM_TESTS
+#endif  // KOKKOS_IMPL_32BIT
+
 BENCHMARK(Malloc)
     ->ArgName("N")
     ->RangeMultiplier(16)
-    ->Range(1, int64_t(1) << 32)
+    ->Range(1, int64_t(1) << test_range)
     ->UseManualTime()
     ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(MallocFree)
     ->ArgName("N")
     ->RangeMultiplier(16)
-    ->Range(1, int64_t(1) << 32)
+    ->Range(1, int64_t(1) << test_range)
     ->UseManualTime()
     ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(MallocTouch)
     ->ArgName("N")
     ->RangeMultiplier(16)
-    ->Range(1, int64_t(1) << 32)
+    ->Range(1, int64_t(1) << test_range)
     ->UseManualTime()
     ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(MallocTouchFree)
     ->ArgName("N")
     ->RangeMultiplier(16)
-    ->Range(1, int64_t(1) << 32)
+    ->Range(1, int64_t(1) << test_range)
     ->UseManualTime()
     ->Unit(benchmark::kMicrosecond);
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_Stream.cpp b/lib/kokkos/core/perf_test/PerfTest_Stream.cpp
new file mode 100644
index 00000000000..346d85ca70d
--- /dev/null
+++ b/lib/kokkos/core/perf_test/PerfTest_Stream.cpp
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+/**
+ * @file PerfTest_Stream.cpp
+ * @brief Implementation of STREAM benchmark operations for Kokkos.
+ *
+ * @details This file provides a set of memory bandwidth benchmarks based on the
+ * STREAM benchmark suite. It implements the five core STREAM operations (Set,
+ * Copy, Scale, Add, and Triad) using Kokkos parallel primitives. It includes
+ * validation.
+ *
+ * The implementation strives to use as few Kokkos features as possible, thus
+ * validation is performed on the host rather than via parallel_reduce.
+ */
+
+#include 
+#include 
+#include "Benchmark_Context.hpp"
+
+namespace {
+
+using StreamType                   = double;
+constexpr static StreamType A_INIT = 1.0;  // fill value for array a
+constexpr static StreamType B_INIT = 2.0;  // fill value for array b
+constexpr static StreamType C_INIT = 3.0;  // fill value for array c
+constexpr static StreamType SCALAR = 4.0;  // Scale/Triad factor, Set fill value
+
+template 
+using StreamView = Kokkos::View>;
+
+// Differs from benchmarks/stream, which uses int:
+// wide index types are becoming common as GPU memory grows.
+using StreamIndex = int64_t;
+using Policy      = Kokkos::RangePolicy>;
+
+template 
+void perform_set(const V& a, typename V::const_value_type scalar) {  // STREAM Set
+  Kokkos::parallel_for(
+      "set", Policy(0, a.extent(0)),  // one index per element of a
+      KOKKOS_LAMBDA(const StreamIndex i) { a[i] = scalar; });
+
+  Kokkos::fence();  // wait for the kernel so callers can time the operation
+}
+
+template 
+void perform_copy(const V& a, const V& b) {  // STREAM Copy: a is read, b is written
+  Kokkos::parallel_for(
+      "copy", Policy(0, a.extent(0)),  // range comes from a; b is indexed the same
+      KOKKOS_LAMBDA(const StreamIndex i) { b[i] = a[i]; });
+
+  Kokkos::fence();  // wait for the kernel so callers can time the operation
+}
+
+template 
+void perform_scale(const V& b, const V& c,  // STREAM Scale: b is written, c is read
+                   typename V::const_value_type scalar) {
+  Kokkos::parallel_for(
+      "scale", Policy(0, b.extent(0)),  // range comes from the destination b
+      KOKKOS_LAMBDA(const StreamIndex i) { b[i] = scalar * c[i]; });
+
+  Kokkos::fence();  // wait for the kernel so callers can time the operation
+}
+
+template 
+void perform_add(const V& a, const V& b, const V& c) {  // STREAM Add: c = a + b
+  Kokkos::parallel_for(
+      "add", Policy(0, a.extent(0)),  // range comes from a
+      KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i] + b[i]; });
+
+  Kokkos::fence();  // wait for the kernel so callers can time the operation
+}
+
+template 
+void perform_triad(const V& a, const V& b, const V& c,  // STREAM Triad: a is written
+                   typename V::const_value_type scalar) {
+  Kokkos::parallel_for(
+      "triad", Policy(0, a.extent(0)),  // range comes from the destination a
+      KOKKOS_LAMBDA(const StreamIndex i) { a[i] = b[i] + scalar * c[i]; });
+
+  Kokkos::fence();  // wait for the kernel so callers can time the operation
+}
+
+template 
+int validate_array(V& a_dev, typename V::const_value_type expected) {  // nonzero = failure
+  using scalar_type = typename V::non_const_value_type;
+
+  const auto a =  // host-side copy of a_dev, checked serially below
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, a_dev);
+
+  scalar_type error = 0.0;  // running sum of |a[i] - expected|
+  for (size_t i = 0; i < a.size(); ++i) {
+    error += std::abs(a[i] - expected);
+  }
+  const scalar_type avgError = error / (scalar_type)a.size();  // mean absolute error
+
+  // Every expected value is a small integer that floating-point types
+  // represent exactly, so the relative tolerance can be very tight.
+  return std::abs(avgError / expected) >
+         Kokkos::Experimental::epsilon_v;
+}
+
+template 
+static void StreamSet(benchmark::State& state) {
+  const size_t N8                 = std::pow(state.range(0), 8);  // array length is N^8
+  static constexpr int DATA_RATIO = 1;  // presumably arrays touched per element - confirm
+
+  StreamView a(Kokkos::view_alloc(Kokkos::WithoutInitializing, "a"),  // Set overwrites a
+                          N8);
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    perform_set(a, SCALAR);  // timed region; perform_set fences before returning
+    KokkosBenchmark::report_results(state, a, DATA_RATIO, timer.seconds());
+  }
+
+  if (validate_array(a, SCALAR)) {  // every element must equal SCALAR
+    state.SkipWithError("validation failure");
+  }
+}
+
+template 
+static void StreamCopy(benchmark::State& state) {
+  const size_t N8                 = std::pow(state.range(0), 8);  // array length is N^8
+  static constexpr int DATA_RATIO = 2;  // presumably arrays touched per element - confirm
+
+  StreamView a("a", N8), b("b", N8);
+
+  perform_set(a, A_INIT);  // source data, written outside the timed loop
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    perform_copy(a, b);  // timed region: b[i] = a[i], fenced inside perform_copy
+    KokkosBenchmark::report_results(state, a, DATA_RATIO, timer.seconds());
+  }
+
+  if (validate_array(b, A_INIT)) {  // b must now hold a's fill value
+    state.SkipWithError("validation failure");
+  }
+}
+
+template 
+static void StreamScale(benchmark::State& state) {
+  const size_t N8                 = std::pow(state.range(0), 8);  // array length is N^8
+  static constexpr int DATA_RATIO = 2;  // presumably arrays touched per element - confirm
+
+  StreamView a("a", N8), b("b", N8);
+
+  perform_set(b, B_INIT);  // source data, written outside the timed loop
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    perform_scale(a, b, SCALAR);  // a[i] = SCALAR * b[i]; first argument is the output
+    KokkosBenchmark::report_results(state, b, DATA_RATIO, timer.seconds());
+  }
+
+  if (validate_array(a, B_INIT * SCALAR)) {  // a must hold the scaled fill value of b
+    state.SkipWithError("validation failure");
+  }
+}
+
+template 
+static void StreamAdd(benchmark::State& state) {
+  const size_t N8                 = std::pow(state.range(0), 8);  // array length is N^8
+  static constexpr int DATA_RATIO = 3;  // presumably arrays touched per element - confirm
+
+  StreamView a("a", N8), b("b", N8), c("c", N8);
+
+  perform_set(a, A_INIT);
+  perform_set(b, B_INIT);
+  perform_set(c, C_INIT);  // overwritten by perform_add in the loop below
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    perform_add(a, b, c);  // timed region: c[i] = a[i] + b[i]
+    KokkosBenchmark::report_results(state, c, DATA_RATIO, timer.seconds());
+  }
+
+  if (validate_array(c, A_INIT + B_INIT)) {  // c must hold the sum of the fills
+    state.SkipWithError("validation failure");
+  }
+}
+
+template 
+static void StreamTriad(benchmark::State& state) {
+  const size_t N8                 = std::pow(state.range(0), 8);  // array length is N^8
+  static constexpr int DATA_RATIO = 3;  // presumably arrays touched per element - confirm
+
+  StreamView a("a", N8), b("b", N8), c("c", N8);
+
+  perform_set(a, A_INIT);  // overwritten by perform_triad in the loop below
+  perform_set(b, B_INIT);
+  perform_set(c, C_INIT);
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    perform_triad(a, b, c, SCALAR);  // timed region: a[i] = b[i] + SCALAR * c[i]
+    KokkosBenchmark::report_results(state, a, DATA_RATIO, timer.seconds());
+  }
+
+  if (validate_array(a, B_INIT + SCALAR * C_INIT)) {  // expected triad result
+    state.SkipWithError("validation failure");
+  }
+}
+
+// Runs bm(state); a thrown std::runtime_error skips the benchmark instead.
+template 
+static void or_skip(benchmark::State& state) {
+  try {
+    bm(state);
+  } catch (const std::runtime_error& e) {  // other exception types still propagate
+    state.SkipWithError(e.what());  // the exception message becomes the skip reason
+  }
+}
+
+// As of May 2025, 10^8 doubles (N = 10) is larger than caches, but not so large
+// as to be inconvenient. N = 11 (11^8) is also run as a quick convergence check.
+#define STREAM_ARGS(label)            \
+  Name(label)                         \
+      ->ArgName("N")                  \
+      ->Arg(10)                       \
+      ->Arg(11)                       \
+      ->Unit(benchmark::kMillisecond) \
+      ->UseManualTime()
+
+// clang-format off
+// Formatting is disabled here because clang-format laid these registrations
+// out inconsistently, which made the common pattern hard to see.
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamSet");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamSet");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamCopy");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamCopy");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamScale");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamScale");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamAdd");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamAdd");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamTriad");
+
+BENCHMARK(or_skip>)
+    ->STREAM_ARGS("StreamTriad");
+// clang-format on
+
+#undef STREAM_ARGS  // the helper macro is only used by the registrations above
+
+}  // namespace
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp
index 163e1d7d048..0a6b8dfc277 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp
@@ -1,23 +1,17 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include "Benchmark_Context.hpp"
 
+#include 
+
 namespace Test {
 
 static constexpr int N = 10;
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp
index 1d1cb1f48dc..2a375597b01 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp
@@ -1,28 +1,21 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CORE_PERFTEST_BENCHMARK_VIEW_COPY_HPP
 #define KOKKOS_CORE_PERFTEST_BENCHMARK_VIEW_COPY_HPP
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #include 
 
 #include "Benchmark_Context.hpp"
 #include 
+#include 
 
 namespace Test {
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_Raw.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_Raw.cpp
index e4db40e128c..77a79b65bf6 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_Raw.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_Raw.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a123.cpp
index 5b627d3dd46..c4c677445ce 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a45.cpp
index 32006025660..be25727ad3d 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a6.cpp
index 0855299aad3..50a467b59cc 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a7.cpp
index 36577ef2eff..35602ef6789 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a8.cpp
index c449d684f1c..5bf3c19522c 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_a8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b123.cpp
index 8675f427d74..a61b5c16205 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b45.cpp
index 93522fcf0d4..2ec8469cca4 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b6.cpp
index be95c7cab3e..37b3804b2b6 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b7.cpp
index f8eee75ce71..d463c74960b 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b8.cpp
index 01dda2a33f5..2bf58baf8ff 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_b8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c123.cpp
index 25e87474746..c1dbf94e94c 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c45.cpp
index b1f4a7b5773..930f0e5e5e5 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c6.cpp
index 8120664792a..7f3b263ab3b 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c7.cpp
index cee9f5bd014..8c5ad225cfb 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c8.cpp
index 6f204a42221..09e1ab442e3 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_c8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d123.cpp
index 6d72bea4908..90d366c451b 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d45.cpp
index 1a407cd648c..ac8bd8cfac2 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d6.cpp
index 27b1a816fc8..d23ae54abac 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d7.cpp
index 17d4bf20773..50ddc1f5598 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d8.cpp
index 7bd02632287..9ab7a2ffc01 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy_d8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewCopy.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp
index e0fd432d94e..7dd60291d1d 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp
@@ -1,22 +1,10 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "Benchmark_Context.hpp"
 
 #include 
+#include 
 
 namespace Test {
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_123.cpp
index c714181fc97..93d8a8da7a4 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_45.cpp
index 6a5acfb0d6e..ca333e4230d 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_6.cpp
index dca20c70dfb..f5d2cbec3ce 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_7.cpp
index 6fa8a418c6a..e00b323f530 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_8.cpp
index 954b097d83e..27ee742b29c 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill_Raw.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill_Raw.cpp
index 57bba83a9c1..87aa2e41187 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewFill_Raw.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill_Raw.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewFill.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_DeepCopy.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_DeepCopy.cpp
new file mode 100644
index 00000000000..4f042abc4ab
--- /dev/null
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_DeepCopy.cpp
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include "Benchmark_Context.hpp"
+
+namespace Benchmark {
+
+template <typename DataType>
+void ViewFirstTouch_DeepCopy(benchmark::State& state) {
+  const int N               = state.range(0);
+  const DataType init_value = static_cast<DataType>(state.range(1));
+  using ViewType            = Kokkos::View<DataType*>;
+  ViewType v_a("A", N);
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    Kokkos::deep_copy(v_a, init_value);
+    KokkosBenchmark::report_results(state, v_a, 2, timer.seconds());
+  }
+}
+
+BENCHMARK_TEMPLATE(ViewFirstTouch_DeepCopy, double)
+    ->ArgNames({"N", "init_value"})
+    ->RangeMultiplier(8)
+    ->Ranges({{int64_t(1) << 6, int64_t(1) << 24}, {0, 1}})
+    ->UseManualTime();
+
+}  // namespace Benchmark
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_Initialize.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_Initialize.cpp
new file mode 100644
index 00000000000..c1dc57cf383
--- /dev/null
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_Initialize.cpp
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include "Benchmark_Context.hpp"
+
+namespace Benchmark {
+
+template <typename DataType>
+void ViewFirstTouch_Initialize(benchmark::State& state) {
+  const int N    = state.range(0);
+  using ViewType = Kokkos::View<DataType*>;
+
+  for (auto _ : state) {
+    Kokkos::Timer timer;
+    ViewType v_a("A", N);
+    Kokkos::fence();
+    KokkosBenchmark::report_results(state, v_a, 1, timer.seconds());
+  }
+}
+
+BENCHMARK_TEMPLATE(ViewFirstTouch_Initialize, double)
+    ->ArgName("N")
+    ->RangeMultiplier(8)
+    ->Range(int64_t(1) << 6, int64_t(1) << 24)
+    ->UseManualTime();
+
+}  // namespace Benchmark
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_ParallelFor.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_ParallelFor.cpp
new file mode 100644
index 00000000000..85879e9b41f
--- /dev/null
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewFirstTouch_ParallelFor.cpp
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#include "Benchmark_Context.hpp"
+
+namespace Benchmark {
+
+template <typename DataType>
+void ViewFirstTouch_ParallelFor(benchmark::State& state) {
+  const int N    = state.range(0);
+  using ViewType = Kokkos::View<DataType*>;
+
+  for (auto _ : state) {
+    ViewType v_a("A", N);
+    Kokkos::fence();
+    Kokkos::Timer timer;
+    Kokkos::parallel_for(
+        "ViewFirstTouch_ParallelFor", N, KOKKOS_LAMBDA(const int i) {
+          v_a(i) = static_cast<DataType>(2) * v_a(i) + static_cast<DataType>(1);
+        });
+        });
+    Kokkos::fence();
+    KokkosBenchmark::report_results(state, v_a, 2, timer.seconds());
+  }
+}
+
+BENCHMARK_TEMPLATE(ViewFirstTouch_ParallelFor, double)
+    ->ArgName("N")
+    ->RangeMultiplier(8)
+    ->Range(int64_t(1) << 6, int64_t(1) << 24)
+    ->UseManualTime();
+
+}  // namespace Benchmark
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp
index de6981e17af..ce1d8c4af12 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include 
 #include "Benchmark_Context.hpp"
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_123.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_123.cpp
index 0b3141eead0..82fc2e752ee 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_123.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_123.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_45.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_45.cpp
index f5eec387cbd..481e4833f26 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_45.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_45.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_6.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_6.cpp
index 6b639d3a672..22e451c3043 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_6.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_6.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_7.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_7.cpp
index 8ebf80e3ffe..cf8dec6eab3 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_7.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_7.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_8.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_8.cpp
index 5e741e800b1..13366149e60 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_8.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_8.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize_Raw.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize_Raw.cpp
index ab469cb647c..754e1e34ec7 100644
--- a/lib/kokkos/core/perf_test/PerfTest_ViewResize_Raw.cpp
+++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize_Raw.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include "PerfTest_ViewResize.hpp"
 
diff --git a/lib/kokkos/core/perf_test/run_taskdag.sh b/lib/kokkos/core/perf_test/run_taskdag.sh
deleted file mode 100755
index dcb016c9d54..00000000000
--- a/lib/kokkos/core/perf_test/run_taskdag.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash -e
-NT=$1
-PROG="./KokkosCore_PerformanceTest_TaskDAG"
-COMMON_ARGS="--kokkos-threads=$NT --alloc_size=10027008 --super_size=65536 --repeat_outer=10"
-
-postproc() {
-cat log | grep "tasks per second" | rev | cut -d ' ' -f 2 | rev >> yvals
-}
-
-rm -f xvals yvals
-for x in 21 23
-do
-  echo "test input $x"
-  echo $x >> xvals
-  $PROG $COMMON_ARGS --input=$x 2>&1 | tee log
-  postproc
-done
-
-rm -f datapoints.txt
-paste xvals yvals > datapoints.txt
-
diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp
index 99c2dc89a49..b702b3ba6ed 100644
--- a/lib/kokkos/core/perf_test/test_atomic.cpp
+++ b/lib/kokkos/core/perf_test/test_atomic.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
@@ -21,7 +8,12 @@
 #include 
 #include "Benchmark_Context.hpp"
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 using exec_space = Kokkos::DefaultExecutionSpace;
@@ -30,7 +22,7 @@ template 
 struct ZeroFunctor {
   using execution_space = DEVICE_TYPE;
   using type            = typename Kokkos::View;
-  using h_type          = typename Kokkos::View::HostMirror;
+  using h_type = typename Kokkos::View::host_mirror_type;
   type data;
   KOKKOS_INLINE_FUNCTION
   void operator()(int) const { data() = 0; }
diff --git a/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp b/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp
index bc35d1c776f..a3ad984f032 100644
--- a/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp
+++ b/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 // export OMP_PROC_BIND=spread ; export OMP_PLACES=threads
 // c++  -O2 -g -DNDEBUG  -fopenmp
@@ -25,7 +12,12 @@
 
 #include "Benchmark_Context.hpp"
 #include "PerfTest_Category.hpp"
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 using exec_space = Kokkos::DefaultExecutionSpace;
 
diff --git a/lib/kokkos/core/perf_test/test_mempool.cpp b/lib/kokkos/core/perf_test/test_mempool.cpp
index 7f9c5be77ca..50bb744576e 100644
--- a/lib/kokkos/core/perf_test/test_mempool.cpp
+++ b/lib/kokkos/core/perf_test/test_mempool.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #include 
 #include 
@@ -21,7 +8,12 @@
 #include 
 
 #include 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 
 #include "Benchmark_Context.hpp"
diff --git a/lib/kokkos/core/perf_test/test_reduction.cpp b/lib/kokkos/core/perf_test/test_reduction.cpp
index 2d000650b94..f5dcede8994 100644
--- a/lib/kokkos/core/perf_test/test_reduction.cpp
+++ b/lib/kokkos/core/perf_test/test_reduction.cpp
@@ -1,20 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #include 
 #include "Benchmark_Context.hpp"
diff --git a/lib/kokkos/core/perf_test/test_sharedSpace.cpp b/lib/kokkos/core/perf_test/test_sharedSpace.cpp
index 3c06770e286..e69ae131c4d 100644
--- a/lib/kokkos/core/perf_test/test_sharedSpace.cpp
+++ b/lib/kokkos/core/perf_test/test_sharedSpace.cpp
@@ -1,19 +1,11 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #if defined _WIN32
 #include 
diff --git a/lib/kokkos/core/perf_test/test_taskdag.cpp b/lib/kokkos/core/perf_test/test_taskdag.cpp
deleted file mode 100644
index 347d9748b5a..00000000000
--- a/lib/kokkos/core/perf_test/test_taskdag.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#include 
-
-#include 
-
-#if !defined(KOKKOS_ENABLE_TASKDAG) || \
-    defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS)
-
-int main() { return 0; }
-
-#else
-
-#include 
-#include 
-#include 
-#include 
-
-#include 
-
-#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
-// We allow using deprecated classes in this file
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
-#endif
-
-using ExecSpace = Kokkos::DefaultExecutionSpace;
-
-inline long eval_fib(long n) {
-  constexpr long mask = 0x03;
-
-  long fib[4] = {0, 1, 0, 0};
-
-  for (long i = 2; i <= n; ++i) {
-    fib[i & mask] = fib[(i - 1) & mask] + fib[(i - 2) & mask];
-  }
-
-  return fib[n & mask];
-}
-
-inline long fib_alloc_count(long n) {
-  constexpr long mask = 0x03;
-
-  long count[4] = {1, 1, 0, 0};
-
-  for (long i = 2; i <= n; ++i) {
-    count[i & mask] = 2  // this task plus the 'when_all' task
-                      + count[(i - 1) & mask] + count[(i - 2) & mask];
-  }
-
-  return count[n & mask];
-}
-
-template 
-struct TestFib {
-  using MemorySpace = typename Scheduler::memory_space;
-  using MemberType  = typename Scheduler::member_type;
-  using FutureType  = Kokkos::BasicFuture;
-
-  using value_type = long;
-
-  FutureType dep[2];
-  const value_type n;
-
-  KOKKOS_INLINE_FUNCTION
-  TestFib(const value_type arg_n) : dep{}, n(arg_n) {}
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(MemberType& member, value_type& result) noexcept {
-    auto& sched = member.scheduler();
-    if (n < 2) {
-      result = n;
-    } else if (!dep[0].is_null() && !dep[1].is_null()) {
-      result = dep[0].get() + dep[1].get();
-    } else {
-      // Spawn new children and respawn myself to sum their results.
-      // Spawn lower value at higher priority as it has a shorter
-      // path to completion.
-
-      dep[1] = Kokkos::task_spawn(
-          Kokkos::TaskSingle(sched, Kokkos::TaskPriority::High),
-          TestFib(n - 2));
-
-      dep[0] = Kokkos::task_spawn(Kokkos::TaskSingle(sched), TestFib(n - 1));
-
-      auto fib_all = sched.when_all(dep, 2);
-
-      if (!dep[0].is_null() && !dep[1].is_null() && !fib_all.is_null()) {
-        // High priority to retire this branch.
-        Kokkos::respawn(this, fib_all, Kokkos::TaskPriority::High);
-      } else {
-        Kokkos::abort("Failed nested task spawn (allocation)");
-      }
-    }
-  }
-};
-
-int main(int argc, char* argv[]) {
-  static const char help[]         = "--help";
-  static const char alloc_size[]   = "--alloc_size=";
-  static const char super_size[]   = "--super_size=";
-  static const char repeat_outer[] = "--repeat_outer=";
-  static const char input_value[]  = "--input=";
-
-  long total_alloc_size   = 1000000;
-  int min_superblock_size = 10000;
-  int test_repeat_outer   = 1;
-  int fib_input           = 4;
-
-  int ask_help = 0;
-
-  for (int i = 1; i < argc; i++) {
-    const char* const a = argv[i];
-
-    if (!strncmp(a, help, strlen(help))) ask_help = 1;
-
-    if (!strncmp(a, alloc_size, strlen(alloc_size)))
-      total_alloc_size = atol(a + strlen(alloc_size));
-
-    if (!strncmp(a, super_size, strlen(super_size)))
-      min_superblock_size = std::stoi(a + strlen(super_size));
-
-    if (!strncmp(a, repeat_outer, strlen(repeat_outer)))
-      test_repeat_outer = std::stoi(a + strlen(repeat_outer));
-
-    if (!strncmp(a, input_value, strlen(input_value)))
-      fib_input = std::stoi(a + strlen(input_value));
-  }
-
-  const long fib_output   = eval_fib(fib_input);
-  const long number_alloc = fib_alloc_count(fib_input);
-
-  const unsigned min_block_size = 32;
-  const unsigned max_block_size = 128;
-
-  long task_count_max   = 0;
-  long task_count_accum = 0;
-  long test_result      = 0;
-
-  if (ask_help) {
-    std::cout << "command line options:"
-              << " " << help << " " << alloc_size << "##"
-              << " " << super_size << "##"
-              << " " << input_value << "##"
-              << " " << repeat_outer << "##" << std::endl;
-    return -1;
-  }
-
-  using Scheduler = Kokkos::TaskSchedulerMultiple;
-
-  using Functor = TestFib;
-
-  Kokkos::initialize(argc, argv);
-
-  {
-    Scheduler sched(Functor::MemorySpace(), total_alloc_size, min_block_size,
-                    max_block_size, min_superblock_size);
-
-    Functor::FutureType f =
-        Kokkos::host_spawn(Kokkos::TaskSingle(sched), Functor(fib_input));
-
-    Kokkos::wait(sched);
-
-    test_result = f.get();
-
-    // task_count_max   = sched.allocated_task_count_max();
-    // task_count_accum = sched.allocated_task_count_accum();
-
-    // if ( number_alloc != task_count_accum ) {
-    //  std::cout << " number_alloc( " << number_alloc << " )"
-    //            << " != task_count_accum( " << task_count_accum << " )"
-    //            << std::endl ;
-    //}
-
-    if (fib_output != test_result) {
-      std::cout << " answer( " << fib_output << " )"
-                << " != result( " << test_result << " )" << std::endl;
-    }
-
-    if (fib_output != test_result) {  // || number_alloc != task_count_accum ) {
-      printf("  TEST FAILED\n");
-      return -1;
-    }
-
-    double min_time = std::numeric_limits::max();
-    double time_sum = 0;
-
-    for (int i = 0; i < test_repeat_outer; ++i) {
-      Kokkos::Timer timer;
-
-      Functor::FutureType ftmp =
-          Kokkos::host_spawn(Kokkos::TaskSingle(sched), Functor(fib_input));
-
-      Kokkos::wait(sched);
-      auto this_time = timer.seconds();
-      min_time       = std::min(min_time, this_time);
-      time_sum += this_time;
-    }
-
-    auto avg_time = time_sum / test_repeat_outer;
-
-    printf(
-        "\"taskdag: alloc super repeat input output task-accum task-max\" %ld "
-        "%d %d %d %ld %ld %ld\n",
-        total_alloc_size, min_superblock_size, test_repeat_outer, fib_input,
-        fib_output, task_count_accum, task_count_max);
-
-    printf("\"taskdag: time (min, avg)\" %g %g\n", min_time, avg_time);
-    printf("\"taskdag: tasks per second (max, avg)\" %g %g\n",
-           number_alloc / min_time, number_alloc / avg_time);
-  }  // end scope to destroy scheduler prior to finalize
-
-  Kokkos::finalize();
-
-  return 0;
-}
-
-#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
-#endif
-
-#endif
diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt
index 67846cb76ee..90fd56de803 100644
--- a/lib/kokkos/core/src/CMakeLists.txt
+++ b/lib/kokkos/core/src/CMakeLists.txt
@@ -27,6 +27,9 @@ if(NOT desul_FOUND)
       set(DESUL_ATOMICS_ENABLE_OPENACC ON)
     endif()
   endif()
+  if(BUILD_SHARED_LIBS)
+    set(DESUL_IMPL_BUILD_SHARED_LIBS ON)
+  endif()
   configure_file(
     ${KOKKOS_SOURCE_DIR}/tpls/desul/Config.hpp.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/desul/atomics/Config.hpp
   )
@@ -49,17 +52,11 @@ append_glob(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.hpp)
 
 if(KOKKOS_ENABLE_CUDA)
   append_glob(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Cuda/*.cpp)
-  if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4)
-    list(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Cuda/Kokkos_Cuda_Task.cpp)
-  endif()
   append_glob(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/Cuda/*.hpp)
 endif()
 
 if(KOKKOS_ENABLE_OPENMP)
   append_glob(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMP/*.cpp)
-  if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4)
-    list(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMP/Kokkos_OpenMP_Task.cpp)
-  endif()
   append_glob(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMP/*.hpp)
 endif()
 
@@ -85,17 +82,11 @@ endif()
 
 if(KOKKOS_ENABLE_HPX)
   append_glob(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/HPX/*.cpp)
-  if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4)
-    list(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/HPX/Kokkos_HPX_Task.cpp)
-  endif()
   append_glob(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/HPX/*.hpp)
 endif()
 
 if(KOKKOS_ENABLE_SERIAL)
   append_glob(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Serial/*.cpp)
-  if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4)
-    list(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Serial/Kokkos_Serial_Task.cpp)
-  endif()
   append_glob(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/Serial/*.hpp)
 endif()
 
@@ -127,14 +118,27 @@ if(NOT desul_FOUND)
     PATTERN "*.hpp"
   )
 
-  message(STATUS "Using internal desul_atomics copy")
+  file(STRINGS "${KOKKOS_SOURCE_DIR}/tpls/desul-hash.txt" KOKKOS_DESUL_HASH)
+  global_set(KOKKOS_DESUL_VERSION "${KOKKOS_DESUL_HASH}")
+  message(STATUS "Using bundled desul_atomics copy (desul/desul@${KOKKOS_DESUL_HASH})")
 else()
+  global_set(KOKKOS_DESUL_VERSION "unknown")
   message(STATUS "Using external desul_atomics install found at:")
   message(STATUS "  " ${desul_DIR})
 endif()
 
+if(Kokkos_ENABLE_EXPERIMENTAL_CXX20_MODULES)
+  set(KOKKOS_CORE_MODULE_FILES Kokkos_Core.cppm Kokkos_Core_Impl.cppm)
+endif()
+
 kokkos_add_library(
-  kokkoscore SOURCES ${KOKKOS_CORE_SRCS} HEADERS ${KOKKOS_CORE_HEADERS}
+  kokkoscore
+  SOURCES
+  ${KOKKOS_CORE_SRCS}
+  MODULE_INTERFACE
+  ${KOKKOS_CORE_MODULE_FILES}
+  HEADERS
+  ${KOKKOS_CORE_HEADERS}
   ADD_BUILD_OPTIONS # core should be given all the necessary compiler/linker flags
 )
 
@@ -143,12 +147,14 @@ kokkos_lib_include_directories(
 )
 if(NOT desul_FOUND)
   target_include_directories(kokkoscore SYSTEM PUBLIC $)
+  if(BUILD_SHARED_LIBS)
+    target_compile_definitions(kokkoscore PRIVATE DESUL_IMPL_EXPORT_SYMBOLS)
+  endif()
 endif()
 
 if(Kokkos_ENABLE_IMPL_MDSPAN)
-  message(STATUS "Experimental mdspan support is enabled")
-
   if(Kokkos_ENABLE_MDSPAN_EXTERNAL)
+    global_set(KOKKOS_MDSPAN_VERSION "unknown")
     message(STATUS "Using external mdspan")
     target_link_libraries(kokkoscore PUBLIC std::mdspan)
   else()
@@ -164,14 +170,20 @@ if(Kokkos_ENABLE_IMPL_MDSPAN)
       PATTERN "mdspan"
       PATTERN "*.hpp"
     )
-    message(STATUS "Using internal mdspan directory ${KOKKOS_SOURCE_DIR}/tpls/mdspan/include")
+
+    file(STRINGS "${KOKKOS_SOURCE_DIR}/tpls/mdspan-hash.txt" KOKKOS_MDSPAN_HASH)
+    global_set(KOKKOS_MDSPAN_VERSION "${KOKKOS_MDSPAN_HASH}")
+    message(STATUS "Using bundled mdspan copy (kokkos/mdspan@${KOKKOS_MDSPAN_HASH})")
   endif()
 endif()
 
+if(Kokkos_ENABLE_IMPL_VIEW_LEGACY)
+  global_set(KOKKOS_MDSPAN_VERSION "not applicable")
+endif()
+
 kokkos_link_tpl(kokkoscore PUBLIC HWLOC)
 kokkos_link_tpl(kokkoscore PUBLIC CUDA)
 kokkos_link_tpl(kokkoscore PUBLIC HPX)
-kokkos_link_tpl(kokkoscore PUBLIC LIBDL)
 # On *nix-like systems (Linux, macOS) we need pthread for C++ std::thread
 if(NOT WIN32)
   kokkos_link_tpl(kokkoscore PUBLIC THREADS)
@@ -179,6 +191,7 @@ endif()
 if(NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
   kokkos_link_tpl(kokkoscore PUBLIC ROCM)
 endif()
+kokkos_link_tpl(kokkoscore PUBLIC LIBQUADMATH)
 
 # FIXME: We need a proper solution to figure out whether to enable
 #        libatomic
@@ -196,4 +209,6 @@ if(Kokkos_ENABLE_OPENMP)
   target_link_libraries(kokkoscore PUBLIC OpenMP::OpenMP_CXX)
 endif()
 
-kokkos_link_tpl(kokkoscore PUBLIC LIBQUADMATH)
+if(Kokkos_ENABLE_LIBDL)
+  target_link_libraries(kokkoscore PUBLIC ${CMAKE_DL_LIBS})
+endif()
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp
index 7d1ad831e0b..df693189245 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
@@ -176,9 +163,6 @@ class Cuda {
   //! Free any resources being consumed by the device.
   static void impl_finalize();
 
-  //! Has been initialized
-  static int impl_is_initialized();
-
   //! Initialize, telling the CUDA run-time library which device to use.
   static void impl_initialize(InitializationSettings const&);
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index 128068eb9d9..5f182252baf 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
@@ -21,7 +8,12 @@
 #include 
 #ifdef KOKKOS_ENABLE_CUDA
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include 
 
@@ -200,7 +192,6 @@ void *impl_allocate_common(const int device_id,
     }
   }
 #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
-  // FIXME_KEPLER Everything after Kepler should support cudaMallocAsync
   int device_supports_cuda_malloc_async;
   KOKKOS_IMPL_CUDA_SAFE_CALL(
       cudaDeviceGetAttribute(&device_supports_cuda_malloc_async,
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp
index 704521bef82..25237140c34 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp
index 247cb5b02c5..4ac17b3e824 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_INTERNAL_HPP
 #define KOKKOS_CUDA_INTERNAL_HPP
@@ -103,24 +90,7 @@ inline int cuda_max_active_blocks_per_sm(cudaDeviceProp const& properties,
                              : max_blocks_regs);
 
   // Limits due to blocks/SM
-#if CUDA_VERSION >= 11000
   int const max_blocks_per_sm = properties.maxBlocksPerMultiProcessor;
-#else
-  int const max_blocks_per_sm = [&properties]() {
-    switch (properties.major) {
-      case 3: return 16;
-      case 5:
-      case 6: return 32;
-      case 7: {
-        int isTuring = properties.minor == 5;
-        return (isTuring) ? 16 : 32;
-      }
-      default:
-        throw_runtime_exception("Unknown device in cuda block size deduction");
-        return 0;
-    }
-  }();
-#endif
 
   // Overall occupancy in blocks
   return std::min({max_blocks_regs, max_blocks_shmem, max_blocks_per_sm});
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp
index 66656fefda5..dfb396d5f02 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_ERROR_HPP
 #define KOKKOS_CUDA_ERROR_HPP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp
index 872958ceca9..5c4884f98dd 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_KOKKOS_CUDA_GRAPHNODEKERNEL_IMPL_HPP
 #define KOKKOS_KOKKOS_CUDA_GRAPHNODEKERNEL_IMPL_HPP
@@ -98,8 +85,8 @@ class GraphNodeKernelImpl m_graph_ptr    = nullptr;
-  Kokkos::ObservingRawPtr m_graph_node_ptr = nullptr;
+  cudaGraph_t const* m_graph_ptr    = nullptr;
+  cudaGraphNode_t* m_graph_node_ptr = nullptr;
   // Basically, we have to make this mutable for the same reasons that the
   // global kernel buffers in the Cuda instance are mutable...
   mutable std::shared_ptr m_driver_storage = nullptr;
@@ -137,8 +124,7 @@ class GraphNodeKernelImpl allocate_driver_memory_buffer(
-      const CudaSpace& mem) const {
+  base_t* allocate_driver_memory_buffer(const CudaSpace& mem) const {
     KOKKOS_EXPECTS(m_driver_storage == nullptr)
     std::string alloc_label =
         label + " - GraphNodeKernel global memory functor storage";
@@ -160,12 +146,12 @@ template ::type>
 struct get_graph_node_kernel_type
-    : type_identity<
+    : std::type_identity<
           GraphNodeKernelImpl> {};
 template 
 struct get_graph_node_kernel_type
-    : type_identity,
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp
index ff0aa0da0de..ed3b3a0dd17 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_GraphNode_Impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_KOKKOS_CUDA_GRAPHNODE_IMPL_HPP
 #define KOKKOS_KOKKOS_CUDA_GRAPHNODE_IMPL_HPP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp
index 23659b57ad9..fb533d830c1 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_KOKKOS_CUDA_GRAPH_IMPL_HPP
 #define KOKKOS_KOKKOS_CUDA_GRAPH_IMPL_HPP
@@ -201,8 +188,6 @@ struct GraphImpl {
   }
 
   void submit(const execution_space& exec) {
-    desul::ensure_cuda_lock_arrays_on_device();
-
     if (!bool(m_graph_exec)) {
       instantiate();
     }
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp
index f54459d2c4d..e6adc014e73 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp
@@ -1,181 +1,20 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_HALF_HPP_
 #define KOKKOS_CUDA_HALF_HPP_
 
-#ifdef KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
-
 #include 
-#include 
+#include 
 
-#if CUDA_VERSION >= 11000
 #include 
-#endif
 
-namespace Kokkos {
-namespace Experimental {
+namespace Kokkos::Experimental {
 
 /************************** half conversions **********************************/
 KOKKOS_INLINE_FUNCTION
 half_t cast_to_half(half_t val) { return val; }
 
-// CUDA before 11.1 only has the half <-> float conversions marked host device
-// So we will largely convert to float on the host for conversion
-// But still call the correct functions on the device
-#if (CUDA_VERSION < 11010)
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(float val) { return half_t(__float2half(val)); }
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(bool val) { return cast_to_half(static_cast(val)); }
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(double val) {
-  // double2half was only introduced in CUDA 11 too
-  return half_t(__float2half(static_cast(val)));
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(short val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__short2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned short val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__ushort2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(int val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__int2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned int val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__uint2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long long val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__ll2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long long val) {
-  KOKKOS_IF_ON_DEVICE((return half_t(__ull2half_rn(val));))
-  KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast(val)));))
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(long val) {
-  return cast_to_half(static_cast(val));
-}
-
-KOKKOS_INLINE_FUNCTION
-half_t cast_to_half(unsigned long val) {
-  return cast_to_half(static_cast(val));
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  return __half2float(half_t::impl_type(val));
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  return static_cast(cast_from_half(val));
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  return static_cast(__half2float(half_t::impl_type(val)));
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2short_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2ushort_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2int_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2uint_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2ll_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-
-template 
-KOKKOS_INLINE_FUNCTION
-    std::enable_if_t, T>
-    cast_from_half(half_t val) {
-  KOKKOS_IF_ON_DEVICE((return __half2ull_rz(half_t::impl_type(val));))
-  KOKKOS_IF_ON_HOST(
-      (return static_cast(__half2float(half_t::impl_type(val)));))
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  return static_cast(cast_from_half(val));
-}
-
-template 
-KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
-cast_from_half(half_t val) {
-  return static_cast(cast_from_half(val));
-}
-
-#else  // CUDA 11.1 versions follow
-
 KOKKOS_INLINE_FUNCTION
 half_t cast_to_half(float val) { return __float2half(val); }
 KOKKOS_INLINE_FUNCTION
@@ -252,20 +91,10 @@ KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
 cast_from_half(half_t val) {
   return static_cast(cast_from_half(val));
 }
-#endif
 
 /************************** bhalf conversions *********************************/
-// Go in this branch if CUDA version is >= 11.0.0 and less than 11.1.0 or if the
-// architecture is older than Ampere
-#if !defined(KOKKOS_ARCH_KEPLER) && !defined(KOKKOS_ARCH_MAXWELL) && \
-    !defined(KOKKOS_ARCH_PASCAL) && !defined(KOKKOS_ARCH_VOLTA) &&   \
-    !defined(KOKKOS_ARCH_TURING75)
-#define KOKKOS_IMPL_NVIDIA_GPU_ARCH_SUPPORT_BHALF
-#endif
-
-#if CUDA_VERSION >= 11000 && \
-    (CUDA_VERSION < 11010 || \
-     !defined(KOKKOS_IMPL_NVIDIA_GPU_ARCH_SUPPORT_BHALF))
+// if architecture is older than Ampere
+#if KOKKOS_IMPL_ARCH_NVIDIA_GPU < 80
 KOKKOS_INLINE_FUNCTION
 bhalf_t cast_to_bhalf(bhalf_t val) { return val; }
 
@@ -388,9 +217,7 @@ KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
 cast_from_bhalf(bhalf_t val) {
   return static_cast(cast_from_bhalf(val));
 }
-#endif  // CUDA_VERSION >= 11000 && CUDA_VERSION < 11010
-
-#if CUDA_VERSION >= 11010 && defined(KOKKOS_IMPL_NVIDIA_GPU_ARCH_SUPPORT_BHALF)
+#else
 KOKKOS_INLINE_FUNCTION
 bhalf_t cast_to_bhalf(bhalf_t val) { return val; }
 KOKKOS_INLINE_FUNCTION
@@ -469,47 +296,8 @@ KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
 cast_from_bhalf(bhalf_t val) {
   return static_cast(cast_from_bhalf(val));
 }
-#endif  // CUDA_VERSION >= 11010
-
-#undef KOKKOS_IMPL_NVIDIA_GPU_ARCH_SUPPORT_BHALF
-}  // namespace Experimental
-
-#if (CUDA_VERSION >= 11000)
-template <>
-struct reduction_identity {
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
-    return 0.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
-    return 1.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
-    return -0x7f7f;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
-    return 0x7f7f;
-  }
-};
-#endif  // CUDA_VERSION >= 11000
-
-// use float as the return type for sum and prod since cuda_fp16.h
-// has no constexpr functions for casting to __half
-template <>
-struct reduction_identity {
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
-    return 0.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
-    return 1.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
-    return -65504.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
-    return 65504.0F;
-  }
-};
-
-}  // namespace Kokkos
-#endif  // KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
+#endif
+
+}  // namespace Kokkos::Experimental
+
 #endif
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp
index 79d3322da28..8a4861cd6df 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp
@@ -1,52 +1,33 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_HALF_IMPL_TYPE_HPP_
 #define KOKKOS_CUDA_HALF_IMPL_TYPE_HPP_
 
 #include 
-#ifdef KOKKOS_ENABLE_CUDA
-#if !(defined(KOKKOS_COMPILER_CLANG) && KOKKOS_COMPILER_CLANG < 900) && \
-    !(defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL50) ||  \
-      defined(KOKKOS_ARCH_MAXWELL52))
+
+#if !(defined(KOKKOS_ARCH_MAXWELL50) || defined(KOKKOS_ARCH_MAXWELL52))
+
 #include 
-#if (CUDA_VERSION >= 11000)
 #include 
-#endif  // CUDA_VERSION >= 11000
 
 #ifndef KOKKOS_IMPL_HALF_TYPE_DEFINED
 // Make sure no one else tries to define half_t
 #define KOKKOS_IMPL_HALF_TYPE_DEFINED
-#define KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
 
-namespace Kokkos {
-namespace Impl {
+namespace Kokkos::Impl {
+
 struct half_impl_t {
   using type = __half;
 };
-#if (CUDA_VERSION >= 11000)
 #define KOKKOS_IMPL_BHALF_TYPE_DEFINED
 struct bhalf_impl_t {
   using type = __nv_bfloat16;
 };
-#endif  // CUDA_VERSION >= 11000
-}  // namespace Impl
-}  // namespace Kokkos
+
+}  // namespace Kokkos::Impl
+
 #endif  // KOKKOS_IMPL_HALF_TYPE_DEFINED
-#endif  // Disables for half_t on cuda:
-        // Clang/8||KEPLER30||KEPLER32||KEPLER37||MAXWELL50||MAXWELL52
-#endif  // KOKKOS_ENABLE_CUDA
+#endif  // Disables for half_t on cuda: MAXWELL50||MAXWELL52
+
 #endif
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_MathematicalFunctions.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_MathematicalFunctions.hpp
index 86de39e475c..d3e3e480bf1 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_MathematicalFunctions.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_MathematicalFunctions.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_HALF_MATHEMATICAL_FUNCTIONS_HPP_
 #define KOKKOS_CUDA_HALF_MATHEMATICAL_FUNCTIONS_HPP_
@@ -38,8 +25,6 @@ namespace Impl {
     return CUDA_NAME(HALF_TYPE::impl_type(x));                     \
   }
 
-#ifdef KOKKOS_IMPL_CUDA_HALF_TYPE_DEFINED
-
 #define KOKKOS_CUDA_HALF_UNARY_FUNCTION_IMPL(OP, CUDA_NAME) \
   KOKKOS_CUDA_HALF_UNARY_FUNCTION(OP, CUDA_NAME, Kokkos::Experimental::half_t)
 #define KOKKOS_CUDA_HALF_BINARY_FUNCTION_IMPL(OP, CUDA_NAME) \
@@ -52,12 +37,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::Experimental::half_t impl_test_fallback_half(
   return Kokkos::Experimental::half_t(0.f);
 }
 
-#else
-#define KOKKOS_CUDA_HALF_UNARY_FUNCTION_IMPL(OP, CUDA_NAME)
-#define KOKKOS_CUDA_HALF_BINARY_FUNCTION_IMPL(OP, CUDA_NAME)
-#define KOKKOS_CUDA_HALF_UNARY_PREDICATE_IMPL(OP, CUDA_NAME)
-#endif
-
 // Function for bhalf are not available prior to Ampere
 #if defined(KOKKOS_IMPL_BHALF_TYPE_DEFINED) && \
     (KOKKOS_IMPL_ARCH_NVIDIA_GPU >= 80)
@@ -147,9 +126,9 @@ KOKKOS_CUDA_HALF_AND_BHALF_UNARY_FUNCTION_IMPL(nearbyint, hrint)
 // nextafter
 // copysign
 // isfinite
-#if (KOKKOS_COMPILER_NVCC <= 1210 || KOKKOS_COMPILER_NVCC >= 1300) || \
-    defined(KOKKOS_ENABLE_CXX17)
+#if KOKKOS_COMPILER_NVCC >= 1230
 // __hisinf always returns false with nvcc 12.2 when compiling with cxx20
+// https://docs.nvidia.com/cuda/archive/12.3.2/cuda-toolkit-release-notes/index.html#cuda-math-release-12-3
 KOKKOS_CUDA_HALF_AND_BHALF_UNARY_PREDICATE_IMPL(isinf, __hisinf)
 #endif
 KOKKOS_CUDA_HALF_AND_BHALF_UNARY_PREDICATE_IMPL(isnan, __hisnan)
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
index 8c730d4ee53..eeeb142c980 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*--------------------------------------------------------------------------*/
 /* Kokkos interfaces */
@@ -24,7 +11,12 @@
 #include 
 #ifdef KOKKOS_ENABLE_CUDA
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 // #include 
 // #include 
@@ -202,7 +194,7 @@ void CudaInternal::print_configuration(std::ostream &s) const {
   s << "macro  KOKKOS_ENABLE_CUDA      : defined\n";
 #endif
 #if defined(CUDA_VERSION)
-  s << "macro  CUDA_VERSION          = " << CUDA_VERSION << " = version "
+  s << "macro  CUDA_VERSION          : " << CUDA_VERSION << " = version "
     << CUDA_VERSION / 1000 << "." << (CUDA_VERSION % 1000) / 10 << '\n';
 #endif
 
@@ -210,7 +202,10 @@ void CudaInternal::print_configuration(std::ostream &s) const {
     cudaDeviceProp prop;
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceProperties(&prop, i));
     s << "Kokkos::Cuda[ " << i << " ] " << prop.name;
-    if (m_cudaDev == i) s << " : Selected";
+    if (m_cudaDev == i)
+      s << " : Selected";
+    else
+      s << " : Not Selected";
     s << '\n'
       << "  Capability: " << prop.major << "." << prop.minor << '\n'
       << "  Total Global Memory: " << human_memory_size(prop.totalGlobalMem)
@@ -535,10 +530,6 @@ int Cuda::concurrency() const {
   return Impl::CudaInternal::concurrency();
 }
 
-int Cuda::impl_is_initialized() {
-  return Impl::CudaInternal::singleton().is_initialized();
-}
-
 void Cuda::impl_initialize(InitializationSettings const &settings) {
   const std::vector &visible_devices = Impl::get_visible_devices();
   const int cuda_device_id =
@@ -747,12 +738,14 @@ int g_cuda_space_factory_initialized =
     initialize_space_factory("150_Cuda");
 
 int CudaInternal::m_cudaArch = -1;
-cudaDeviceProp CudaInternal::m_deviceProp;
+KOKKOS_IMPL_EXPORT cudaDeviceProp CudaInternal::m_deviceProp;
 std::set CudaInternal::cuda_devices = {};
-std::map CudaInternal::constantMemHostStagingPerDevice =
-    {};
-std::map CudaInternal::constantMemReusablePerDevice = {};
-std::map CudaInternal::constantMemMutexPerDevice     = {};
+KOKKOS_IMPL_EXPORT std::map
+    CudaInternal::constantMemHostStagingPerDevice = {};
+KOKKOS_IMPL_EXPORT std::map
+    CudaInternal::constantMemReusablePerDevice = {};
+KOKKOS_IMPL_EXPORT std::map
+    CudaInternal::constantMemMutexPerDevice = {};
 
 }  // namespace Impl
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
index a6136fe79af..4e4b13093cc 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_INSTANCE_HPP_
 #define KOKKOS_CUDA_INSTANCE_HPP_
@@ -37,6 +24,11 @@ extern "C" void kokkos_impl_cuda_set_serial_execution(bool);
 extern "C" bool kokkos_impl_cuda_use_serial_execution();
 #endif
 
+#if defined(KOKKOS_COMPILER_NVCC) && !defined(KOKKOS_ARCH_MAXWELL) && \
+    !defined(KOKKOS_ARCH_PASCAL)
+#define KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
+#endif
+
 namespace Kokkos {
 namespace Impl {
 
@@ -52,7 +44,11 @@ struct CudaTraits {
   static constexpr CudaSpace::size_type ConstantMemoryCache =
       0x002000; /*  8k bytes */
   static constexpr CudaSpace::size_type KernelArgumentLimit =
+#ifdef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
+      0x008000; /* 32k bytes */
+#else
       0x001000; /*  4k bytes */
+#endif
   static constexpr CudaSpace::size_type MaxHierarchicalParallelism =
       1024; /* team_size * vector_length */
   using ConstantGlobalBufferType =
@@ -94,7 +90,7 @@ class CudaInternal {
   static int m_cudaArch;
   static int concurrency();
 
-  static cudaDeviceProp m_deviceProp;
+  KOKKOS_IMPL_EXPORT static cudaDeviceProp m_deviceProp;
 
   // Scratch Spaces for Reductions
   mutable std::size_t m_scratchSpaceCount;
@@ -121,9 +117,11 @@ class CudaInternal {
   bool was_finalized   = false;
 
   static std::set cuda_devices;
-  static std::map constantMemHostStagingPerDevice;
-  static std::map constantMemReusablePerDevice;
-  static std::map constantMemMutexPerDevice;
+  KOKKOS_IMPL_EXPORT static std::map
+      constantMemHostStagingPerDevice;
+  KOKKOS_IMPL_EXPORT static std::map
+      constantMemReusablePerDevice;
+  KOKKOS_IMPL_EXPORT static std::map constantMemMutexPerDevice;
 
   static CudaInternal& singleton();
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
index 61b890089e8..1182ba92ca0 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDAEXEC_HPP
 #define KOKKOS_CUDAEXEC_HPP
@@ -35,6 +22,14 @@
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
+// If KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT is used we leverage implicit constant
+// cache use via an argument attribute in the "local launch" mechanism. At that
+// point we only need local and global launch - the latter for functors that
+// exceed the kernel argument limit which is now 32kB. Local launch is always
+// strictly better than global launch - which means the light weight/heavy
+// weight property can be ignored - the only thing that matters is the size of
+// the functor.
+#ifndef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
 /** \brief  Access to constant memory on the device */
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
 
@@ -47,6 +42,7 @@ __device__ __constant__ unsigned long kokkos_impl_cuda_constant_memory_buffer
     [Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long)];
 
 #endif
+#endif  // !KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
 
 template 
 inline __device__ T* kokkos_impl_cuda_shared_memory() {
@@ -64,6 +60,7 @@ namespace Impl {
 // function qualifier which could be used to improve performance.
 //----------------------------------------------------------------------------
 
+#ifndef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
 template 
 __global__ static void cuda_parallel_launch_constant_memory() {
   const DriverType& driver =
@@ -94,6 +91,22 @@ __global__ __launch_bounds__(
                                                                  driver) {
   driver();
 }
+#else
+template 
+__global__ static void cuda_parallel_launch_local_memory(
+    const __grid_constant__ DriverType driver) {
+  driver();
+}
+
+template 
+__global__ __launch_bounds__(
+    maxTperB,
+    minBperSM) static void cuda_parallel_launch_local_memory(const __grid_constant__
+                                                                 DriverType
+                                                                     driver) {
+  driver();
+}
+#endif  // KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
 
 template 
 __global__ static void cuda_parallel_launch_global_memory(
@@ -149,8 +162,6 @@ inline void configure_shmem_preference(const CudaInternal* cuda_instance,
                                        const KernelFuncPtr& func,
                                        const size_t block_size, int& shmem,
                                        const size_t occupancy) {
-#ifndef KOKKOS_ARCH_KEPLER
-
   const auto& func_attr =
       get_cuda_kernel_func_attributes(cuda_instance,
                                                                 func);
@@ -231,13 +242,6 @@ inline void configure_shmem_preference(const CudaInternal* cuda_instance,
   if (cache_config_preference_cached != carveout) {
     cache_config_preference_cached = set_cache_config();
   }
-#else
-  // Use the parameters so we don't get a warning
-  (void)func;
-  (void)device_props;
-  (void)block_size;
-  (void)occupancy;
-#endif
 }
 
 //  end Some helper functions for launch code readability }}}1
@@ -263,31 +267,51 @@ struct DeduceCudaLaunchMechanism {
       (sizeof(DriverType) < CudaTraits::KernelArgumentLimit
            ? CudaLaunchMechanism::LocalMemory
            : CudaLaunchMechanism::Default) |
+#ifndef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
       (sizeof(DriverType) < CudaTraits::ConstantMemoryUsage
            ? CudaLaunchMechanism::ConstantMemory
            : CudaLaunchMechanism::Default) |
+#endif
       CudaLaunchMechanism::GlobalMemory;
 
   static constexpr CudaLaunchMechanism requested_launch_mechanism =
+#ifdef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
+      CudaLaunchMechanism::LocalMemory |
+#else
       (((property & light_weight) == light_weight)
            ? CudaLaunchMechanism::LocalMemory
            : CudaLaunchMechanism::ConstantMemory) |
+#endif
       CudaLaunchMechanism::GlobalMemory;
 
   static constexpr CudaLaunchMechanism default_launch_mechanism =
+#ifdef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
+      (sizeof(DriverType) < CudaTraits::KernelArgumentLimit)
+          ? CudaLaunchMechanism::LocalMemory
+          : CudaLaunchMechanism::GlobalMemory;
+#else
       // BuildValidMask
       (sizeof(DriverType) < CudaTraits::ConstantMemoryUseThreshold)
           ? CudaLaunchMechanism::LocalMemory
           : ((sizeof(DriverType) < CudaTraits::ConstantMemoryUsage)
                  ? CudaLaunchMechanism::ConstantMemory
                  : CudaLaunchMechanism::GlobalMemory);
+#endif
 
-  //              None                LightWeight    HeavyWeight
-  // F end DeduceCudaLaunchMechanism }}}2
@@ -537,7 +562,7 @@ struct CudaParallelLaunchKernelInvoker {{{2
-
+#ifndef KOKKOS_IMPL_CUDA_USE_GRID_CONSTANT
 template 
 struct CudaParallelLaunchKernelFunc<
@@ -628,6 +653,7 @@ struct CudaParallelLaunchKernelInvoker end Constant Memory }}}2
 //------------------------------------------------------------------------------
@@ -677,8 +703,6 @@ struct CudaParallelLaunchImpl<
             desired_occupancy);
       }
 
-      desul::ensure_cuda_lock_arrays_on_device();
-
       // Invoke the driver function on the device
       base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance);
 
@@ -708,33 +732,23 @@ template ,
           CudaLaunchMechanism LaunchMechanism =
               DeduceCudaLaunchMechanism::launch_mechanism,
           bool DoGraph = DriverType::Policy::is_graph_kernel::value>
-struct CudaParallelLaunch;
-
-// General launch mechanism
-template 
-struct CudaParallelLaunch
+struct CudaParallelLaunch
     : CudaParallelLaunchImpl {
   using base_t =
       CudaParallelLaunchImpl;
-  template 
-  CudaParallelLaunch(Args&&... args) {
-    base_t::launch_kernel((Args&&)args...);
-  }
-};
+  CudaParallelLaunch(const DriverType& driver, const dim3& grid,
+                     const dim3& block, const int shmem,
+                     const CudaInternal* cuda_instance) {
+    if (!Impl::is_empty_launch(grid, block)) {
+      desul::ensure_cuda_lock_arrays_on_device();
+    }
 
-// Launch mechanism for creating graph nodes
-template 
-struct CudaParallelLaunch
-    : CudaParallelLaunchImpl {
-  using base_t =
-      CudaParallelLaunchImpl;
-  template 
-  CudaParallelLaunch(Args&&... args) {
-    base_t::create_parallel_launch_graph_node((Args&&)args...);
+    if constexpr (DoGraph) {
+      base_t::create_parallel_launch_graph_node(driver, grid, block, shmem,
+                                                cuda_instance);
+    } else {
+      base_t::launch_kernel(driver, grid, block, shmem, cuda_instance);
+    }
   }
 };
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp
index 2c7eba7a18f..9d9346063f8 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_MDRANGEPOLICY_HPP_
 #define KOKKOS_CUDA_MDRANGEPOLICY_HPP_
@@ -40,11 +27,14 @@ template <>
 inline TileSizeProperties get_tile_size_properties(
     const Kokkos::Cuda& space) {
   TileSizeProperties properties;
-  properties.max_threads = space.impl_internal_space_instance()
-                               ->m_deviceProp.maxThreadsPerMultiProcessor;
+  const auto& device_prop = space.cuda_device_prop();
+  properties.max_threads  = device_prop.maxThreadsPerMultiProcessor;
   properties.default_largest_tile_size = 16;
   properties.default_tile_size         = 2;
   properties.max_total_tile_size       = 512;
+  properties.max_threads_dimensions[0] = device_prop.maxThreadsDim[0];
+  properties.max_threads_dimensions[1] = device_prop.maxThreadsDim[1];
+  properties.max_threads_dimensions[2] = device_prop.maxThreadsDim[2];
   return properties;
 }
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
index a65b063427e..11eac4e17cd 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_PARALLEL_MD_RANGE_HPP
 #define KOKKOS_CUDA_PARALLEL_MD_RANGE_HPP
@@ -101,17 +88,13 @@ class ParallelFor, Kokkos::Cuda> {
 
     // maximum number of threads in each dimension of the block as fetched by
     // the API
-    const auto max_threads_dim = m_rp.space().cuda_device_prop().maxThreadsDim;
+    [[maybe_unused]] const auto max_threads_dim =
+        m_rp.space().cuda_device_prop().maxThreadsDim;
 
     // maximum total number of threads per block as fetched by the API
     [[maybe_unused]] const auto max_threads_per_block =
         m_rp.space().cuda_device_prop().maxThreadsPerBlock;
 
-    // make sure the Z dimension (it is less than x,y limits) isn't exceeded
-    const auto clampZ = [&](const int input) {
-      return std::min(input, max_threads_dim[2]);
-    };
-
     // make sure the block dimensions don't exceed the max number of threads
     // allowed
     const auto check_block_sizes = [&]([[maybe_unused]] const dim3& block) {
@@ -136,99 +119,81 @@ class ParallelFor, Kokkos::Cuda> {
                     grid.z <= static_cast(m_max_grid_size[2]));
     };
 
-    if (RP::rank == 2) {
+    dim3 grid(1, 1, 1);
+    dim3 block(1, 1, 1);
+    if constexpr (RP::rank == 2) {
       // id0 to threadIdx.x; id1 to threadIdx.y
-      const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], 1);
-      check_block_sizes(block);
-
-      const dim3 grid(
-          std::min(
-              (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
-              m_max_grid_size[0]),
-          std::min(
-              (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
-              m_max_grid_size[1]),
-          1);
-      check_grid_sizes(grid);
-
-      CudaParallelLaunch(
-          *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
-    } else if (RP::rank == 3) {
+      block = dim3(m_rp.m_tile[0], m_rp.m_tile[1], 1);
+      grid =
+          dim3(std::min(
+                   (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
+                   m_max_grid_size[0]),
+               std::min(
+                   (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
+                   m_max_grid_size[1]),
+               1);
+    } else if constexpr (RP::rank == 3) {
       // id0 to threadIdx.x; id1 to threadIdx.y; id2 to threadIdx.z
-      const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], clampZ(m_rp.m_tile[2]));
-      check_block_sizes(block);
-
-      const dim3 grid(
-          std::min(
-              (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
-              m_max_grid_size[0]),
-          std::min(
-              (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
-              m_max_grid_size[1]),
-          std::min(
-              (m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z,
-              m_max_grid_size[2]));
-      // ensure we don't exceed the capability of the device
-      check_grid_sizes(grid);
-
-      CudaParallelLaunch(
-          *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
-    } else if (RP::rank == 4) {
+      block = dim3(m_rp.m_tile[0], m_rp.m_tile[1], m_rp.m_tile[2]);
+      grid =
+          dim3(std::min(
+                   (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x,
+                   m_max_grid_size[0]),
+               std::min(
+                   (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y,
+                   m_max_grid_size[1]),
+               std::min(
+                   (m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z,
+                   m_max_grid_size[2]));
+    } else if constexpr (RP::rank == 4) {
       // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to
       // threadIdx.z
-      const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], m_rp.m_tile[2],
-                       clampZ(m_rp.m_tile[3]));
-      check_block_sizes(block);
-      const dim3 grid(
-          std::min(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
-                                     m_max_grid_size[0]),
-          std::min(
-              (m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y,
-              m_max_grid_size[1]),
-          std::min(
-              (m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z,
-              m_max_grid_size[2]));
-      check_grid_sizes(grid);
-      CudaParallelLaunch(
-          *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
-    } else if (RP::rank == 5) {
+      block =
+          dim3(m_rp.m_tile[0] * m_rp.m_tile[1], m_rp.m_tile[2], m_rp.m_tile[3]);
+      grid =
+          dim3(std::min(
+                   m_rp.m_tile_end[0] * m_rp.m_tile_end[1], m_max_grid_size[0]),
+               std::min(
+                   (m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y,
+                   m_max_grid_size[1]),
+               std::min(
+                   (m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z,
+                   m_max_grid_size[2]));
+    } else if constexpr (RP::rank == 5) {
       // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4 to
       // threadIdx.z
-      const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1],
-                       m_rp.m_tile[2] * m_rp.m_tile[3], clampZ(m_rp.m_tile[4]));
-      check_block_sizes(block);
-      const dim3 grid(
-          std::min(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
-                                     m_max_grid_size[0]),
-          std::min(m_rp.m_tile_end[2] * m_rp.m_tile_end[3],
-                                     m_max_grid_size[1]),
-          std::min(
-              (m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z,
-              m_max_grid_size[2]));
-      check_grid_sizes(grid);
-      CudaParallelLaunch(
-          *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
-    } else if (RP::rank == 6) {
+      block = dim3(m_rp.m_tile[0] * m_rp.m_tile[1],
+                   m_rp.m_tile[2] * m_rp.m_tile[3], m_rp.m_tile[4]);
+      grid =
+          dim3(std::min(
+                   m_rp.m_tile_end[0] * m_rp.m_tile_end[1], m_max_grid_size[0]),
+               std::min(
+                   m_rp.m_tile_end[2] * m_rp.m_tile_end[3], m_max_grid_size[1]),
+               std::min(
+                   (m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z,
+                   m_max_grid_size[2]));
+    } else if constexpr (RP::rank == 6) {
       // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to
       // threadIdx.z
-      const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1],
-                       m_rp.m_tile[2] * m_rp.m_tile[3],
-                       clampZ(m_rp.m_tile[4] * m_rp.m_tile[5]));
-      check_block_sizes(block);
-      const dim3 grid(
+      block =
+          dim3(m_rp.m_tile[0] * m_rp.m_tile[1], m_rp.m_tile[2] * m_rp.m_tile[3],
+               m_rp.m_tile[4] * m_rp.m_tile[5]);
+      grid = dim3(
           std::min(m_rp.m_tile_end[0] * m_rp.m_tile_end[1],
                                      m_max_grid_size[0]),
           std::min(m_rp.m_tile_end[2] * m_rp.m_tile_end[3],
                                      m_max_grid_size[1]),
           std::min(m_rp.m_tile_end[4] * m_rp.m_tile_end[5],
                                      m_max_grid_size[2]));
-      check_grid_sizes(grid);
-      CudaParallelLaunch(
-          *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
     } else {
       Kokkos::abort("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n");
     }
-
+    // ensure we don't exceed the capability of the device
+    check_grid_sizes(grid);
+    check_block_sizes(block);
+    // launch the kernel
+    CudaParallelLaunch(
+        *this, grid, block, 0, m_rp.space().impl_internal_space_instance());
   }  // end execute
 
   //  inline
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
index c26658ce5da..425de94f66d 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_PARALLEL_RANGE_HPP
 #define KOKKOS_CUDA_PARALLEL_RANGE_HPP
@@ -41,9 +28,10 @@ class ParallelFor, Kokkos::Cuda> {
   using Policy = Kokkos::RangePolicy;
 
  private:
-  using Member       = typename Policy::member_type;
-  using WorkTag      = typename Policy::work_tag;
-  using LaunchBounds = typename Policy::launch_bounds;
+  using Member          = typename Policy::member_type;
+  using WorkTag         = typename Policy::work_tag;
+  using LaunchBounds    = typename Policy::launch_bounds;
+  using StaticBatchSize = typename Policy::static_batch_size;
 
   const FunctorType m_functor;
   const Policy m_policy;
@@ -70,22 +58,36 @@ class ParallelFor, Kokkos::Cuda> {
   Policy const& get_policy() const { return m_policy; }
 
   inline __device__ void operator()() const {
-    const auto work_stride = Member(blockDim.y) * gridDim.x;
-    const Member work_end  = m_policy.end();
+    constexpr auto batch_size = Member(StaticBatchSize::batch_size);
+    const auto work_stride    = Member(blockDim.y) * gridDim.x;
+    const Member work_end     = m_policy.end();
 
     for (Member iwork = m_policy.begin() + threadIdx.y +
                         static_cast(blockDim.y) * blockIdx.x;
          iwork < work_end;
-         iwork = iwork < static_cast(work_end - work_stride)
-                     ? iwork + work_stride
-                     : work_end) {
-      this->template exec_range(iwork);
+         iwork =
+             iwork < static_cast(work_end - work_stride * batch_size)
+                 ? iwork + work_stride * batch_size
+                 : work_end) {
+#if defined(KOKKOS_COMPILER_NVCC)
+#pragma unroll
+#endif
+      for (Member i = 0; i < static_cast(work_stride * batch_size) &&
+                         i < work_end - iwork;
+           i = (i < static_cast(work_end - work_stride - iwork))
+                   ? i + work_stride
+                   : work_end - iwork) {
+        this->template exec_range(iwork + i);
+      }
     }
   }
 
   inline void execute() const {
-    const typename Policy::index_type nwork = m_policy.end() - m_policy.begin();
-
+    constexpr typename Policy::index_type batch_size =
+        StaticBatchSize::batch_size;
+    const typename Policy::index_type nwork =
+        (m_policy.end() - m_policy.begin()) / batch_size +
+        ((m_policy.end() - m_policy.begin()) % batch_size == 0 ? 0 : 1);
     cudaFuncAttributes attr =
         CudaParallelLaunch::get_cuda_func_attributes(
             m_policy.space().impl_internal_space_instance());
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
index 86102fa8e2a..9befb1702cd 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_PARALLEL_TEAM_HPP
 #define KOKKOS_CUDA_PARALLEL_TEAM_HPP
@@ -43,8 +30,6 @@
 
 namespace Kokkos {
 
-extern bool show_warnings() noexcept;
-
 namespace Impl {
 
 template 
@@ -225,7 +210,7 @@ class TeamPolicyInternal
   const typename traits::execution_space& space() const { return m_space; }
 
   TeamPolicyInternal()
-      : m_space(typename traits::execution_space()),
+      : m_space(),
         m_league_size(0),
         m_team_size(-1),
         m_vector_length(0),
@@ -236,9 +221,9 @@ class TeamPolicyInternal
         m_tune_vector(false) {}
 
   /** \brief  Specify league size, specify team size, specify vector length */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      int team_size_request, int vector_length_request = 1)
-      : m_space(space_),
+      : m_space(std::move(space)),
         m_league_size(league_size_),
         m_team_size(team_size_request),
         m_vector_length(impl_determine_vector_length(vector_length_request)),
@@ -264,23 +249,25 @@ class TeamPolicyInternal
   }
 
   /** \brief  Specify league size, request team size, specify vector length */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      const Kokkos::AUTO_t& /* team_size_request */
                      ,
                      int vector_length_request = 1)
-      : TeamPolicyInternal(space_, league_size_, -1, vector_length_request) {}
+      : TeamPolicyInternal(std::move(space), league_size_, -1,
+                           vector_length_request) {}
 
   /** \brief  Specify league size, request team size and vector length */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      const Kokkos::AUTO_t& /* team_size_request */,
                      const Kokkos::AUTO_t& /* vector_length_request */
                      )
-      : TeamPolicyInternal(space_, league_size_, -1, -1) {}
+      : TeamPolicyInternal(std::move(space), league_size_, -1, -1) {}
 
   /** \brief  Specify league size, specify team size, request vector length */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      int team_size_request, const Kokkos::AUTO_t&)
-      : TeamPolicyInternal(space_, league_size_, team_size_request, -1) {}
+      : TeamPolicyInternal(std::move(space), league_size_, team_size_request,
+                           -1) {}
 
   TeamPolicyInternal(int league_size_, int team_size_request,
                      int vector_length_request = 1)
@@ -306,6 +293,12 @@ class TeamPolicyInternal
       : TeamPolicyInternal(typename traits::execution_space(), league_size_,
                            team_size_request, vector_length_request) {}
 
+  TeamPolicyInternal(const PolicyUpdate, const TeamPolicyInternal& other,
+                     typename traits::execution_space space)
+      : TeamPolicyInternal(other) {
+    this->m_space = std::move(space);
+  }
+
   inline int chunk_size() const { return m_chunk_size; }
 
   /** \brief set chunk_size to a discrete value*/
@@ -522,14 +515,13 @@ class ParallelFor,
   ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy)
       : m_functor(arg_functor),
         m_policy(arg_policy),
-        m_league_size(arg_policy.league_size()),
-        m_team_size(arg_policy.team_size()),
-        m_vector_size(arg_policy.impl_vector_length()) {
+        m_league_size(m_policy.league_size()),
+        m_team_size(m_policy.team_size()),
+        m_vector_size(m_policy.impl_vector_length()) {
     auto internal_space_instance =
         m_policy.space().impl_internal_space_instance();
     if (m_team_size < 0) {
-      m_team_size =
-          arg_policy.team_size_recommended(arg_functor, ParallelForTag());
+      m_team_size = m_policy.team_size_recommended(m_functor, ParallelForTag());
       if (m_team_size <= 0)
         Kokkos::Impl::throw_runtime_exception(
             "Kokkos::Impl::ParallelFor could not find a "
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 29ff01ac91b..7069e89f189 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_REDUCESCAN_HPP
 #define KOKKOS_CUDA_REDUCESCAN_HPP
@@ -215,14 +202,19 @@ struct CudaReductionsFunctor {
 
   __device__ static inline void scalar_intra_block_reduction(
       const FunctorType& functor, Scalar value, const bool skip,
-      Scalar* my_global_team_buffer_element, const int shared_elements,
+      Scalar* my_global_team_buffer_element, const unsigned int shared_elements,
       Scalar* shared_team_buffer_element) {
-    const int warp_id = (threadIdx.y * blockDim.x) / 32;
+    const unsigned int warp_id     = (threadIdx.y * blockDim.x) / 32u;
+    const unsigned int num_threads = blockDim.x * blockDim.y;
     Scalar* const my_shared_team_buffer_element =
         shared_team_buffer_element + warp_id % shared_elements;
 
+    const unsigned int num_active_threads_in_warp0 =
+        Kokkos::min(32u, num_threads);
+
     // Warp Level Reduction, ignoring Kokkos vector entries
-    scalar_intra_warp_reduction(functor, value, skip, 32, value);
+    scalar_intra_warp_reduction(functor, value, skip,
+                                num_active_threads_in_warp0, value);
 
     if (warp_id < shared_elements) {
       *my_shared_team_buffer_element = value;
@@ -231,21 +223,27 @@ struct CudaReductionsFunctor {
     // warp reduction
     __syncthreads();
 
-    const int num_warps = blockDim.x * blockDim.y / 32;
-    for (int w = shared_elements; w < num_warps; w += shared_elements) {
+    const unsigned int num_warps = num_threads / 32u;
+    for (unsigned int w = shared_elements; w < num_warps;
+         w += shared_elements) {
       if (warp_id >= w && warp_id < w + shared_elements) {
-        if ((threadIdx.y * blockDim.x + threadIdx.x) % 32 == 0)
+        if ((threadIdx.y * blockDim.x + threadIdx.x) % 32u == 0u)
           functor.join(my_shared_team_buffer_element, &value);
       }
       __syncthreads();
     }
 
-    if (warp_id == 0) {
+    if (warp_id == 0u) {
       functor.init(&value);
+
+      // We can increment by WarpSize rather than num_active_threads_in_warp0
+      // because if the first warp is not complete, we won't enter the loop
       for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x;
-           i < blockDim.y * blockDim.x / 32; i += 32)
+           i < num_warps; i += 32u)
         functor.join(&value, &shared_team_buffer_element[i]);
-      scalar_intra_warp_reduction(functor, value, false, 32,
+
+      scalar_intra_warp_reduction(functor, value, false,
+                                  num_active_threads_in_warp0,
                                   *my_global_team_buffer_element);
     }
   }
@@ -258,9 +256,9 @@ struct CudaReductionsFunctor {
     Scalar* const my_global_team_buffer_element =
         global_team_buffer_element + blockIdx.x;
     Scalar* shared_team_buffer_elements = ((Scalar*)shared_data);
-    Scalar value        = shared_team_buffer_elements[threadIdx.y];
-    int shared_elements = blockDim.x * blockDim.y / 32;
-    int global_elements = block_count;
+    Scalar value                 = shared_team_buffer_elements[threadIdx.y];
+    unsigned int shared_elements = blockDim.x * blockDim.y / 32u;
+    unsigned int global_elements = block_count;
     __syncthreads();
 
     scalar_intra_block_reduction(functor, value, true,
@@ -268,21 +266,21 @@ struct CudaReductionsFunctor {
                                  shared_team_buffer_elements);
     __threadfence();
     __syncthreads();
-    unsigned int num_teams_done = 0;
+    unsigned int num_teams_done = 0u;
     // The cast in the atomic call is necessary to find matching call with
     // MSVC/NVCC
-    if (threadIdx.x + threadIdx.y == 0) {
+    if (threadIdx.x + threadIdx.y == 0u) {
       num_teams_done =
           Kokkos::atomic_fetch_add(global_flags, static_cast<unsigned int>(1)) +
-          1;
+          1u;
     }
     bool is_last_block = false;
     if (__syncthreads_or(num_teams_done == gridDim.x)) {
       is_last_block = true;
-      *global_flags = 0;
+      *global_flags = 0u;
       functor.init(&value);
-      for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements;
-           i += blockDim.x * blockDim.y) {
+      for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x;
+           i < global_elements; i += blockDim.x * blockDim.y) {
         functor.join(&value, &global_team_buffer_element[i]);
       }
       scalar_intra_block_reduction(
@@ -315,7 +313,7 @@ struct CudaReductionsFunctor {
     __syncwarp(mask);
 
     for (int delta = skip_vector ? blockDim.x : 1; delta < width; delta *= 2) {
-      if ((lane_id + delta < 32) && (lane_id % (delta * 2) == 0)) {
+      if ((lane_id + delta < width) && (lane_id % (delta * 2) == 0)) {
         functor.join(value, value + delta);
       }
       __syncwarp(mask);
@@ -328,24 +326,25 @@ struct CudaReductionsFunctor {
   __device__ static inline void scalar_intra_block_reduction(
       const FunctorType& functor, Scalar value, const bool skip, Scalar* result,
       const int /*shared_elements*/, Scalar* shared_team_buffer_element) {
-    const int warp_id = (threadIdx.y * blockDim.x) / 32;
+    const int warp_id              = (threadIdx.y * blockDim.x) / 32;
+    const unsigned int num_threads = blockDim.x * blockDim.y;
     Scalar* const my_shared_team_buffer_element =
         shared_team_buffer_element + threadIdx.y * blockDim.x + threadIdx.x;
     *my_shared_team_buffer_element = value;
     // Warp Level Reduction, ignoring Kokkos vector entries
     scalar_intra_warp_reduction(functor, my_shared_team_buffer_element, skip,
-                                32);
+                                Kokkos::min(32u, num_threads));
     // Wait for every warp to be done before using one warp to do final cross
     // warp reduction
     __syncthreads();
 
     if (warp_id == 0) {
       const unsigned int delta = (threadIdx.y * blockDim.x + threadIdx.x) * 32;
-      if (delta < blockDim.x * blockDim.y)
+      if (delta < num_threads)
         *my_shared_team_buffer_element = shared_team_buffer_element[delta];
       __syncwarp(0xffffffff);
       scalar_intra_warp_reduction(functor, my_shared_team_buffer_element, false,
-                                  blockDim.x * blockDim.y / 32);
+                                  num_threads / 32);
       if (threadIdx.x + threadIdx.y == 0) *result = *shared_team_buffer_element;
     }
   }
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
deleted file mode 100644
index e401c9f74b1..00000000000
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
-#define KOKKOS_IMPL_PUBLIC_INCLUDE
-#endif
-
-#include 
-#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_TASKDAG)
-
-#include 
-
-#include 
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template class TaskQueue<
-    Kokkos::Cuda,
-    Impl::default_tasking_memory_space_for_execution_space_t >;
-template class TaskQueueMultiple<
-    Kokkos::Cuda,
-    Impl::default_tasking_memory_space_for_execution_space_t >;
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-#else
-void KOKKOS_CORE_SRC_CUDA_KOKKOS_CUDA_TASK_PREVENT_LINK_ERROR() {}
-#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG \
-          ) */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
deleted file mode 100644
index 30a9fde1ea0..00000000000
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
+++ /dev/null
@@ -1,1248 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#ifndef KOKKOS_IMPL_CUDA_TASK_HPP
-#define KOKKOS_IMPL_CUDA_TASK_HPP
-
-#include 
-#if defined(KOKKOS_ENABLE_TASKDAG)
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#include 
-
-#include 
-#include   // KOKKOS_IMPL_CUDA_SAFE_CALL
-#include 
-
-//----------------------------------------------------------------------------
-
-// We allow using deprecated classes in this file
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
-
-// NOLINTBEGIN
-
-#if defined(__CUDA_ARCH__)
-#define KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN(MSG)                           \
-  {                                                                        \
-    __syncwarp();                                                          \
-    const unsigned b = __activemask();                                     \
-    if (b != 0xffffffff) {                                                 \
-      printf(" SYNCWARP AT %s (%d,%d,%d) (%d,%d,%d) failed %x\n", MSG,     \
-             blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, threadIdx.y, \
-             threadIdx.z, b);                                              \
-      return;                                                              \
-    }                                                                      \
-  }
-#else
-#define KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN(MSG)
-#endif
-
-namespace Kokkos {
-namespace Impl {
-namespace {
-
-template 
-__global__ void set_cuda_task_base_apply_function_pointer(
-    typename TaskType::function_type* ptr,
-    typename TaskType::destroy_type* dtor) {
-  *ptr  = TaskType::apply;
-  *dtor = TaskType::destroy;
-}
-
-template 
-__global__ void cuda_task_queue_execute(Scheduler scheduler,
-                                        int32_t shmem_size) {
-  TaskQueueSpecialization::driver(std::move(scheduler), shmem_size);
-}
-
-}  // namespace
-
-template 
-class TaskExec;
-
-template 
-class TaskQueueSpecialization> {
- public:
-  using scheduler_type  = SimpleTaskScheduler;
-  using execution_space = Kokkos::Cuda;
-  using memory_space    = Kokkos::CudaUVMSpace;
-  using member_type     = TaskExec;
-
-  enum : long { max_league_size = 16 };
-  enum : int { warps_per_block = 4 };
-
-  KOKKOS_INLINE_FUNCTION
-  static void iff_single_thread_recursive_execute(scheduler_type const&) {}
-
-  static int get_max_team_count(execution_space const& space) {
-    return space.cuda_device_prop().multiProcessorCount * warps_per_block;
-  }
-
-  __device__ static void driver(scheduler_type scheduler,
-                                int32_t shmem_per_warp) {
-    using task_base_type = typename scheduler_type::task_base_type;
-    using runnable_task_base_type =
-        typename scheduler_type::runnable_task_base_type;
-    using scheduling_info_storage_type = SchedulingInfoStorage<
-        runnable_task_base_type,
-        typename scheduler_type::task_scheduling_info_type>;
-
-    extern __shared__ int32_t shmem_all[];
-
-    int32_t* const warp_shmem =
-        shmem_all + (threadIdx.z * shmem_per_warp) / sizeof(int32_t);
-
-    task_base_type* const shared_memory_task_copy = (task_base_type*)warp_shmem;
-
-    const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x;
-
-    member_type single_exec(scheduler, warp_shmem, 1);
-    member_type team_exec(scheduler, warp_shmem, blockDim.y);
-
-    auto& queue          = scheduler.queue();
-    auto& team_scheduler = team_exec.scheduler();
-
-    auto current_task = OptionalRef();
-
-    // Loop until all queues are empty and no tasks in flight
-    while (!queue.is_done()) {
-      if (warp_lane == 0) {  // should be (?) same as team_exec.team_rank() == 0
-        // pop off a task
-        current_task =
-            queue.pop_ready_task(team_scheduler.team_scheduler_info());
-      }
-
-      // Broadcast task pointer:
-
-      // Sync before the broadcast
-      __syncwarp(0xffffffff);
-
-      // pretend it's an int* for shuffle purposes
-      ((int*)&current_task)[0] =
-          __shfl_sync(0xffffffff, ((int*)&current_task)[0], 0, 32);
-      ((int*)&current_task)[1] =
-          __shfl_sync(0xffffffff, ((int*)&current_task)[1], 0, 32);
-
-      if (current_task) {
-        KOKKOS_ASSERT(!current_task->as_runnable_task().get_respawn_flag());
-
-        int32_t b = sizeof(scheduling_info_storage_type) / sizeof(int32_t);
-        static_assert(
-            sizeof(scheduling_info_storage_type) % sizeof(int32_t) == 0,
-            "bad task size");
-        int32_t const e = current_task->get_allocation_size() / sizeof(int32_t);
-        KOKKOS_ASSERT(current_task->get_allocation_size() % sizeof(int32_t) ==
-                      0);
-
-        int32_t volatile* const task_mem =
-            (int32_t volatile*)current_task.get();
-
-        // do a coordinated copy of the task closure from global to shared
-        // memory:
-        for (int32_t i = warp_lane; i < e; i += CudaTraits::WarpSize) {
-          warp_shmem[i] = task_mem[i];
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to all threads in the warp.
-        __syncwarp(0xffffffff);
-
-        if (shared_memory_task_copy->is_team_runnable()) {
-          // Thread Team Task
-          shared_memory_task_copy->as_runnable_task().run(team_exec);
-        } else if (threadIdx.y == 0) {
-          // TODO @tasking @optimization DSH Change this to warp_lane == 0 when
-          // we allow blockDim.x to be more than 1 Single Thread Task
-          shared_memory_task_copy->as_runnable_task().run(single_exec);
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to all threads in the warp.
-
-        __syncwarp(0xffffffff);
-
-        // if(warp_lane < b % CudaTraits::WarpSize) b += CudaTraits::WarpSize;
-        // b -= b % CudaTraits::WarpSize;
-
-        // copy task closure from shared to global memory:
-        for (int32_t i = b + warp_lane; i < e; i += CudaTraits::WarpSize) {
-          task_mem[i] = warp_shmem[i];
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to root thread of the warp for
-        // respawn or completion.
-
-        __syncwarp(0xffffffff);
-
-        if (warp_lane == 0) {
-          // If respawn requested copy respawn data back to main memory
-          if (shared_memory_task_copy->as_runnable_task().get_respawn_flag()) {
-            if (shared_memory_task_copy->as_runnable_task().has_predecessor()) {
-              // It's not necessary to make this a volatile write because
-              // the next read of the predecessor is on this thread in complete,
-              // and the predecessor is cleared there (using a volatile write)
-              current_task->as_runnable_task().acquire_predecessor_from(
-                  shared_memory_task_copy->as_runnable_task());
-            }
-
-            // It may not necessary to make this a volatile write, since the
-            // next read will be done by this thread in complete where the
-            // rescheduling occurs, but since the task could be stolen later
-            // before this is written again, we should do the volatile write
-            // here.  (It might not be necessary though because I don't know
-            // where else the priority would be read after it is scheduled
-            // by this thread; for now, we leave it volatile, but we should
-            // benchmark the cost of this.)
-            current_task.as_volatile()->set_priority(
-                shared_memory_task_copy->get_priority());
-
-            // It's not necessary to make this a volatile write, since the
-            // next read of it (if true) will be by this thread in `complete()`,
-            // which will unset the flag (using volatile) once it has handled
-            // the respawn
-            current_task->as_runnable_task().set_respawn_flag();
-          }
-
-          queue.complete((*std::move(current_task)).as_runnable_task(),
-                         team_scheduler.team_scheduler_info());
-        }
-      }
-    }
-  }
-
-  // FIXME_CUDA_MULTIPLE_DEVICES
-  static void execute(scheduler_type const& scheduler) {
-    const int shared_per_warp = 2048;
-    const Kokkos::Cuda& exec  = scheduler.get_execution_space();
-    const auto& impl_instance = exec.impl_internal_space_instance();
-    const int multi_processor_count =
-        exec.cuda_device_prop().multiProcessorCount;
-    const dim3 grid(multi_processor_count, 1, 1);
-    const dim3 block(1, Kokkos::Impl::CudaTraits::WarpSize, warps_per_block);
-    const int shared_total    = shared_per_warp * warps_per_block;
-    const cudaStream_t stream = nullptr;
-
-    KOKKOS_ASSERT(
-        static_cast(grid.x * grid.y * grid.z * block.x * block.y *
-                          block.z) ==
-        static_cast(get_max_team_count(scheduler.get_execution_space()) *
-                          Kokkos::Impl::CudaTraits::WarpSize));
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecialization::execute: Pre Task Execution");
-
-    // Query the stack size, in bytes:
-
-    size_t previous_stack_size = 0;
-    KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_get_limit_wrapper(
-        &previous_stack_size, cudaLimitStackSize));
-
-    // If not large enough then set the stack size, in bytes:
-
-    const size_t larger_stack_size = 1 << 11;
-
-    if (previous_stack_size < larger_stack_size) {
-      KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_set_limit_wrapper(
-          cudaLimitStackSize, larger_stack_size));
-    }
-
-    cuda_task_queue_execute<<>>(
-        scheduler, shared_per_warp);
-
-    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetLastError());
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecialization::execute: Post Task Execution");
-
-    if (previous_stack_size < larger_stack_size) {
-      KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_set_limit_wrapper(
-          cudaLimitStackSize, previous_stack_size));
-    }
-  }
-
-  template 
-  static
-      // TODO @tasking @optimiazation DSH specialize this for trivially
-      // destructible types
-      void
-      get_function_pointer(typename TaskType::function_type& ptr,
-                           typename TaskType::destroy_type& dtor) {
-    using function_type = typename TaskType::function_type;
-    using destroy_type  = typename TaskType::destroy_type;
-
-    // TODO @tasking @minor DSH make sure there aren't any alignment concerns?
-    void* storage = cuda_internal_scratch_unified(
-        Kokkos::Cuda(), sizeof(function_type) + sizeof(destroy_type));
-    function_type* ptr_ptr = (function_type*)storage;
-    destroy_type* dtor_ptr =
-        (destroy_type*)((char*)storage + sizeof(function_type));
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecialization::execute: Pre Get Function Pointer for Tasks");
-
-    set_cuda_task_base_apply_function_pointer
-        <<<1, 1>>>(ptr_ptr, dtor_ptr);
-
-    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetLastError());
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecialization::execute: Post Get Function Pointer for Tasks");
-
-    ptr  = *ptr_ptr;
-    dtor = *dtor_ptr;
-  }
-};
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-template 
-class TaskQueueSpecializationConstrained<
-    Scheduler, std::enable_if_t::value>> {
- public:
-  using scheduler_type  = Scheduler;
-  using execution_space = Kokkos::Cuda;
-  using memory_space    = Kokkos::CudaUVMSpace;
-  using member_type     = TaskExec;
-
-  enum : long { max_league_size = 16 };
-
-  KOKKOS_INLINE_FUNCTION
-  static void iff_single_thread_recursive_execute(scheduler_type const&) {}
-
-  __device__ static void driver(scheduler_type scheduler,
-                                int32_t shmem_per_warp) {
-    using queue_type     = typename scheduler_type::queue_type;
-    using task_root_type = TaskBase;
-
-    extern __shared__ int32_t shmem_all[];
-
-    task_root_type* const end = (task_root_type*)task_root_type::EndTag;
-    task_root_type* const no_more_tasks_sentinel = nullptr;
-
-    int32_t* const warp_shmem =
-        shmem_all + (threadIdx.z * shmem_per_warp) / sizeof(int32_t);
-
-    task_root_type* const task_shmem = (task_root_type*)warp_shmem;
-
-    const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x;
-
-    member_type single_exec(scheduler, warp_shmem, 1);
-    member_type team_exec(scheduler, warp_shmem, blockDim.y);
-
-    auto& team_queue = team_exec.scheduler().queue();
-
-    task_root_type* task_ptr = no_more_tasks_sentinel;
-
-    // Loop until all queues are empty and no tasks in flight
-
-    do {
-      // Each team lead attempts to acquire either a thread team task
-      // or collection of single thread tasks for the team.
-
-      if (0 == warp_lane) {
-        if (*((volatile int*)&team_queue.m_ready_count) > 0) {
-          task_ptr = end;
-          // Attempt to acquire a task
-          // Loop by priority and then type
-          for (int i = 0; i < queue_type::NumQueue && end == task_ptr; ++i) {
-            for (int j = 0; j < 2 && end == task_ptr; ++j) {
-              task_ptr = queue_type::pop_ready_task(&team_queue.m_ready[i][j]);
-            }
-          }
-        } else {
-          // returns nullptr if and only if all other queues have a ready
-          // count of 0 also. Otherwise, returns a task from another queue
-          // or `end` if one couldn't be popped
-          task_ptr = team_queue.attempt_to_steal_task();
-        }
-      }
-
-      // Synchronize warp with memory fence before broadcasting task pointer:
-
-      // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "A" );
-      __syncwarp(0xffffffff);
-
-      // Broadcast task pointer:
-
-      ((int*)&task_ptr)[0] =
-          __shfl_sync(0xffffffff, ((int*)&task_ptr)[0], 0, 32);
-      ((int*)&task_ptr)[1] =
-          __shfl_sync(0xffffffff, ((int*)&task_ptr)[1], 0, 32);
-
-#if defined(KOKKOS_ENABLE_DEBUG)
-      KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN("TaskQueue CUDA task_ptr");
-#endif
-
-      if (0 == task_ptr) break;  // 0 == queue->m_ready_count
-
-      if (end != task_ptr) {
-        // Whole warp copy task's closure to/from shared memory.
-        // Use all threads of warp for coalesced read/write.
-
-        int32_t const b = sizeof(task_root_type) / sizeof(int32_t);
-        int32_t const e =
-            *((int32_t volatile*)(&task_ptr->m_alloc_size)) / sizeof(int32_t);
-
-        int32_t volatile* const task_mem = (int32_t volatile*)task_ptr;
-
-        KOKKOS_ASSERT(e * sizeof(int32_t) < shmem_per_warp);
-
-        // copy task closure from global to shared memory:
-
-        for (int32_t i = warp_lane; i < e; i += CudaTraits::WarpSize) {
-          warp_shmem[i] = task_mem[i];
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to all threads in the warp.
-
-        // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "B" );
-        __syncwarp(0xffffffff);
-
-        if (task_root_type::TaskTeam == task_shmem->m_task_type) {
-          // Thread Team Task
-          (*task_shmem->m_apply)(task_shmem, &team_exec);
-        } else if (0 == threadIdx.y) {
-          // Single Thread Task
-          (*task_shmem->m_apply)(task_shmem, &single_exec);
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to all threads in the warp.
-
-        // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "C" );
-        __syncwarp(0xffffffff);
-
-        // copy task closure from shared to global memory:
-
-        for (int32_t i = b + warp_lane; i < e; i += CudaTraits::WarpSize) {
-          task_mem[i] = warp_shmem[i];
-        }
-
-        // Synchronize threads of the warp and insure memory
-        // writes are visible to root thread of the warp for
-        // respawn or completion.
-
-        // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "D" );
-        __syncwarp(0xffffffff);
-
-        // If respawn requested copy respawn data back to main memory
-
-        if (0 == warp_lane) {
-          if (((task_root_type*)task_root_type::LockTag) !=
-              task_shmem->m_next) {
-            ((volatile task_root_type*)task_ptr)->m_next = task_shmem->m_next;
-            ((volatile task_root_type*)task_ptr)->m_priority =
-                task_shmem->m_priority;
-          }
-
-          team_queue.complete(task_ptr);
-        }
-      }
-    } while (1);
-  }
-
-  // FIXME_CUDA_MULTIPLE_DEVICES
-  static void execute(scheduler_type const& scheduler) {
-    const int shared_per_warp = 2048;
-    const int warps_per_block = 4;
-    const Kokkos::Cuda exec   = Cuda();  // FIXME_CUDA_MULTIPLE_DEVICES
-    const auto& impl_instance = exec.impl_internal_space_instance();
-    const int multi_processor_count =
-        // FIXME not sure why this didn't work
-        // exec.cuda_device_prop().multiProcessorCount;
-        impl_instance->m_deviceProp.multiProcessorCount;
-    const dim3 grid(multi_processor_count, 1, 1);
-    // const dim3 grid( 1 , 1 , 1 );
-    const dim3 block(1, Kokkos::Impl::CudaTraits::WarpSize, warps_per_block);
-    const int shared_total    = shared_per_warp * warps_per_block;
-    const cudaStream_t stream = 0;
-
-    auto& queue = scheduler.queue();
-    queue.initialize_team_queues(warps_per_block * grid.x);
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecializationConstrained::execute: Pre Execute Task");
-
-    // Query the stack size, in bytes:
-
-    size_t previous_stack_size = 0;
-    KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_get_limit_wrapper(
-        &previous_stack_size, cudaLimitStackSize));
-
-    // If not large enough then set the stack size, in bytes:
-
-    const size_t larger_stack_size = 2048;
-
-    if (previous_stack_size < larger_stack_size) {
-      KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_set_limit_wrapper(
-          cudaLimitStackSize, larger_stack_size));
-    }
-
-    cuda_task_queue_execute<<>>(
-        scheduler, shared_per_warp);
-
-    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetLastError());
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecializationConstrained::execute: Post Execute Task");
-
-    if (previous_stack_size < larger_stack_size) {
-      KOKKOS_IMPL_CUDA_SAFE_CALL(impl_instance->cuda_device_set_limit_wrapper(
-          cudaLimitStackSize, previous_stack_size));
-    }
-  }
-
-  template 
-  static void get_function_pointer(typename TaskType::function_type& ptr,
-                                   typename TaskType::destroy_type& dtor) {
-    using function_type = typename TaskType::function_type;
-    using destroy_type  = typename TaskType::destroy_type;
-
-    void* storage = cuda_internal_scratch_unified(
-        Kokkos::Cuda(), sizeof(function_type) + sizeof(destroy_type));
-    function_type* ptr_ptr = (function_type*)storage;
-    destroy_type* dtor_ptr =
-        (destroy_type*)((char*)storage + sizeof(function_type));
-
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecializationConstrained::get_function_pointer: Pre Get Function Pointer");
-
-    set_cuda_task_base_apply_function_pointer
-        <<<1, 1>>>(ptr_ptr, dtor_ptr);
-
-    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetLastError());
-    Impl::cuda_device_synchronize(
-        "Kokkos::Impl::TaskQueueSpecializationConstrained::get_function_pointer: Post Get Function Pointer");
-
-    ptr  = *ptr_ptr;
-    dtor = *dtor_ptr;
-  }
-};
-
-extern template class TaskQueue<
-    Kokkos::Cuda,
-    default_tasking_memory_space_for_execution_space_t>;
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-/**\brief  Impl::TaskExec is the TaskScheduler::member_type
- *         passed to tasks running in a Cuda space.
- *
- *  Cuda thread blocks for tasking are dimensioned:
- *    blockDim.x == vector length
- *    blockDim.y == team size
- *    blockDim.z == number of teams
- *  where
- *    blockDim.x * blockDim.y == WarpSize
- *
- *  Current implementation requires blockDim.x == 1.
- *  Vector level parallelism with blockDim.y > 1 on Volta will
- *  require a vector-level synchronization mask for vector-level
- *  collective operaitons.
- *
- *  Both single thread and thread team tasks are run by a full Cuda warp.
- *  A single thread task is called by warp lane #0 and the remaining
- *  lanes of the warp are idle.
- *
- *  When executing a single thread task the syncwarp or other
- *  warp synchronizing functions must not be called.
- */
-template 
-class TaskExec {
- private:
-  enum : int { WarpSize = Kokkos::Impl::CudaTraits::WarpSize };
-
-  TaskExec(TaskExec&&)                 = delete;
-  TaskExec(TaskExec const&)            = delete;
-  TaskExec& operator=(TaskExec&&)      = delete;
-  TaskExec& operator=(TaskExec const&) = delete;
-
-  friend class Kokkos::Impl::TaskQueue<
-      Kokkos::Cuda,
-      default_tasking_memory_space_for_execution_space_t>;
-  template 
-  friend class Kokkos::Impl::TaskQueueSpecializationConstrained;
-  template 
-  friend struct Kokkos::Impl::TaskQueueSpecialization;
-
-  int32_t* m_team_shmem;
-  const int m_team_size;
-  Scheduler m_scheduler;
-
-  // If constructed with arg_team_size == 1 the object
-  // can only be used by 0 == threadIdx.y.
-  KOKKOS_INLINE_FUNCTION
-  TaskExec(Scheduler const& parent_scheduler, int32_t* arg_team_shmem,
-           int arg_team_size = blockDim.y)
-      : m_team_shmem(arg_team_shmem),
-        m_team_size(arg_team_size),
-        m_scheduler(parent_scheduler.get_team_scheduler(league_rank())) {}
-
- public:
-  using thread_team_member = TaskExec;
-
-#if defined(__CUDA_ARCH__)
-  __device__ int team_rank() const { return threadIdx.y; }
-  __device__ int team_size() const { return m_team_size; }
-  //__device__ int league_rank() const { return threadIdx.z; }
-  __device__ int league_rank() const {
-    return blockIdx.x * blockDim.z + threadIdx.z;
-  }
-  __device__ int league_size() const { return blockDim.z * gridDim.x; }
-
-  __device__ void team_barrier() const {
-    if (1 < m_team_size) {
-      __syncwarp(0xffffffff);
-    }
-  }
-
-  template 
-  __device__ void team_broadcast(ValueType& val, const int thread_id) const {
-    if (1 < m_team_size) {
-      // WarpSize = blockDim.X * blockDim.y
-      // thread_id < blockDim.y
-      ValueType tmp(val);  // input might not be register variable
-      Impl::in_place_shfl(val, tmp, blockDim.x * thread_id, WarpSize);
-    }
-  }
-
-#else
-  __host__ int team_rank() const { return 0; }
-  __host__ int team_size() const { return 0; }
-  __host__ int league_rank() const { return 0; }
-  __host__ int league_size() const { return 0; }
-  __host__ void team_barrier() const {}
-  template 
-  __host__ void team_broadcast(ValueType&, const int) const {}
-#endif
-
-  KOKKOS_INLINE_FUNCTION Scheduler const& scheduler() const noexcept {
-    return m_scheduler;
-  }
-  KOKKOS_INLINE_FUNCTION Scheduler& scheduler() noexcept { return m_scheduler; }
-};
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template 
-struct TeamThreadRangeBoundariesStruct> {
-  using index_type  = iType;
-  using member_type = TaskExec;
-
-  const iType start;
-  const iType end;
-  const iType increment;
-  member_type const& member;
-
-#if defined(__CUDA_ARCH__)
-
-  __device__ inline TeamThreadRangeBoundariesStruct(
-      member_type const& arg_thread, const iType& arg_count)
-      : start(threadIdx.y),
-        end(arg_count),
-        increment(blockDim.y),
-        member(arg_thread) {}
-
-  __device__ inline TeamThreadRangeBoundariesStruct(
-      member_type const& arg_thread, const iType& arg_start,
-      const iType& arg_end)
-      : start(arg_start + threadIdx.y),
-        end(arg_end),
-        increment(blockDim.y),
-        member(arg_thread) {}
-
-#else
-
-  TeamThreadRangeBoundariesStruct(member_type const& arg_thread,
-                                  const iType& arg_count);
-
-  TeamThreadRangeBoundariesStruct(member_type const& arg_thread,
-                                  const iType& arg_start, const iType& arg_end);
-
-#endif
-};
-
-//----------------------------------------------------------------------------
-
-template 
-struct ThreadVectorRangeBoundariesStruct> {
-  using index_type  = iType;
-  using member_type = TaskExec;
-
-  const index_type start;
-  const index_type end;
-  const index_type increment;
-  const member_type& member;
-
-#if defined(__CUDA_ARCH__)
-
-  __device__ inline ThreadVectorRangeBoundariesStruct(
-      member_type const& arg_thread, const index_type& arg_count)
-      : start(threadIdx.x),
-        end(arg_count),
-        increment(blockDim.x),
-        member(arg_thread) {}
-
-  __device__ inline ThreadVectorRangeBoundariesStruct(
-      member_type const& arg_thread, const index_type& arg_begin,
-      const index_type& arg_end)
-      : start(arg_begin + threadIdx.x),
-        end(arg_end),
-        increment(blockDim.x),
-        member(arg_thread) {}
-
-#else
-
-  ThreadVectorRangeBoundariesStruct(member_type const& arg_thread,
-                                    const index_type& arg_count);
-
-  ThreadVectorRangeBoundariesStruct(member_type const& arg_thread,
-                                    const index_type& arg_begin,
-                                    const index_type& arg_end);
-
-#endif
-};
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-// template
-// KOKKOS_INLINE_FUNCTION
-// Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda >
-// > TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType
-// & count )
-//{
-//  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec<
-//  Kokkos::Cuda > >( thread, count );
-//}
-//
-// template
-// KOKKOS_INLINE_FUNCTION
-// Impl::TeamThreadRangeBoundariesStruct
-//  < std::common_type_t
-//  , Impl::TaskExec< Kokkos::Cuda > >
-// TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread
-//               , const iType1 & begin, const iType2 & end )
-//{
-//  using iType = std::common_type_t< iType1, iType2 >;
-//  return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec<
-//  Kokkos::Cuda > >(
-//           thread, iType(begin), iType(end) );
-//}
-//
-// template
-// KOKKOS_INLINE_FUNCTION
-// Impl::ThreadVectorRangeBoundariesStruct
-// > ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread
-//                 , const iType & count )
-//{
-//  return Impl::ThreadVectorRangeBoundariesStruct >(thread,count);
-//}
-//
-// template
-// KOKKOS_INLINE_FUNCTION
-// Impl::ThreadVectorRangeBoundariesStruct
-// > ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread
-//                 , const iType & arg_begin
-//                 , const iType & arg_end )
-//{
-//  return Impl::ThreadVectorRangeBoundariesStruct >(thread,arg_begin,arg_end);
-//}
-
-// KOKKOS_INLINE_FUNCTION
-// Impl::ThreadSingleStruct >
-// PerTeam(const Impl::TaskExec< Kokkos::Cuda >& thread)
-// {
-//   return Impl::ThreadSingleStruct >(thread);
-// }
-
-// KOKKOS_INLINE_FUNCTION
-// Impl::VectorSingleStruct >
-// PerThread(const Impl::TaskExec< Kokkos::Cuda >& thread)
-// {
-//   return Impl::VectorSingleStruct >(thread);
-// }
-
-/** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each
- * i=0..N-1.
- *
- * The range i=0..N-1 is mapped to all threads of the the calling thread team.
- */
-template 
-KOKKOS_INLINE_FUNCTION void parallel_for(
-    const Impl::TeamThreadRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda) {
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i);
-  }
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void parallel_for(
-    const Impl::ThreadVectorRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda) {
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i);
-  }
-}
-
-// reduce across corresponding lanes between team members within warp
-// assume stride*team_size == warp_size
-template 
-KOKKOS_INLINE_FUNCTION void strided_shfl_warp_reduction(const JoinType& join,
-                                                        ValueType& val,
-                                                        int team_size,
-                                                        int stride) {
-  for (int lane_delta = (team_size * stride) >> 1; lane_delta >= stride;
-       lane_delta >>= 1) {
-    join(val, Kokkos::shfl_down(val, lane_delta, team_size * stride));
-  }
-}
-
-// multiple within-warp non-strided reductions
-template 
-KOKKOS_INLINE_FUNCTION void multi_shfl_warp_reduction(const JoinType& join,
-                                                      ValueType& val,
-                                                      int vec_length) {
-  for (int lane_delta = vec_length >> 1; lane_delta; lane_delta >>= 1) {
-    join(val, Kokkos::shfl_down(val, lane_delta, vec_length));
-  }
-}
-
-// broadcast within warp
-template 
-KOKKOS_INLINE_FUNCTION ValueType shfl_warp_broadcast(ValueType& val,
-                                                     int src_lane, int width) {
-  if (1 < width) {
-    return Kokkos::shfl(val, src_lane, width);
-  } else {
-    return val;
-  }
-}
-
-/*// all-reduce across corresponding vector lanes between team members within
-warp
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-template< typename iType, class Lambda, typename ValueType, class JoinType >
-KOKKOS_INLINE_FUNCTION
-void parallel_reduce
-  (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const JoinType& join,
-   ValueType& initialized_result) {
-
-  ValueType result = initialized_result;
-  for( iType i = loop_boundaries.start; i < loop_boundaries.end;
-i+=loop_boundaries.increment) { lambda(i,result);
-  }
-  initialized_result = result;
-
-  strided_shfl_warp_reduction(
-                          join,
-                          initialized_result,
-                          loop_boundaries.member.team_size(),
-                          blockDim.x);
-  initialized_result = shfl_warp_broadcast( initialized_result,
-threadIdx.x, Impl::CudaTraits::WarpSize );
-}*/
-
-// all-reduce across corresponding vector lanes between team members within warp
-// if no join() provided, use sum
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-template 
-KOKKOS_INLINE_FUNCTION void parallel_reduce(
-    const Impl::TeamThreadRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda, ValueType& initialized_result) {
-  // TODO @internal_documentation what is the point of creating this temporary?
-  ValueType result = initialized_result;
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i, result);
-  }
-  initialized_result = result;
-
-  if (1 < loop_boundaries.member.team_size()) {
-    strided_shfl_warp_reduction(
-        [&](ValueType& val1, const ValueType& val2) { val1 += val2; },
-        initialized_result, loop_boundaries.member.team_size(), blockDim.x);
-
-    initialized_result = shfl_warp_broadcast(
-        initialized_result, threadIdx.x, Impl::CudaTraits::WarpSize);
-  }
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void parallel_reduce(
-    const Impl::TeamThreadRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda, const ReducerType& reducer) {
-  using ValueType = typename ReducerType::value_type;
-  // TODO @internal_documentation what is the point of creating this temporary?
-  ValueType result = ValueType();
-  reducer.init(result);
-
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i, result);
-  }
-
-  if (1 < loop_boundaries.member.team_size()) {
-    strided_shfl_warp_reduction(
-        [&](ValueType& val1, const ValueType& val2) {
-          reducer.join(val1, val2);
-        },
-        result, loop_boundaries.member.team_size(), blockDim.x);
-
-    reducer.reference() = shfl_warp_broadcast(
-        result, threadIdx.x, Impl::CudaTraits::WarpSize);
-  } else {
-    reducer.reference() = result;
-  }
-}
-// all-reduce within team members within warp
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-/*template< typename iType, class Lambda, typename ValueType, class JoinType >
-KOKKOS_INLINE_FUNCTION
-void parallel_reduce
-  (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const JoinType& join,
-   ValueType& initialized_result) {
-
-  ValueType result = initialized_result;
-  for( iType i = loop_boundaries.start; i < loop_boundaries.end;
-i+=loop_boundaries.increment) { lambda(i,result);
-  }
-  initialized_result = result;
-
-  multi_shfl_warp_reduction(join, initialized_result,
-blockDim.x); initialized_result = shfl_warp_broadcast(
-initialized_result, 0, blockDim.x );
-}*/
-
-// all-reduce within team members within warp
-// if no join() provided, use sum
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-template 
-KOKKOS_INLINE_FUNCTION void parallel_reduce(
-    const Impl::ThreadVectorRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda, ValueType& initialized_result) {
-  ValueType result = initialized_result;
-
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i, result);
-  }
-
-  initialized_result = result;
-
-  if (1 < loop_boundaries.member.team_size()) {
-    // initialized_result = multi_shfl_warp_reduction(
-    multi_shfl_warp_reduction(
-        [&](ValueType& val1, const ValueType& val2) { val1 += val2; },
-        initialized_result, blockDim.x);
-
-    initialized_result =
-        shfl_warp_broadcast(initialized_result, 0, blockDim.x);
-  }
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void parallel_reduce(
-    const Impl::ThreadVectorRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Lambda& lambda, const ReducerType& reducer) {
-  using ValueType = typename ReducerType::value_type;
-
-  ValueType result = ValueType();
-  reducer.init(result);
-
-  for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-       i += loop_boundaries.increment) {
-    lambda(i, result);
-  }
-
-  if (1 < loop_boundaries.member.team_size()) {
-    multi_shfl_warp_reduction(
-        [&](ValueType& val1, const ValueType& val2) {
-          reducer.join(val1, val2);
-        },
-        result, blockDim.x);
-
-    reducer.reference() = shfl_warp_broadcast(result, 0, blockDim.x);
-  } else {
-    reducer.reference() = result;
-  }
-}
-// scan across corresponding vector lanes between team members within warp
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-template 
-KOKKOS_INLINE_FUNCTION void parallel_scan(
-    const Impl::TeamThreadRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Closure& closure) {
-  // Extract value_type from closure
-
-  using value_type = typename Kokkos::Impl::FunctorAnalysis<
-      Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure,
-      void>::value_type;
-
-  if (1 < loop_boundaries.member.team_size()) {
-    // make sure all threads perform all loop iterations
-    const iType bound = loop_boundaries.end + loop_boundaries.start;
-    const int lane    = threadIdx.y * blockDim.x;
-
-    value_type accum = 0;
-    value_type val, y, local_total;
-
-    for (iType i = loop_boundaries.start; i < bound;
-         i += loop_boundaries.increment) {
-      val = 0;
-      if (i < loop_boundaries.end) closure(i, val, false);
-
-      // intra-blockDim.y exclusive scan on 'val'
-      // accum = accumulated, sum in total for this iteration
-
-      // INCLUSIVE scan
-      for (int offset = blockDim.x;
-           offset < static_cast(Impl::CudaTraits::WarpSize);
-           offset <<= 1) {
-        y = Kokkos::shfl_up(val, offset, Impl::CudaTraits::WarpSize);
-        if (lane >= offset) {
-          val += y;
-        }
-      }
-
-      // pass accum to all threads
-      local_total = shfl_warp_broadcast(
-          val, threadIdx.x + Impl::CudaTraits::WarpSize - blockDim.x,
-          Impl::CudaTraits::WarpSize);
-
-      // make EXCLUSIVE scan by shifting values over one
-      val = Kokkos::shfl_up(val, blockDim.x, Impl::CudaTraits::WarpSize);
-      if (threadIdx.y == 0) {
-        val = 0;
-      }
-
-      val += accum;
-      if (i < loop_boundaries.end) closure(i, val, true);
-      accum += local_total;
-    }
-  } else {
-    value_type accum = 0;
-    for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-         i += loop_boundaries.increment) {
-      closure(i, accum, true);
-    }
-  }
-}
-
-// scan within team member (vector) within warp
-// assume vec_length*team_size == warp_size
-// blockDim.x == vec_length == stride
-// blockDim.y == team_size
-// threadIdx.x == position in vec
-// threadIdx.y == member number
-template 
-KOKKOS_INLINE_FUNCTION void parallel_scan(
-    const Impl::ThreadVectorRangeBoundariesStruct<
-        iType, Impl::TaskExec>& loop_boundaries,
-    const Closure& closure) {
-  // Extract value_type from closure
-
-  using value_type = typename Kokkos::Impl::FunctorAnalysis<
-      Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure,
-      void>::value_type;
-
-  if (1 < loop_boundaries.member.team_size()) {
-    // make sure all threads perform all loop iterations
-    const iType bound = loop_boundaries.end + loop_boundaries.start;
-
-    value_type accum = 0;
-    value_type val, y, local_total;
-
-    for (iType i = loop_boundaries.start; i < bound;
-         i += loop_boundaries.increment) {
-      val = 0;
-      if (i < loop_boundaries.end) closure(i, val, false);
-
-      // intra-blockDim.x exclusive scan on 'val'
-      // accum = accumulated, sum in total for this iteration
-
-      // INCLUSIVE scan
-      for (int offset = 1; offset < static_cast(blockDim.x);
-           offset <<= 1) {
-        y = Kokkos::shfl_up(val, offset, blockDim.x);
-        if (static_cast(threadIdx.x) >= offset) {
-          val += y;
-        }
-      }
-
-      // pass accum to all threads
-      local_total =
-          shfl_warp_broadcast(val, blockDim.x - 1, blockDim.x);
-
-      // make EXCLUSIVE scan by shifting values over one
-      val = Kokkos::shfl_up(val, 1, blockDim.x);
-      if (threadIdx.x == 0) {
-        val = 0;
-      }
-
-      val += accum;
-      if (i < loop_boundaries.end) closure(i, val, true);
-      accum += local_total;
-    }
-  } else {
-    value_type accum = 0;
-    for (iType i = loop_boundaries.start; i < loop_boundaries.end;
-         i += loop_boundaries.increment) {
-      closure(i, accum, true);
-    }
-  }
-}
-
-} /* namespace Kokkos */
-
-namespace Kokkos {
-
-template 
-KOKKOS_INLINE_FUNCTION void single(
-    const Impl::VectorSingleStruct>&,
-    const FunctorType& lambda) {
-#ifdef __CUDA_ARCH__
-  if (threadIdx.x == 0) lambda();
-#else
-  (void)lambda;
-#endif
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void single(
-    const Impl::ThreadSingleStruct>&,
-    const FunctorType& lambda) {
-#ifdef __CUDA_ARCH__
-  if (threadIdx.x == 0 && threadIdx.y == 0) lambda();
-#else
-  (void)lambda;
-#endif
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void single(
-    const Impl::VectorSingleStruct>& s,
-    const FunctorType& lambda, ValueType& val) {
-#ifdef __CUDA_ARCH__
-  if (threadIdx.x == 0) lambda(val);
-  if (1 < s.team_member.team_size()) {
-    val = shfl(val, 0, blockDim.x);
-  }
-#else
-  (void)s;
-  (void)val;
-  (void)lambda;
-#endif
-}
-
-template 
-KOKKOS_INLINE_FUNCTION void single(
-    const Impl::ThreadSingleStruct>&
-        single_struct,
-    const FunctorType& lambda, ValueType& val) {
-#ifdef __CUDA_ARCH__
-  if (threadIdx.x == 0 && threadIdx.y == 0) {
-    lambda(val);
-  }
-  single_struct.team_member.team_broadcast(val, 0);
-#else
-  (void)single_struct;
-  (void)val;
-  (void)lambda;
-#endif
-}
-
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#undef KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN
-
-// NOLINTEND
-
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
-#endif /* #ifndef KOKKOS_IMPL_CUDA_TASK_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
index eb893ee4d2b..fd86216105c 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_TEAM_HPP
 #define KOKKOS_CUDA_TEAM_HPP
@@ -51,7 +38,7 @@ struct CudaJoinFunctor {
   }
 };
 
-/**\brief  Team member_type passed to TeamPolicy or TeamTask closures.
+/**\brief  Team member_type passed to the TeamPolicy closure.
  *
  *  Cuda thread blocks for team closures are dimensioned as:
  *    blockDim.x == number of "vector lanes" per "thread"
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
index 40c7de1e20d..a7e13528574 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_UNIQUE_TOKEN_HPP
 #define KOKKOS_CUDA_UNIQUE_TOKEN_HPP
@@ -80,8 +67,7 @@ class UniqueToken {
     int idx = blockIdx.x * (blockDim.x * blockDim.y) +
               threadIdx.y * blockDim.x + threadIdx.x;
     idx = idx % size();
-#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_PASCAL) || \
-    defined(KOKKOS_ARCH_MAXWELL)
+#if defined(KOKKOS_ARCH_MAXWELL) || defined(KOKKOS_ARCH_PASCAL)
     unsigned int mask        = __activemask();
     unsigned int active      = __ballot_sync(mask, 1);
     unsigned int done_active = 0;
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
index 9e0c5819f71..49a8fbf9539 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 #ifndef KOKKOS_CUDA_VECTORIZATION_HPP
 #define KOKKOS_CUDA_VECTORIZATION_HPP
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 2141774503b..df41ad6be52 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
 #define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
@@ -32,14 +19,10 @@ struct CudaLDGFetch {
 
   template 
   KOKKOS_FUNCTION ValueType operator[](const iType& i) const {
-#if defined(KOKKOS_ARCH_KEPLER30) || defined(KOKKOS_ARCH_KEPLER32)
-    return m_ptr[i];
-#else
     KOKKOS_IF_ON_DEVICE(
         (AliasType v = __ldg(reinterpret_cast(&m_ptr[i]));
          return *(reinterpret_cast(&v));))
     KOKKOS_IF_ON_HOST((return m_ptr[i];))
-#endif
   }
 
   KOKKOS_FUNCTION
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp
index 74a1395e909..296f0e697d8 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_WORKGRAPHPOLICY_HPP
 #define KOKKOS_CUDA_WORKGRAPHPOLICY_HPP
@@ -67,8 +54,7 @@ class ParallelFor,
         }
 // On pre-volta architectures we need a __syncwarp here to prevent
 // infinite loops depending on the scheduling order above
-#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
-    defined(KOKKOS_ARCH_PASCAL)
+#if defined(KOKKOS_ARCH_MAXWELL) || defined(KOKKOS_ARCH_PASCAL)
         __syncwarp(__activemask());
 #endif
       }
@@ -84,7 +70,8 @@ class ParallelFor,
     const int shared = 0;
 
     Kokkos::Impl::CudaParallelLaunch(
-        *this, grid, block, shared, Cuda().impl_internal_space_instance());
+        *this, grid, block, shared,
+        m_policy.space().impl_internal_space_instance());
   }
 
   inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy)
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp
index 0ac2d4052d2..e22c8c199d0 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ZeroMemset.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 #ifndef KOKKOS_CUDA_ZEROMEMSET_HPP
 #define KOKKOS_CUDA_ZEROMEMSET_HPP
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
index 79faad01bf5..1f4332520b9 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CUDA_ABORT_HPP
 #define KOKKOS_CUDA_ABORT_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp
index b755e38316b..0485599bc65 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP.cpp
@@ -1,24 +1,16 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include 
 #include 
@@ -40,10 +32,6 @@ int HIP::concurrency() const {
   return Impl::HIPInternal::concurrency();
 }
 
-int HIP::impl_is_initialized() {
-  return Impl::HIPInternal::singleton().is_initialized();
-}
-
 void HIP::impl_initialize(InitializationSettings const& settings) {
   const std::vector& visible_devices = Impl::get_visible_devices();
   const int hip_device_id =
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp
index 439075fc6cc..80ce8625984 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_HPP
 #define KOKKOS_HIP_HPP
@@ -105,8 +92,6 @@ class HIP {
 
   static void impl_initialize(InitializationSettings const&);
 
-  static int impl_is_initialized();
-
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
   KOKKOS_DEPRECATED static size_type detect_device_count() {
     int count;
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
index fdf688fdad8..a3c109b2ba5 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_ABORT_HPP
 #define KOKKOS_HIP_ABORT_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
index 710fbeab9d9..96e0e831204 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp
@@ -1,24 +1,12 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_BLOCKSIZE_DEDUCTION_HPP
 #define KOKKOS_HIP_BLOCKSIZE_DEDUCTION_HPP
 
 #include 
 #include 
+#include 
 
 #if defined(__HIPCC__)
 
@@ -159,6 +147,39 @@ unsigned hip_get_preferred_blocksize(const int hip_device) {
   return get_preferred_blocksize_impl(hip_device);
 }
 
+// Heuristic to compute the block size for non-team parallelism from a size hint
+template ,  // NOTE(review): template parameter list looks truncated in this copy of the patch - confirm against upstream
+          HIPLaunchMechanism LaunchMechanism =
+              DeduceHIPLaunchMechanism::launch_mechanism>
+unsigned get_preferred_blocksize_for_range(HIPInternal const *hip_instance,
+                                           size_t requested_parallelism) {
+  /* General approach, if the user did not make a launch bounds request:
+  when the requested parallelism is nonzero and below the available concurrency,
+  split it across the multiprocessors (rounding up) and use that share, made:
+    - a power of 2 (rounded up)
+    - at least 256 (via HIPTraits::ConservativeThreadsPerBlock)
+    - no more than 1024 (via HIPTraits::MaxThreadsPerBlock)
+  In every other case fall through to get_preferred_blocksize_impl. */
+
+  if constexpr (HIPParallelLaunch::default_launchbounds()) {
+    if (requested_parallelism &&
+        requested_parallelism < size_t(hip_instance->concurrency())) {
+      const unsigned eus = hip_instance->m_deviceProp.multiProcessorCount;  // multiprocessor count read from the instance's device properties
+      const unsigned requestedPerEU = (requested_parallelism + eus - 1) / eus;  // ceiling division: share of the request per multiprocessor
+      // round up to power of 2
+      unsigned threadsPerEU = Kokkos::bit_ceil(requestedPerEU);
+      threadsPerEU          = std::max(threadsPerEU,  // enforce the lower bound
+                                       unsigned(HIPTraits::ConservativeThreadsPerBlock));
+      threadsPerEU =  // enforce the upper bound
+          std::min(threadsPerEU, unsigned(HIPTraits::MaxThreadsPerBlock));
+      return threadsPerEU;
+    }
+  }
+  const int hip_device = hip_instance->m_hipDev;  // fallback path: the size hint was not used
+  return get_preferred_blocksize_impl(hip_device);
+}
+
 // Standardized blocksize deduction for parallel constructs with no LDS usage
 // Returns the max blocksize as dictated by register usage
 //
@@ -176,6 +197,9 @@ unsigned hip_get_max_blocksize() {
 // The ShmemFunctor takes a single argument of the current blocksize under
 // consideration, and returns the LDS usage
 //
+// requested_parallelism is a hint about how much parallelism was requested
+// in the parallel construct
+//
 // Note: a returned block_size of zero indicates that the algorithm could not
 //       find a valid block size.  The caller is responsible for error handling.
 template 
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.cpp
index fc44a61ddb8..8a38c37f0a0 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.hpp
index c838c07a1a2..c8ff7696c67 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_DeepCopy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_DEEP_COPY_HPP
 #define KOKKOS_HIP_DEEP_COPY_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.cpp
new file mode 100644
index 00000000000..ce544bcb288
--- /dev/null
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.cpp
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
+
+#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
+#define KOKKOS_IMPL_PUBLIC_INCLUDE
+#endif
+
+#include 
+
+#include 
+
+namespace Kokkos {
+namespace Impl {
+void hip_internal_error_throw(hipError_t e, const char *name, const char *file,
+                              const int line) {  // formats HIP error `e` into a message and passes it to throw_runtime_exception
+  std::ostringstream out;
+  out << name << " error( " << hipGetErrorName(e)  // layout: "<name> error( <error name>): <error string>"
+      << "): " << hipGetErrorString(e);
+  if (file) {  // the " <file>:<line>" suffix is appended only when a file name was supplied
+    out << " " << file << ":" << line;
+  }
+  throw_runtime_exception(out.str());  // counterpart of hip_internal_error_abort, which calls host_abort instead
+}
+
+void hip_internal_error_abort(hipError_t e, const char *name, const char *file,
+                              const int line) {  // formats HIP error `e` into a message and passes it to host_abort
+  std::ostringstream out;
+  out << name << " error( " << hipGetErrorName(e)  // same message layout as hip_internal_error_throw
+      << "): " << hipGetErrorString(e);
+  if (file) {  // the " <file>:<line>" suffix is appended only when a file name was supplied
+    out << " " << file << ":" << line;
+  }
+  host_abort(out.str().c_str());  // host_abort receives a C string, hence the c_str() on the temporary
+}
+}  // namespace Impl
+}  // namespace Kokkos
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp
index fa45dcfec31..43fcfabc5e7 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_ERROR_HPP
 #define KOKKOS_HIP_ERROR_HPP
@@ -28,11 +15,48 @@ namespace Impl {
 void hip_internal_error_throw(hipError_t e, const char* name,
                               const char* file = nullptr, const int line = 0);
 
+void hip_internal_error_abort(hipError_t e, const char* name,
+                              const char* file = nullptr, const int line = 0);
+
 inline void hip_internal_safe_call(hipError_t e, const char* name,
                                    const char* file = nullptr,
                                    const int line   = 0) {
-  if (hipSuccess != e) {
-    hip_internal_error_throw(e, name, file, line);
+  // 1. Success -> normal continuation.
+  // 2. Error codes for which, to continue using HIP, the process must be
+  //    terminated and relaunched -> call abort on the host-side.
+  // 3. Any other error code -> throw a runtime error.
+  switch (e) {  // name, file and line are only forwarded to the error helpers
+    case hipSuccess: break;
+    case hipErrorInvalidValue:  // first label of group 2; the labels below carry no statements of their own
+    case hipErrorOutOfMemory:
+    case hipErrorInitializationError:
+    case hipErrorDeinitialized:
+    case hipErrorInvalidConfiguration:
+    case hipErrorInvalidSymbol:
+    case hipErrorInvalidDevicePointer:
+    case hipErrorInvalidMemcpyDirection:
+    case hipErrorInsufficientDriver:
+    case hipErrorMissingConfiguration:
+    case hipErrorPriorLaunchFailure:
+    case hipErrorInvalidDeviceFunction:
+    case hipErrorNoDevice:
+    case hipErrorInvalidDevice:
+    case hipErrorInvalidContext:
+    case hipErrorNoBinaryForGpu:
+    case hipErrorInvalidSource:
+    case hipErrorIllegalState:
+    case hipErrorNotFound:
+    case hipErrorIllegalAddress:
+    case hipErrorLaunchOutOfResources:
+    case hipErrorLaunchTimeOut:
+    case hipErrorAssert:
+    case hipErrorLaunchFailure:
+    case hipErrorNotSupported:
+    case hipErrorStreamCaptureUnsupported:
+    case hipErrorCapturedEvent:
+    case hipErrorGraphExecUpdateFailure:
+    case hipErrorUnknown: hip_internal_error_abort(e, name, file, line); break;  // every label from hipErrorInvalidValue down to here falls through to this abort call
+    default: hip_internal_error_throw(e, name, file, line);  // group 3; last label in the switch, so no break follows
   }
 }
 
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNodeKernel.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNodeKernel.hpp
index a228af4470c..b8b22651d0c 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNodeKernel.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNodeKernel.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_GRAPHNODEKERNEL_HPP
 #define KOKKOS_HIP_GRAPHNODEKERNEL_HPP
@@ -111,8 +98,7 @@ class GraphNodeKernelImpl
 
   hipGraph_t const* get_hip_graph_ptr() const { return m_graph_ptr; }
 
-  Kokkos::ObservingRawPtr allocate_driver_memory_buffer(
-      const HIP& exec) const {
+  base_t* allocate_driver_memory_buffer(const HIP& exec) const {
     KOKKOS_EXPECTS(m_driver_storage == nullptr);
     std::string alloc_label =
         label + " - GraphNodeKernel global memory functor storage";
@@ -130,9 +116,9 @@ class GraphNodeKernelImpl
   auto get_driver_storage() const { return m_driver_storage; }
 
  private:
-  Kokkos::ObservingRawPtr m_graph_ptr    = nullptr;
-  Kokkos::ObservingRawPtr m_graph_node_ptr = nullptr;
-  mutable std::shared_ptr m_driver_storage         = nullptr;
+  hipGraph_t const* m_graph_ptr                    = nullptr;
+  hipGraphNode_t* m_graph_node_ptr                 = nullptr;
+  mutable std::shared_ptr m_driver_storage = nullptr;
   std::string label;
 };
 
@@ -142,13 +128,13 @@ template ::type>
 struct get_graph_node_kernel_type
-    : type_identity<
+    : std::type_identity<
           GraphNodeKernelImpl> {};
 
 template 
 struct get_graph_node_kernel_type
-    : type_identity,
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNode_Impl.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNode_Impl.hpp
index 819ee12f396..d1136f0ee7f 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNode_Impl.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_GraphNode_Impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_GRAPHNODE_IMPL_HPP
 #define KOKKOS_HIP_GRAPHNODE_IMPL_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
index e1ae48b4c80..27e347f71b7 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_GRAPH_IMPL_HPP
 #define KOKKOS_HIP_GRAPH_IMPL_HPP
@@ -214,8 +201,6 @@ inline void GraphImpl::add_predecessor(
 }
 
 inline void GraphImpl::submit(const Kokkos::HIP& exec) {
-  desul::ensure_hip_lock_arrays_on_device();
-
   if (!m_graph_exec) {
     instantiate();
   }
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp
index 26480d354a3..5a236d52ec9 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_HALF_HPP_
 #define KOKKOS_HIP_HALF_HPP_
@@ -20,10 +7,8 @@
 #ifdef KOKKOS_IMPL_HALF_TYPE_DEFINED
 
 #include 
-#include 
 
-namespace Kokkos {
-namespace Experimental {
+namespace Kokkos::Experimental {
 
 /************************** half conversions **********************************/
 KOKKOS_INLINE_FUNCTION
@@ -193,26 +178,7 @@ KOKKOS_INLINE_FUNCTION std::enable_if_t, T>
 cast_from_half(half_t val) {
   return static_cast(cast_from_half(val));
 }
-}  // namespace Experimental
-
-// use float as the return type for sum and prod since hip_fp16.h
-// has no constexpr functions for casting to __half
-template <>
-struct reduction_identity {
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() noexcept {
-    return 0.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() noexcept {
-    return 1.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() noexcept {
-    return -65504.0F;
-  }
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() noexcept {
-    return 65504.0F;
-  }
-};
-
-}  // namespace Kokkos
+}  // namespace Kokkos::Experimental
+
 #endif
 #endif
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp
index 64c7706f945..319f99872ca 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_HALF_IMPL_TYPE_HPP_
 #define KOKKOS_HIP_HALF_IMPL_TYPE_HPP_
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
index 9f39d3fd466..f1c2119a2e0 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*--------------------------------------------------------------------------*/
 /* Kokkos interfaces */
@@ -21,7 +8,12 @@
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #include 
 #include 
@@ -29,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 
 /*--------------------------------------------------------------------------*/
 /* Standard 'C' libraries */
@@ -37,7 +28,6 @@
 
 /* Standard 'C++' libraries */
 #include 
-#include 
 #include 
 #include 
 
@@ -87,7 +77,7 @@ int HIPInternal::concurrency() {
 void HIPInternal::print_configuration(std::ostream &s) const {
   s << "macro  KOKKOS_ENABLE_HIP : defined" << '\n';
 #if defined(HIP_VERSION)
-  s << "macro  HIP_VERSION = " << HIP_VERSION << " = version "
+  s << "macro  HIP_VERSION : " << HIP_VERSION << " = version "
     << HIP_VERSION_MAJOR << '.' << HIP_VERSION_MINOR << '.' << HIP_VERSION_PATCH
     << '\n';
 #endif
@@ -113,7 +103,10 @@ void HIPInternal::print_configuration(std::ostream &s) const {
 
     s << "Kokkos::HIP[ " << i << " ] "
       << "gcnArch " << hipProp.gcnArchName;
-    if (m_hipDev == i) s << " : Selected";
+    if (m_hipDev == i)
+      s << " : Selected";
+    else
+      s << " : Not Selected";
     s << '\n'
       << "  Total Global Memory: "
       << ::Kokkos::Impl::human_memory_size(hipProp.totalGlobalMem) << '\n'
@@ -186,17 +179,7 @@ void HIPInternal::initialize(hipStream_t stream) {
   if (was_finalized)
     Kokkos::abort("Calling HIP::initialize after HIP::finalize is illegal\n");
 
-    // Get the device ID. If this is ROCm 5.6 or later, we can query this from
-    // the provided stream and potentially use multiple GPU devices. For
-    // ROCm 5.5 or earlier, we must use the singleton device id and there are no
-    // checks possible for the device id matching the device the stream was
-    // created on.
-#if (HIP_VERSION_MAJOR > 5 || \
-     (HIP_VERSION_MAJOR == 5 && HIP_VERSION_MINOR >= 6))
   KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamGetDevice(stream, &m_hipDev));
-#else
-  m_hipDev = singleton().m_hipDev;
-#endif
   KOKKOS_IMPL_HIP_SAFE_CALL(hipSetDevice(m_hipDev));
   hip_devices.insert(m_hipDev);
 
@@ -432,10 +415,6 @@ std::map HIPInternal::constantMemReusable = {};
 
 //----------------------------------------------------------------------------
 
-Kokkos::HIP::size_type hip_internal_multiprocessor_count() {
-  return HIPInternal::singleton().m_deviceProp.multiProcessorCount;
-}
-
 Kokkos::HIP::size_type *hip_internal_scratch_space(const HIP &instance,
                                                    const std::size_t size) {
   return instance.impl_internal_space_instance()->scratch_space(size);
@@ -450,20 +429,3 @@ Kokkos::HIP::size_type *hip_internal_scratch_flags(const HIP &instance,
 }  // namespace Kokkos
 
 //----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-void hip_internal_error_throw(hipError_t e, const char *name, const char *file,
-                              const int line) {
-  std::ostringstream out;
-  out << name << " error( " << hipGetErrorName(e)
-      << "): " << hipGetErrorString(e);
-  if (file) {
-    out << " " << file << ":" << line;
-  }
-  throw_runtime_exception(out.str());
-}
-}  // namespace Impl
-}  // namespace Kokkos
-
-//----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
index 181a8839304..db321021127 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*--------------------------------------------------------------------------*/
 
@@ -40,7 +27,7 @@ struct HIPTraits {
   static constexpr int WarpIndexMask  = 0x003f; /* hexadecimal for 63 */
   static constexpr int WarpIndexShift = 6;      /* WarpSize == 1 << WarpShift*/
 #elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
-    defined(KOKKOS_ARCH_AMD_GFX1103)
+    defined(KOKKOS_ARCH_AMD_GFX1103) || defined(KOKKOS_ARCH_AMD_GFX1201)
   static constexpr int WarpSize       = 32;
   static constexpr int WarpIndexMask  = 0x001f; /* hexadecimal for 31 */
   static constexpr int WarpIndexShift = 5;      /* WarpSize == 1 << WarpShift*/
@@ -56,8 +43,6 @@ struct HIPTraits {
 
 //----------------------------------------------------------------------------
 
-HIP::size_type hip_internal_multiprocessor_count();
-
 HIP::size_type *hip_internal_scratch_space(const HIP &instance,
                                            const std::size_t size);
 HIP::size_type *hip_internal_scratch_flags(const HIP &instance,
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.cpp
index 32cd0f80ad9..90da82d356e 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*--------------------------------------------------------------------------*/
 
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.hpp
index 32a26a46df1..4f04a80e7d7 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /*--------------------------------------------------------------------------*/
 
@@ -54,7 +41,8 @@ constexpr bool gpu_arch_can_access_system_allocations() {
     defined(KOKKOS_ARCH_AMD_GFX942) || defined(KOKKOS_ARCH_AMD_GFX942_APU)
   return true;
 #elif defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX1103) || \
-    defined(KOKKOS_ARCH_AMD_GFX1100) || defined(KOKKOS_ARCH_AMD_GFX1030)
+    defined(KOKKOS_ARCH_AMD_GFX1100) || defined(KOKKOS_ARCH_AMD_GFX1030) ||  \
+    defined(KOKKOS_ARCH_AMD_GFX1201)
   return false;
 #endif
 }
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
index 97996ae6d54..92c8b6fbf4e 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_KERNEL_LAUNCH_HPP
 #define KOKKOS_HIP_KERNEL_LAUNCH_HPP
@@ -22,13 +9,10 @@
 #if defined(__HIPCC__)
 
 #include 
+#include 
 #include 
 #include 
-
-#ifdef KOKKOS_IMPL_HIP_NATIVE_GRAPH
-#include 
 #include 
-#endif
 
 // Must use global variable on the device with HIP-Clang
 #ifdef __HIP__
@@ -383,7 +367,6 @@ struct HIPParallelLaunchKernelInvokerm_deviceProp.sharedMemPerBlock < shmem) {
         Kokkos::Impl::throw_runtime_exception(
             "HIPParallelLaunch FAILED: shared memory request is too large");
       }
 
-      desul::ensure_hip_lock_arrays_on_device();
-
       // Invoke the driver function on the device
       base_t::invoke_kernel(driver, grid, block, shmem, hip_instance);
 
@@ -613,16 +591,17 @@ void hip_parallel_launch(const DriverType &driver, const dim3 &grid,
                          const dim3 &block, const int shmem,
                          const HIPInternal *hip_instance,
                          const bool prefer_shmem) {
-#ifdef KOKKOS_IMPL_HIP_NATIVE_GRAPH
+  if (!is_empty_launch(grid, block)) {
+    desul::ensure_hip_lock_arrays_on_device();
+  }
+
   if constexpr (DoGraph) {
     // Graph launch
     using base_t = HIPParallelLaunchKernelInvoker;
     base_t::create_parallel_launch_graph_node(driver, grid, block, shmem,
                                               hip_instance);
-  } else
-#endif
-  {
+  } else {
     // Regular kernel launch
 #ifndef KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
     HIPParallelLaunch(
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp
index 58e13cf3e82..01ecf3ee2e7 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_MDRANGEPOLICY_HPP_
 #define KOKKOS_HIP_MDRANGEPOLICY_HPP_
@@ -39,11 +26,14 @@ namespace Impl {
 template <>
 inline TileSizeProperties get_tile_size_properties(const HIP& space) {
   TileSizeProperties properties;
-  properties.max_threads =
-      space.impl_internal_space_instance()->m_maxThreadsPerSM;
+  const auto& device_prop              = space.hip_device_prop();
+  properties.max_threads               = device_prop.maxThreadsPerBlock;
   properties.default_largest_tile_size = 16;
   properties.default_tile_size         = 4;
   properties.max_total_tile_size       = HIPTraits::MaxThreadsPerBlock;
+  properties.max_threads_dimensions[0] = device_prop.maxThreadsDim[0];
+  properties.max_threads_dimensions[1] = device_prop.maxThreadsDim[1];
+  properties.max_threads_dimensions[2] = device_prop.maxThreadsDim[2];
   return properties;
 }
 
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_MDRange.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_MDRange.hpp
index 63079898463..2f8148188ca 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_MDRange.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_MDRange.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_FOR_MDRANGE_HPP
 #define KOKKOS_HIP_PARALLEL_FOR_MDRANGE_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Range.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Range.hpp
index 91d6528a65d..0d55a2cda9c 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Range.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Range.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_FOR_RANGE_HPP
 #define KOKKOS_HIP_PARALLEL_FOR_RANGE_HPP
@@ -76,8 +63,9 @@ class ParallelFor, Kokkos::HIP> {
 
     using DriverType = ParallelFor;
     const int block_size =
-        Kokkos::Impl::hip_get_preferred_blocksize(
-            m_policy.space().hip_device());
+        Kokkos::Impl::get_preferred_blocksize_for_range(
+            m_policy.space().impl_internal_space_instance(), nwork);
 
     if (block_size == 0) {
       Kokkos::Impl::throw_runtime_exception(
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Team.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Team.hpp
index 16fff77e5cb..7607ec89f57 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Team.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelFor_Team.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_FOR_TEAM_HPP
 #define KOKKOS_HIP_PARALLEL_FOR_TEAM_HPP
@@ -117,14 +104,13 @@ class ParallelFor, HIP> {
   ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy)
       : m_functor(arg_functor),
         m_policy(arg_policy),
-        m_league_size(arg_policy.league_size()),
-        m_team_size(arg_policy.team_size()),
-        m_vector_size(arg_policy.impl_vector_length()) {
+        m_league_size(m_policy.league_size()),
+        m_team_size(m_policy.team_size()),
+        m_vector_size(m_policy.impl_vector_length()) {
     auto internal_space_instance =
         m_policy.space().impl_internal_space_instance();
     if (m_team_size < 0) {
-      m_team_size =
-          arg_policy.team_size_recommended(arg_functor, ParallelForTag());
+      m_team_size = m_policy.team_size_recommended(m_functor, ParallelForTag());
       if (m_team_size <= 0)
         Kokkos::Impl::throw_runtime_exception(
             "Kokkos::Impl::ParallelFor could not find a "
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp
index 16295116462..b3e2d94eded 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_REDUCE_MDRANGE_HPP
 #define KOKKOS_HIP_PARALLEL_REDUCE_MDRANGE_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Range.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Range.hpp
index 907266b0c84..c0701b453e3 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Range.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Range.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_REDUCE_RANGE_HPP
 #define KOKKOS_HIP_PARALLEL_REDUCE_RANGE_HPP
@@ -216,9 +203,17 @@ class ParallelReduce,
       return hip_single_inter_block_reduce_scan_shmem(f, n);
     };
-    return Kokkos::Impl::hip_get_preferred_blocksize(
-        instance, shmem_functor);
+    constexpr auto light_weight =
+        Kokkos::Experimental::WorkItemProperty::HintLightWeight;
+    constexpr typename Policy::work_item_property property;
+    if constexpr ((property & light_weight) == light_weight) {
+      return Kokkos::Impl::hip_get_max_blocksize(
+          instance, shmem_functor);
+    } else {
+      return Kokkos::Impl::hip_get_preferred_blocksize(
+          instance, shmem_functor);
+    }
   }
 
   inline void execute() {
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Team.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Team.hpp
index d222005a1da..45963b5e35d 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Team.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_Team.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_REDUCE_TEAM_HPP
 #define KOKKOS_HIP_PARALLEL_REDUCE_TEAM_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelScan_Range.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelScan_Range.hpp
index cc10493b2e0..ce9b35b0d3b 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelScan_Range.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelScan_Range.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_PARALLEL_SCAN_RANGE_HPP
 #define KOKKOS_HIP_PARALLEL_SCAN_RANGE_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
index fad4c53344d..472116db26e 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_REDUCESCAN_HPP
 #define KOKKOS_HIP_REDUCESCAN_HPP
@@ -56,15 +43,17 @@ struct HIPReductionsFunctor {
 
   __device__ static inline void scalar_intra_block_reduction(
       FunctorType const& functor, Scalar value, bool const skip,
-      Scalar* my_global_team_buffer_element, int const shared_elements,
+      Scalar* my_global_team_buffer_element, unsigned int const shared_elements,
       Scalar* shared_team_buffer_element) {
     constexpr unsigned int warp_size = HIPTraits::WarpSize;
-    int const warp_id                = (threadIdx.y * blockDim.x) / warp_size;
+    unsigned int const warp_id       = (threadIdx.y * blockDim.x) / warp_size;
+    unsigned int const num_threads   = blockDim.x * blockDim.y;
     Scalar* const my_shared_team_buffer_element =
         shared_team_buffer_element + warp_id % shared_elements;
 
     // Warp Level Reduction, ignoring Kokkos vector entries
-    scalar_intra_warp_reduction(functor, value, skip, warp_size, value);
+    scalar_intra_warp_reduction(functor, value, skip,
+                                Kokkos::min(warp_size, num_threads), value);
 
     if (warp_id < shared_elements) {
       *my_shared_team_buffer_element = value;
@@ -73,8 +62,9 @@ struct HIPReductionsFunctor {
     // cross warp reduction
     __syncthreads();
 
-    int const num_warps = blockDim.x * blockDim.y / warp_size;
-    for (int w = shared_elements; w < num_warps; w += shared_elements) {
+    unsigned int const num_warps = num_threads / warp_size;
+    for (unsigned int w = shared_elements; w < num_warps;
+         w += shared_elements) {
       if (warp_id >= w && warp_id < w + shared_elements) {
         if ((threadIdx.y * blockDim.x + threadIdx.x) % warp_size == 0)
           functor.join(my_shared_team_buffer_element, &value);
@@ -85,7 +75,7 @@ struct HIPReductionsFunctor {
     if (warp_id == 0) {
       functor.init(&value);
       for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x;
-           i < blockDim.y * blockDim.x / warp_size; i += warp_size) {
+           i < num_warps; i += warp_size) {
         functor.join(&value, &shared_team_buffer_element[i]);
       }
       scalar_intra_warp_reduction(functor, value, false, warp_size,
@@ -106,8 +96,8 @@ struct HIPReductionsFunctor {
         reinterpret_cast(shared_data);
     Scalar value                     = shared_team_buffer_elements[threadIdx.y];
     constexpr unsigned int warp_size = Impl::HIPTraits::WarpSize;
-    int shared_elements              = blockDim.x * blockDim.y / warp_size;
-    int global_elements              = block_count;
+    unsigned int shared_elements     = blockDim.x * blockDim.y / warp_size;
+    unsigned int global_elements     = block_count;
     __syncthreads();
 
     scalar_intra_block_reduction(functor, value, true,
@@ -126,8 +116,8 @@ struct HIPReductionsFunctor {
       is_last_block = true;
       *global_flags = 0;
       functor.init(&value);
-      for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements;
-           i += blockDim.x * blockDim.y) {
+      for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x;
+           i < global_elements; i += blockDim.x * blockDim.y) {
         functor.join(&value, &global_team_buffer_element[i]);
       }
       scalar_intra_block_reduction(
@@ -154,8 +144,7 @@ struct HIPReductionsFunctor {
     int const lane_id =
         (threadIdx.y * blockDim.x + threadIdx.x) % HIPTraits::WarpSize;
     for (int delta = skip_vector ? blockDim.x : 1; delta < width; delta *= 2) {
-      if (lane_id + delta < HIPTraits::WarpSize &&
-          (lane_id % (delta * 2) == 0)) {
+      if (lane_id + delta < width && (lane_id % (delta * 2) == 0)) {
         functor.join(value, value + delta);
       }
     }
@@ -165,26 +154,27 @@ struct HIPReductionsFunctor {
   __device__ static inline void scalar_intra_block_reduction(
       FunctorType const& functor, Scalar value, bool const skip, Scalar* result,
       int const /*shared_elements*/, Scalar* shared_team_buffer_element) {
-    int const warp_id = (threadIdx.y * blockDim.x) / HIPTraits::WarpSize;
+    constexpr unsigned int warp_size = Impl::HIPTraits::WarpSize;
+    unsigned int const warp_id       = (threadIdx.y * blockDim.x) / warp_size;
+    const unsigned int num_threads   = blockDim.x * blockDim.y;
     Scalar* const my_shared_team_buffer_element =
         shared_team_buffer_element + threadIdx.y * blockDim.x + threadIdx.x;
     *my_shared_team_buffer_element = value;
     // Warp Level Reduction, ignoring Kokkos vector entries
     scalar_intra_warp_reduction(functor, my_shared_team_buffer_element, skip,
-                                HIPTraits::WarpSize);
+                                Kokkos::min(num_threads, warp_size));
     // Wait for every warp to be done before using one warp to do final cross
     // warp reduction
     __syncthreads();
 
-    if (warp_id == 0) {
+    if (warp_id == 0u) {
       const unsigned int delta =
-          (threadIdx.y * blockDim.x + threadIdx.x) * HIPTraits::WarpSize;
-      if (delta < blockDim.x * blockDim.y)
+          (threadIdx.y * blockDim.x + threadIdx.x) * warp_size;
+      if (delta < num_threads)
         *my_shared_team_buffer_element = shared_team_buffer_element[delta];
-      scalar_intra_warp_reduction(
-          functor, my_shared_team_buffer_element, false,
-          blockDim.x * blockDim.y / HIPTraits::WarpSize);
-      if (threadIdx.x + threadIdx.y == 0) {
+      scalar_intra_warp_reduction(functor, my_shared_team_buffer_element, false,
+                                  num_threads / warp_size);
+      if (threadIdx.x + threadIdx.y == 0u) {
         *result = *shared_team_buffer_element;
         if (skip) __threadfence();
       }
@@ -252,7 +242,6 @@ __device__ void hip_intra_block_reduce_scan(
     typename FunctorType::pointer_type const base_data) {
   using pointer_type = typename FunctorType::pointer_type;
 
-  const unsigned value_count = functor.length();
   const unsigned not_less_power_of_two =
       Kokkos::Experimental::bit_ceil_builtin(blockDim.y);
   const unsigned BlockSizeMask = not_less_power_of_two - 1;
@@ -263,15 +252,19 @@ __device__ void hip_intra_block_reduce_scan(
   const bool is_full_warp = (((threadIdx.y >> HIPTraits::WarpIndexShift) + 1)
                              << HIPTraits::WarpIndexShift) <= blockDim.y;
 
-  auto block_reduce_step = [&functor, value_count](
-                               int const R, pointer_type const TD, int const S,
-                               pointer_type memory_start, int index_shift) {
+  auto block_reduce_step = [&functor](int const R, pointer_type const TD,
+                                      int const S, pointer_type memory_start,
+                                      int index_shift) {
+    // FIXME_HIP define value_count inside the lambda instead of capturing it to
+    // avoid a warning when using ROCm 7.0 with C++ 23
+    const unsigned value_count = functor.length();
     const auto join_ptr = TD - (value_count << S) + value_count * index_shift;
     if (R > ((1 << S) - 1) && join_ptr >= memory_start) {
       functor.join(TD, join_ptr);
     }
   };
 
+  const unsigned value_count = functor.length();
   // Intra-warp reduction:
   int bit_shift = 0;
   {
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp
index 0b679218092..8cba732b0be 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp
index a464609108c..c6fd9d693ea 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_SharedAllocationRecord.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_SHARED_ALLOCATION_RECORD_HPP
 #define KOKKOS_HIP_SHARED_ALLOCATION_RECORD_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
index feee44ccaf1..54be6972988 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_SHUFFLE_REDUCE_HPP
 #define KOKKOS_HIP_SHUFFLE_REDUCE_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
index fad8d910b05..b41edad7603 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
@@ -20,7 +7,12 @@
 
 #include 
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 #include 
 #include 
 
@@ -96,9 +88,7 @@ void* HIPSpace::impl_allocate(const int device_id,
                               const size_t arg_logical_size,
                               [[maybe_unused]] bool stream_sync_only) const {
   void* ptr = nullptr;
-  // ROCm 5.5 and earlier throw an error when using hipMallocAsync and
-  // arg_alloc_size is zero. Instead of trying to allocate memory, just return
-  // early.
+  // Instead of trying to allocate zero memory, return early.
   if (arg_alloc_size == 0) return ptr;
 
   KOKKOS_IMPL_HIP_SAFE_CALL(hipSetDevice(device_id));
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp
index 63590f27862..42947e993a8 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIPSPACE_HPP
 #define KOKKOS_HIPSPACE_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp
index 80cb3804be4..aa12297b103 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_TEAM_HPP
 #define KOKKOS_HIP_TEAM_HPP
@@ -46,7 +33,7 @@ struct HIPJoinFunctor {
   }
 };
 
-/**\brief  Team member_type passed to TeamPolicy or TeamTask closures.
+/**\brief  Team member_type passed to the TeamPolicy closure.
  *
  *  HIP thread blocks for team closures are dimensioned as:
  *    blockDim.x == number of "vector lanes" per "thread"
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp
index ac0d424c297..b7ac3da485f 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_TEAM_POLICY_INTERNAL_HPP
 #define KOKKOS_HIP_TEAM_POLICY_INTERNAL_HPP
@@ -185,7 +172,7 @@ class TeamPolicyInternal
   typename traits::execution_space space() const { return m_space; }
 
   TeamPolicyInternal()
-      : m_space(typename traits::execution_space()),
+      : m_space(),
         m_league_size(0),
         m_team_size(-1),
         m_vector_length(0),
@@ -196,9 +183,9 @@ class TeamPolicyInternal
         m_tune_vector_length(false) {}
 
   /** \brief  Specify league size, request team size */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      int team_size_request, int vector_length_request = 1)
-      : m_space(space_),
+      : m_space(std::move(space)),
         m_league_size(league_size_),
         m_team_size(team_size_request),
         m_vector_length(impl_determine_vector_length(vector_length_request)),
@@ -223,27 +210,29 @@ class TeamPolicyInternal
   }
 
   /** \brief  Specify league size, request team size */
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      const Kokkos::AUTO_t& /* team_size_request */,
                      int vector_length_request = 1)
-      : TeamPolicyInternal(space_, league_size_, -1, vector_length_request) {}
+      : TeamPolicyInternal(std::move(space), league_size_, -1,
+                           vector_length_request) {}
   // FLAG
   /** \brief  Specify league size and team size, request vector length*/
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      int team_size_request,
                      const Kokkos::AUTO_t& /* vector_length_request */
                      )
-      : TeamPolicyInternal(space_, league_size_, team_size_request, -1)
+      : TeamPolicyInternal(std::move(space), league_size_, team_size_request,
+                           -1)
 
   {}
 
   /** \brief  Specify league size, request team size and vector length*/
-  TeamPolicyInternal(const execution_space space_, int league_size_,
+  TeamPolicyInternal(execution_space space, int league_size_,
                      const Kokkos::AUTO_t& /* team_size_request */,
                      const Kokkos::AUTO_t& /* vector_length_request */
 
                      )
-      : TeamPolicyInternal(space_, league_size_, -1, -1)
+      : TeamPolicyInternal(std::move(space), league_size_, -1, -1)
 
   {}
 
@@ -277,6 +266,12 @@ class TeamPolicyInternal
       : TeamPolicyInternal(typename traits::execution_space(), league_size_, -1,
                            -1) {}
 
+  TeamPolicyInternal(const PolicyUpdate, const TeamPolicyInternal& other,
+                     typename traits::execution_space space)
+      : TeamPolicyInternal(other) {
+    this->m_space = std::move(space);
+  }
+
   int chunk_size() const { return m_chunk_size; }
 
   TeamPolicyInternal& set_chunk_size(typename traits::index_type chunk_size_) {
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
index 2da38b950b0..d110c39589c 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_UNIQUE_TOKEN_HPP
 #define KOKKOS_HIP_UNIQUE_TOKEN_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
index f5b1d321e8c..37b30e57d99 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_VECTORIZATION_HPP
 #define KOKKOS_HIP_VECTORIZATION_HPP
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp
index c39963d277b..ec16a64c4ea 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HIP_WORKGRAPHPOLICY_HPP
 #define KOKKOS_HIP_WORKGRAPHPOLICY_HPP
@@ -62,12 +49,15 @@ class ParallelFor, HIP> {
 
   inline void execute() {
     const int warps_per_block = 4;
-    const dim3 grid(hip_internal_multiprocessor_count(), 1, 1);
+    const int multiProcessorCount =
+        m_policy.space().hip_device_prop().multiProcessorCount;
+    const dim3 grid(multiProcessorCount, 1, 1);
     const dim3 block(1, HIPTraits::WarpSize, warps_per_block);
     const int shared = 0;
 
     HIPParallelLaunch(*this, grid, block, shared,
-                            HIP().impl_internal_space_instance(), false);
+                            m_policy.space().impl_internal_space_instance(),
+                            false);
   }
 
   inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy)
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
index 9ed9f334fb6..b2b245829b7 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.hpp
index 86aa7890a86..874a98c6d7b 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ZeroMemset.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 #ifndef KOKKOS_HIP_ZEROMEMSET_HPP
 #define KOKKOS_HIP_ZEROMEMSET_HPP
 
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp
index 1946c20fa97..031d7f1a437 100644
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp
+++ b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp
@@ -1,24 +1,16 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #define KOKKOS_IMPL_PUBLIC_INCLUDE
 #endif
 
+#include 
+#ifdef KOKKOS_ENABLE_EXPERIMENTAL_CXX20_MODULES
+import kokkos.core;
+#else
 #include 
+#endif
 
 #ifdef KOKKOS_ENABLE_HPX
 #include 
@@ -223,11 +215,6 @@ void HPX::impl_initialize(InitializationSettings const &settings) {
   }
 }
 
-bool HPX::impl_is_initialized() noexcept {
-  hpx::runtime *rt = hpx::get_runtime_ptr();
-  return rt != nullptr;
-}
-
 void HPX::impl_finalize() {
   if (m_hpx_initialized) {
     hpx::runtime *rt = hpx::get_runtime_ptr();
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp
index 441b7b26222..030e3042433 100644
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp
+++ b/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
@@ -272,7 +259,6 @@ class HPX {
   int concurrency() const;
 #endif
   static void impl_initialize(InitializationSettings const &);
-  static bool impl_is_initialized() noexcept;
   static void impl_finalize();
   static int impl_thread_pool_size() noexcept;
   static int impl_thread_pool_rank() noexcept;
@@ -529,6 +515,15 @@ struct MemorySpaceAccess
+struct ZeroMemset {
+  ZeroMemset(const Kokkos::Experimental::HPX &exec, void *dst, size_t cnt) {
+    exec.fence(
+        "Kokkos::Impl::ZeroMemset: HostSpace fence before calling std::memset");
+    std::memset(dst, 0, cnt);
+  }
+};
+
 }  // namespace Impl
 }  // namespace Kokkos
 
@@ -932,6 +927,12 @@ class TeamPolicyInternal
     init(league_size_request, 1);
   }
 
+  TeamPolicyInternal(const PolicyUpdate, const TeamPolicyInternal &other,
+                     typename traits::execution_space space)
+      : TeamPolicyInternal(other) {
+    this->m_space = std::move(space);
+  }
+
   inline int chunk_size() const { return m_chunk_size; }
 
   inline TeamPolicyInternal &set_chunk_size(
@@ -999,7 +1000,7 @@ class ParallelFor,
   }
 
   inline ParallelFor(const FunctorType &arg_functor, Policy arg_policy)
-      : m_functor(arg_functor), m_policy(arg_policy) {}
+      : m_functor(arg_functor), m_policy(std::move(arg_policy)) {}
 };
 
 template 
@@ -1587,10 +1588,10 @@ class ParallelFor,
   ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
       : m_functor(arg_functor),
         m_policy(arg_policy),
-        m_league(arg_policy.league_size()),
-        m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) +
+        m_league(m_policy.league_size()),
+        m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) +
                  FunctorTeamShmemSize::value(
-                     arg_functor, arg_policy.team_size())) {}
+                     m_functor, m_policy.team_size())) {}
 };
 
 template 
@@ -2089,9 +2090,5 @@ KOKKOS_INLINE_FUNCTION void single(
 
 }  // namespace Kokkos
 
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
-#include 
-#endif
-
 #endif /* #if defined( KOKKOS_ENABLE_HPX ) */
 #endif /* #ifndef KOKKOS_HPX_HPP */
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_MDRangePolicy.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_MDRangePolicy.hpp
index cf0d1add126..1a9d38f33d8 100644
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX_MDRangePolicy.hpp
+++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_MDRangePolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HPX_MDRANGEPOLICY_HPP_
 #define KOKKOS_HPX_MDRANGEPOLICY_HPP_
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp
deleted file mode 100644
index ad540a203d5..00000000000
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
-#define KOKKOS_IMPL_PUBLIC_INCLUDE
-#endif
-
-#include 
-#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG)
-
-#include 
-
-#include 
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template class TaskQueue;
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-#else
-void KOKKOS_CORE_SRC_IMPL_HPX_TASK_PREVENT_LINK_ERROR() {}
-#endif  // #if defined( KOKKOS_ENABLE_HPX ) && defined( KOKKOS_ENABLE_TASKDAG )
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp
deleted file mode 100644
index 42603440355..00000000000
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp
+++ /dev/null
@@ -1,274 +0,0 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
-
-#ifndef KOKKOS_HPX_TASK_HPP
-#define KOKKOS_HPX_TASK_HPP
-
-#include 
-#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG)
-
-#include 
-#include 
-
-#include 
-
-#include 
-
-#include 
-#include 
-
-#include 
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
-// We allow using deprecated classes in this file
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
-#endif
-
-namespace Kokkos {
-namespace Impl {
-
-template 
-class TaskQueueSpecialization<
-    SimpleTaskScheduler> {
- public:
-  void setup() const {
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-
-    hpx_thread_buffer &buffer = Kokkos::Experimental::HPX().impl_get_buffer();
-    buffer.resize(num_worker_threads, 512);
-  }
-
-  void execute_range(int t) const {
-    // NOTE: This implementation has been simplified based on the
-    // assumption that team_size = 1. The HPX backend currently only
-    // supports a team size of 1.
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-
-    hpx_thread_buffer &buffer = Kokkos::Experimental::HPX().impl_get_buffer();
-
-    buffer.get(t);
-    HPXTeamMember member(
-        TeamPolicyInternal(
-            Kokkos::Experimental::HPX(), num_worker_threads, 1),
-        0, t, buffer.get(t), 512);
-
-    member_type single_exec(*scheduler, member);
-    member_type &team_exec = single_exec;
-
-    auto &queue          = scheduler->queue();
-    auto &team_scheduler = team_exec.scheduler();
-
-    using task_base_type = typename scheduler_type::task_base_type;
-    auto current_task    = OptionalRef(nullptr);
-
-    while (!queue.is_done()) {
-      current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info());
-
-      if (current_task) {
-        KOKKOS_EXPECTS(current_task->is_single_runnable() ||
-                       current_task->is_team_runnable());
-        current_task->as_runnable_task().run(single_exec);
-        queue.complete((*std::move(current_task)).as_runnable_task(),
-                       team_scheduler.team_scheduler_info());
-      }
-    }
-  }
-
-  void finalize() const {}
-
-  using execution_space = Kokkos::Experimental::HPX;
-  using scheduler_type =
-      SimpleTaskScheduler;
-  using member_type =
-      TaskTeamMemberAdapter;
-  using memory_space = Kokkos::HostSpace;
-
-  static void execute(scheduler_type const &scheduler) {
-    // NOTE: We create an instance so that we can use impl_bulk_setup_finalize.
-    // This is not necessarily the most efficient, but can be improved later.
-    TaskQueueSpecialization task_queue;
-    task_queue.scheduler         = &scheduler;
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-    Kokkos::Experimental::HPX().impl_bulk_setup_finalize(
-        true, false, task_queue, num_worker_threads,
-        hpx::threads::thread_stacksize::nostack);
-  }
-
-  static uint32_t get_max_team_count(execution_space const &espace) {
-    return static_cast(espace.concurrency());
-  }
-
-  template 
-  static void get_function_pointer(typename TaskType::function_type &ptr,
-                                   typename TaskType::destroy_type &dtor) {
-    ptr  = TaskType::apply;
-    dtor = TaskType::destroy;
-  }
-
- private:
-  const scheduler_type *scheduler;
-};
-
-template 
-class TaskQueueSpecializationConstrained<
-    Scheduler,
-    std::enable_if_t>> {
- public:
-  void setup() const {
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-
-    hpx_thread_buffer &buffer = Kokkos::Experimental::HPX().impl_get_buffer();
-    buffer.resize(num_worker_threads, 512);
-
-    auto &queue = scheduler->queue();
-    queue.initialize_team_queues(num_worker_threads);
-  }
-
-  void execute_range(int t) const {
-    // NOTE: This implementation has been simplified based on the
-    // assumption that team_size = 1. The HPX backend currently only
-    // supports a team size of 1.
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-
-    hpx_thread_buffer &buffer = Kokkos::Experimental::HPX().impl_get_buffer();
-
-    buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id());
-    HPXTeamMember member(
-        TeamPolicyInternal(
-            Kokkos::Experimental::HPX(), num_worker_threads, 1),
-        0, t, buffer.get(t), 512);
-
-    using task_base_type = typename scheduler_type::task_base;
-    using queue_type     = typename scheduler_type::queue_type;
-
-    static task_base_type *const end = (task_base_type *)task_base_type::EndTag;
-    constexpr task_base_type *no_more_tasks_sentinel = nullptr;
-
-    member_type single_exec(*scheduler, member);
-    member_type &team_exec = single_exec;
-
-    auto &team_queue     = team_exec.scheduler().queue();
-    task_base_type *task = no_more_tasks_sentinel;
-
-    do {
-      if (task != no_more_tasks_sentinel && task != end) {
-        team_queue.complete(task);
-      }
-
-      if (desul::atomic_load(&team_queue.m_ready_count,
-                             desul::MemoryOrderAcquire(),
-                             desul::MemoryScopeDevice()) > 0) {
-        task = end;
-        for (int i = 0; i < queue_type::NumQueue && end == task; ++i) {
-          for (int j = 0; j < 2 && end == task; ++j) {
-            task = queue_type::pop_ready_task(&team_queue.m_ready[i][j]);
-          }
-        }
-      } else {
-        task = team_queue.attempt_to_steal_task();
-      }
-
-      if (task != no_more_tasks_sentinel && task != end) {
-        (*task->m_apply)(task, &single_exec);
-      }
-    } while (task != no_more_tasks_sentinel);
-  }
-
-  void finalize() const {}
-
-  using execution_space = Kokkos::Experimental::HPX;
-  using scheduler_type  = Scheduler;
-  using member_type =
-      TaskTeamMemberAdapter;
-  using memory_space = Kokkos::HostSpace;
-
-  static void iff_single_thread_recursive_execute(
-      scheduler_type const &scheduler) {
-    using task_base_type = typename scheduler_type::task_base;
-    using queue_type     = typename scheduler_type::queue_type;
-
-    if (1 == Kokkos::Experimental::HPX().concurrency()) {
-      task_base_type *const end = (task_base_type *)task_base_type::EndTag;
-      task_base_type *task      = end;
-
-      HPXTeamMember member(TeamPolicyInternal(
-                               Kokkos::Experimental::HPX(), 1, 1),
-                           0, 0, nullptr, 0);
-      member_type single_exec(scheduler, member);
-
-      do {
-        task = end;
-
-        // Loop by priority and then type
-        for (int i = 0; i < queue_type::NumQueue && end == task; ++i) {
-          for (int j = 0; j < 2 && end == task; ++j) {
-            task =
-                queue_type::pop_ready_task(&scheduler.m_queue->m_ready[i][j]);
-          }
-        }
-
-        if (end == task) break;
-
-        (*task->m_apply)(task, &single_exec);
-
-        scheduler.m_queue->complete(task);
-
-      } while (true);
-    }
-  }
-
-  static void execute(scheduler_type const &scheduler) {
-    // NOTE: We create an instance so that we can use impl_bulk_setup_finalize.
-    // This is not necessarily the most efficient, but can be improved later.
-    TaskQueueSpecializationConstrained task_queue;
-    task_queue.scheduler         = &scheduler;
-    const int num_worker_threads = Kokkos::Experimental::HPX().concurrency();
-    Kokkos::Experimental::HPX().impl_bulk_setup_finalize(
-        true, false, task_queue, num_worker_threads,
-        hpx::threads::thread_stacksize::nostack);
-  }
-
-  template 
-  static void get_function_pointer(typename TaskType::function_type &ptr,
-                                   typename TaskType::destroy_type &dtor) {
-    ptr  = TaskType::apply;
-    dtor = TaskType::destroy;
-  }
-
- private:
-  const scheduler_type *scheduler;
-};
-
-extern template class TaskQueue<
-    Kokkos::Experimental::HPX,
-    typename Kokkos::Experimental::HPX::memory_space>;
-
-}  // namespace Impl
-}  // namespace Kokkos
-
-#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
-KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
-#endif
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
-#endif /* #ifndef KOKKOS_HPX_TASK_HPP */
diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
index eb4f3da11af..b2b1a7b20fe 100644
--- a/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
+++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_HPX_WORKGRAPHPOLICY_HPP
 #define KOKKOS_HPX_WORKGRAPHPOLICY_HPP
diff --git a/lib/kokkos/core/src/KokkosExp_InterOp.hpp b/lib/kokkos/core/src/KokkosExp_InterOp.hpp
index 80b380e7381..25470420c3d 100644
--- a/lib/kokkos/core/src/KokkosExp_InterOp.hpp
+++ b/lib/kokkos/core/src/KokkosExp_InterOp.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_CORE_EXP_INTEROP_HPP
 #define KOKKOS_CORE_EXP_INTEROP_HPP
@@ -84,8 +71,7 @@ template 
 struct python_view_type {
   static_assert(Kokkos::is_view>::value);
 
-  using type =
-      Kokkos::Impl::python_view_type_impl_t;
+  using type = Kokkos::Impl::python_view_type_impl_t;
 };
 
 template 
diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
index 975f511d12b..0e1e1d3dd25 100644
--- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
+++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
@@ -30,6 +17,7 @@ static_assert(false,
 #include 
 #include 
 #include 
+#include 
 #include 
 
 namespace Kokkos {
@@ -135,10 +123,12 @@ constexpr NVCC_WONT_LET_ME_CALL_YOU_Array to_array_potentially_narrowing(
 }
 
 struct TileSizeProperties {
-  int max_threads;
+  int max_threads;  // (per SM, CU)
   int default_largest_tile_size;
   int default_tile_size;
   int max_total_tile_size;
+  // For GPU backends: hardware limits for block dimensions
+  std::array max_threads_dimensions;
 };
 
 template 
@@ -149,6 +139,9 @@ TileSizeProperties get_tile_size_properties(const ExecutionSpace&) {
   properties.default_largest_tile_size = 0;
   properties.default_tile_size         = 2;
   properties.max_total_tile_size       = std::numeric_limits::max();
+  for (int i = 0; i < 3; ++i) {
+    properties.max_threads_dimensions[i] = std::numeric_limits::max();
+  }
   return properties;
 }
 
@@ -313,6 +306,12 @@ struct MDRangePolicy
             Impl::to_array_potentially_narrowing(
                 tile)) {}
 
+  MDRangePolicy(const Impl::PolicyUpdate, const MDRangePolicy& other,
+                typename traits::execution_space space)
+      : MDRangePolicy(other) {
+    this->m_space = std::move(space);
+  }
+
   template 
   MDRangePolicy(const MDRangePolicy p)
       : traits(p),  // base class may contain data such as desired occupancy
diff --git a/lib/kokkos/core/src/Kokkos_Abort.hpp b/lib/kokkos/core/src/Kokkos_Abort.hpp
index a8f38837ea0..c1cd3958baf 100644
--- a/lib/kokkos/core/src/Kokkos_Abort.hpp
+++ b/lib/kokkos/core/src/Kokkos_Abort.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_ABORT_HPP
 #define KOKKOS_ABORT_HPP
diff --git a/lib/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp b/lib/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp
index 660ee30b7d5..5d532f5e76c 100644
--- a/lib/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp
+++ b/lib/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
diff --git a/lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp b/lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp
index 62f527aa025..c259c63fb38 100644
--- a/lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp
index bf6400d97ee..a24a3d628aa 100644
--- a/lib/kokkos/core/src/Kokkos_Array.hpp
+++ b/lib/kokkos/core/src/Kokkos_Array.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_ARRAY_HPP
 #define KOKKOS_ARRAY_HPP
@@ -103,9 +90,15 @@ struct Array {
   using pointer         = T*;
   using const_pointer   = std::add_const_t*;
 
-  KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N; }
-  KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return false; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return N; }
+  KOKKOS_INLINE_FUNCTION static constexpr size_type size() noexcept {
+    return N;
+  }
+  KOKKOS_INLINE_FUNCTION static constexpr bool empty() noexcept {
+    return false;
+  }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const noexcept {
+    return N;
+  }
 
   template 
   KOKKOS_INLINE_FUNCTION constexpr reference operator[](const iType& i) {
@@ -124,13 +117,33 @@ struct Array {
     return m_internal_implementation_private_member_data[i];
   }
 
-  KOKKOS_INLINE_FUNCTION constexpr pointer data() {
+  KOKKOS_INLINE_FUNCTION constexpr pointer data() noexcept {
     return &m_internal_implementation_private_member_data[0];
   }
-  KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const {
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const noexcept {
     return &m_internal_implementation_private_member_data[0];
   }
 
+  KOKKOS_INLINE_FUNCTION constexpr pointer begin() noexcept { return data(); }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer begin() const noexcept {
+    return data();
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr pointer end() noexcept { return data() + N; }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer end() const noexcept {
+    return data() + N;
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer cbegin() const noexcept {
+    return data();
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer cend() const noexcept {
+    return data() + N;
+  }
+
   friend KOKKOS_FUNCTION constexpr bool operator==(Array const& lhs,
                                                    Array const& rhs) noexcept {
     for (size_t i = 0; i != N; ++i)
@@ -171,9 +184,13 @@ struct Array {
   using pointer         = T*;
   using const_pointer   = std::add_const_t*;
 
-  KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return 0; }
-  KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true; }
-  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return 0; }
+  KOKKOS_INLINE_FUNCTION static constexpr size_type size() noexcept {
+    return 0;
+  }
+  KOKKOS_INLINE_FUNCTION static constexpr bool empty() noexcept { return true; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const noexcept {
+    return 0;
+  }
 
   template 
   KOKKOS_INLINE_FUNCTION reference operator[](const iType&) {
@@ -191,11 +208,31 @@ struct Array {
     return *reinterpret_cast(-1);
   }
 
-  KOKKOS_INLINE_FUNCTION constexpr pointer data() { return nullptr; }
-  KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const {
+  KOKKOS_INLINE_FUNCTION constexpr pointer data() noexcept { return nullptr; }
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const noexcept {
     return nullptr;
   }
 
+  KOKKOS_INLINE_FUNCTION constexpr pointer begin() noexcept { return data(); }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer begin() const noexcept {
+    return data();
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr pointer end() noexcept { return data(); }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer end() const noexcept {
+    return data();
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer cbegin() const noexcept {
+    return data();
+  }
+
+  KOKKOS_INLINE_FUNCTION constexpr const_pointer cend() const noexcept {
+    return data();
+  }
+
   friend KOKKOS_FUNCTION constexpr bool operator==(Array const&,
                                                    Array const&) noexcept {
     return true;
diff --git a/lib/kokkos/core/src/Kokkos_Assert.hpp b/lib/kokkos/core/src/Kokkos_Assert.hpp
index 6fea286005e..134c7053723 100644
--- a/lib/kokkos/core/src/Kokkos_Assert.hpp
+++ b/lib/kokkos/core/src/Kokkos_Assert.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_ASSERT_HPP
 #define KOKKOS_ASSERT_HPP
diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp
index ba611360922..cca71986761 100644
--- a/lib/kokkos/core/src/Kokkos_Atomic.hpp
+++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 /// \file Kokkos_Atomic.hpp
 /// \brief Atomic functions
diff --git a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
index 384e477f2c9..5499641ed63 100644
--- a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
+++ b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
 #include 
@@ -21,12 +8,14 @@ static_assert(false,
 #endif
 #ifndef KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_
 #define KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_
+
 #include 
 #include 
 
-#include   // identity_type
 #include 
 
+#include 
+
 namespace Kokkos {
 
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
@@ -60,7 +49,7 @@ KOKKOS_DEPRECATED inline const char* atomic_query_version() {
 namespace Impl {
 template 
 using not_deduced_atomic_t =
-    std::add_const_t>>;
+    std::add_const_t>>;
 
 template 
 using enable_if_atomic_t =
diff --git a/lib/kokkos/core/src/Kokkos_BitManipulation.hpp b/lib/kokkos/core/src/Kokkos_BitManipulation.hpp
index f314e3fa4d1..09f64464705 100644
--- a/lib/kokkos/core/src/Kokkos_BitManipulation.hpp
+++ b/lib/kokkos/core/src/Kokkos_BitManipulation.hpp
@@ -1,18 +1,5 @@
-//@HEADER
-// ************************************************************************
-//
-//                        Kokkos v. 4.0
-//       Copyright (2022) National Technology & Engineering
-//               Solutions of Sandia, LLC (NTESS).
-//
-// Under the terms of Contract DE-NA0003525 with NTESS,
-// the U.S. Government retains certain rights in this software.
-//
-// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
-// See https://kokkos.org/LICENSE for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//@HEADER
+// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
 
 #ifndef KOKKOS_BIT_MANIPULATION_HPP
 #define KOKKOS_BIT_MANIPULATION_HPP
@@ -20,80 +7,287 @@
 #include 
 #include 
 #include   // CHAR_BIT
+#include 
 #include   //memcpy
 #include 
 
 namespace Kokkos::Impl {
 
-template 
-KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
-  if constexpr (sizeof(T) > 1) {
-    using U = std::make_unsigned_t;
+template