diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index e2aaab594..bac001795 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -1,347 +1,342 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- OpenMP based threading support for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "neighbor.h" #include "neigh_request.h" #include "universe.h" #include "update.h" #include "integrate.h" #include "min.h" #include "fix_omp.h" #include "thr_data.h" #include "thr_omp.h" #include "pair_hybrid.h" #include "bond_hybrid.h" #include "angle_hybrid.h" #include "dihedral_hybrid.h" #include "improper_hybrid.h" #include "kspace.h" #include <string.h> #include <stdlib.h> #include <stdio.h> #include "suffix.h" #if defined(LMP_USER_CUDA) #include "cuda_modify_flags.h" #endif using namespace LAMMPS_NS; using namespace FixConst; #if defined(LMP_USER_CUDA) using namespace FixConstCuda; #endif static int get_tid() { int tid = 0; #if defined(_OPENMP) tid = omp_get_thread_num(); #endif return tid; } /* ---------------------------------------------------------------------- */ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL), - _nthr(-1), _neighbor(true), _mixed(false) + _nthr(-1), _neighbor(true), _mixed(false), _reduced(true) { if ((narg < 4) || (narg > 7)) error->all(FLERR,"Illegal package omp command"); if (strcmp(arg[1],"all") != 0) error->all(FLERR,"fix OMP has to operate on group 'all'"); int nthreads = 1; if (narg > 3) { #if defined(_OPENMP) if (strcmp(arg[3],"*") == 0) #pragma omp parallel default(none) shared(nthreads) nthreads = omp_get_num_threads(); else nthreads = atoi(arg[3]); #endif } if (nthreads < 1) error->all(FLERR,"Illegal number of OpenMP threads requested"); + int reset_thr = 0; if (nthreads != comm->nthreads) { #if defined(_OPENMP) + reset_thr = 1; omp_set_num_threads(nthreads); #endif comm->nthreads = nthreads; } int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"force/neigh") == 0) _neighbor = true; else if (strcmp(arg[iarg],"force") == 0) _neighbor = false; else if (strcmp(arg[iarg],"mixed") == 0) _mixed = true; else if (strcmp(arg[iarg],"double") == 0) _mixed = false; else error->all(FLERR,"Illegal package omp mode requested"); } // print summary of settings if (comm->me == 0) { - const char * const nmode = _neighbor ? "OpenMP capable" : "serial"; + const char * const nmode = _neighbor ? "multi-threaded" : "serial"; const char * const kmode = _mixed ? "mixed" : "double"; if (screen) { - fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads); - fprintf(screen," using %s neighbor list subroutines\n", nmode); - if (_mixed) - fputs(" using mixed precision OpenMP force kernels where available\n", screen); - else - fputs(" using double precision OpenMP force kernels\n", screen); + if (reset_thr) + fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads); + fprintf(screen,"using %s neighbor list subroutines\n", nmode); + fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode); } if (logfile) { - fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads); - fprintf(logfile," using %s neighbor list subroutines\n", nmode); - if (_mixed) - fputs(" using mixed precision OpenMP force kernels where available\n", logfile); - else - fputs(" using double precision OpenMP force kernels\n", logfile); + if (reset_thr) + fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads); + fprintf(logfile,"using %s neighbor list subroutines\n", nmode); + fprintf(logfile,"prefer %s precision OpenMP force kernels\n", kmode); } } // allocate list for per thread accumulator manager class instances // and then have each thread create an instance of this class to // encourage the OS to use storage that is "close" to each thread's CPU. thr = new ThrData *[nthreads]; _nthr = nthreads; - _clearforce = true; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const int tid = get_tid(); thr[tid] = new ThrData(tid); } } /* ---------------------------------------------------------------------- */ FixOMP::~FixOMP() { #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const int tid = get_tid(); delete thr[tid]; } delete[] thr; } /* ---------------------------------------------------------------------- */ int FixOMP::setmask() { // compatibility with USER-CUDA // our fix doesn't need any data transfer. #if defined(LMP_USER_CUDA) if (lmp->cuda) { int mask = 0; mask |= PRE_FORCE_CUDA; mask |= PRE_FORCE_RESPA; mask |= MIN_PRE_FORCE; return mask; } #endif int mask = 0; mask |= PRE_FORCE; mask |= PRE_FORCE_RESPA; mask |= MIN_PRE_FORCE; return mask; } /* ---------------------------------------------------------------------- */ void FixOMP::init() { - if (strstr(update->integrate_style,"respa") != NULL) - error->all(FLERR,"Cannot use r-RESPA with /omp styles"); + if ((strstr(update->integrate_style,"respa") != NULL) + && (strstr(update->integrate_style,"respa/omp") == NULL)) + error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles"); int check_hybrid, kspace_split; last_pair_hybrid = NULL; last_omp_style = NULL; const char *last_omp_name = NULL; const char *last_hybrid_name = NULL; const char *last_force_name = NULL; // support for verlet/split operation. // kspace_split == 0 : regular processing // kspace_split < 0 : master partition, does not do kspace // kspace_split > 0 : slave partition, only does kspace if (strstr(update->integrate_style,"verlet/split") != NULL) { if (universe->iworld == 0) kspace_split = -1; else kspace_split = 1; } else { kspace_split = 0; } // determine which is the last force style with OpenMP // support as this is the one that has to reduce the forces #define CheckStyleForOMP(name) \ check_hybrid = 0; \ if (force->name) { \ if ( (strcmp(force->name ## _style,"hybrid") == 0) || \ (strcmp(force->name ## _style,"hybrid/overlay") == 0) ) \ check_hybrid=1; \ if (force->name->suffix_flag & Suffix::OMP) { \ last_force_name = (const char *) #name; \ last_omp_name = force->name ## _style; \ last_omp_style = (void *) force->name; \ } \ } #define CheckHybridForOMP(name,Class) \ if (check_hybrid) { \ Class ## Hybrid *style = (Class ## Hybrid *) force->name; \ for (int i=0; i < style->nstyles; i++) { \ if (style->styles[i]->suffix_flag & Suffix::OMP) { \ last_force_name = (const char *) #name; \ last_omp_name = style->keywords[i]; \ last_omp_style = style->styles[i]; \ } \ } \ } if (kspace_split <= 0) { CheckStyleForOMP(pair); CheckHybridForOMP(pair,Pair); if (check_hybrid) { last_pair_hybrid = last_omp_style; last_hybrid_name = last_omp_name; } CheckStyleForOMP(bond); CheckHybridForOMP(bond,Bond); CheckStyleForOMP(angle); CheckHybridForOMP(angle,Angle); CheckStyleForOMP(dihedral); CheckHybridForOMP(dihedral,Dihedral); CheckStyleForOMP(improper); CheckHybridForOMP(improper,Improper); } if (kspace_split >= 0) { CheckStyleForOMP(kspace); } #undef CheckStyleForOMP #undef CheckHybridForOMP set_neighbor_omp(); // diagnostic output if (comm->me == 0) { if (last_omp_style) { if (last_pair_hybrid) { if (screen) fprintf(screen,"Hybrid pair style last /omp style %s\n", last_hybrid_name); if (logfile) fprintf(logfile,"Hybrid pair style last /omp style %s\n", last_hybrid_name); } if (screen) fprintf(screen,"Last active /omp style is %s_style %s\n", last_force_name, last_omp_name); if (logfile) fprintf(logfile,"Last active /omp style is %s_style %s\n", last_force_name, last_omp_name); } else { - _clearforce = false; if (screen) fprintf(screen,"No /omp style for force computation currently active\n"); if (logfile) fprintf(logfile,"No /omp style for force computation currently active\n"); } } } /* ---------------------------------------------------------------------- */ void FixOMP::set_neighbor_omp() { // select or deselect multi-threaded neighbor // list build depending on setting in package omp. // NOTE: since we are at the top of the list of // fixes, we cannot adjust neighbor lists from // other fixes. those have to be re-implemented // as /omp fix styles. :-( const int neigh_omp = _neighbor ? 1 : 0; const int nrequest = neighbor->nrequest; for (int i = 0; i < nrequest; ++i) neighbor->requests[i]->omp = neigh_omp; } /* ---------------------------------------------------------------------- */ // adjust size and clear out per thread accumulator arrays void FixOMP::pre_force(int) { const int nall = atom->nlocal + atom->nghost; double **f = atom->f; double **torque = atom->torque; double *erforce = atom->erforce; double *de = atom->de; double *drho = atom->drho; - if (_clearforce) { #if defined(_OPENMP) #pragma omp parallel default(none) shared(f,torque,erforce,de,drho) #endif - { - const int tid = get_tid(); - thr[tid]->check_tid(tid); - thr[tid]->init_force(nall,f,torque,erforce,de,drho); - } - } else { - thr[0]->init_force(nall,f,torque,erforce,de,drho); - } + { + const int tid = get_tid(); + thr[tid]->check_tid(tid); + thr[tid]->init_force(nall,f,torque,erforce,de,drho); + } // end of omp parallel region + + _reduced = false; } /* ---------------------------------------------------------------------- */ double FixOMP::memory_usage() { double bytes = comm->nthreads * (sizeof(ThrData *) + sizeof(ThrData)); bytes += comm->nthreads * thr[0]->memory_usage(); return bytes; } diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h index e65184998..656ab752a 100644 --- a/src/USER-OMP/fix_omp.h +++ b/src/USER-OMP/fix_omp.h @@ -1,74 +1,78 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef FIX_CLASS FixStyle(OMP,FixOMP) #else #ifndef LMP_FIX_OMP_H #define LMP_FIX_OMP_H #include "fix.h" namespace LAMMPS_NS { class ThrData; class FixOMP : public Fix { friend class ThrOMP; + friend class RespaOMP; public: FixOMP(class LAMMPS *, int, char **); virtual ~FixOMP(); virtual int setmask(); virtual void init(); virtual void pre_force(int); virtual void setup_pre_force(int vflag) { pre_force(vflag); } virtual void min_setup_pre_force(int vflag) { pre_force(vflag); } virtual void min_pre_force(int vflag) { pre_force(vflag); } virtual void setup_pre_force_respa(int vflag,int) { pre_force(vflag); } virtual void pre_force_respa(int vflag,int,int) { pre_force(vflag); } virtual double memory_usage(); protected: ThrData **thr; void *last_omp_style; // pointer to the style that needs // to do the general force reduction void *last_pair_hybrid; // pointer to the pair style that needs // to call virial_fdot_compute() + // signal that an /omp style did the force reduction. needed by respa/omp + void did_reduce() { _reduced = true; } public: ThrData *get_thr(int tid) { return thr[tid]; } int get_nthr() const { return _nthr; } bool get_neighbor() const { return _neighbor; } bool get_mixed() const { return _mixed; } + bool get_reduced() const { return _reduced; } private: - int _nthr; // number of currently active ThrData object - bool _neighbor; // en/disable threads for neighbor list construction - bool _clearforce; // whether to clear per thread data objects - bool _mixed; // whether to use a mixed precision compute kernel + int _nthr; // number of currently active ThrData objects + bool _neighbor; // en/disable threads for neighbor list construction + bool _mixed; // whether to prefer mixed precision compute kernels + bool _reduced; // whether forces have been reduced for this step void set_neighbor_omp(); }; } #endif #endif diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 5bfc49638..4aea63055 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -1,1170 +1,1177 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- OpenMP based threading support for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "thr_omp.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "math_const.h" #include <string.h> using namespace LAMMPS_NS; using namespace MathConst; /* ---------------------------------------------------------------------- */ ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style), thr_error(0) { // register fix omp with this class int ifix = lmp->modify->find_fix("package_omp"); if (ifix < 0) lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles"); fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]); } /* ---------------------------------------------------------------------- */ ThrOMP::~ThrOMP() { // nothing to do? } /* ---------------------------------------------------------------------- Hook up per thread per atom arrays into the tally infrastructure ---------------------------------------------------------------------- */ void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom, double **vatom, ThrData *thr) { const int tid = thr->get_tid(); if (tid == 0) thr_error = 0; if (thr_style & THR_PAIR) { if (eflag & 2) { thr->eatom_pair = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_pair[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_pair = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_BOND) { if (eflag & 2) { thr->eatom_bond = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_bond[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_bond = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_ANGLE) { if (eflag & 2) { thr->eatom_angle = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_angle[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_angle = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_DIHEDRAL) { if (eflag & 2) { thr->eatom_dihed = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_dihed = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_IMPROPER) { if (eflag & 2) { thr->eatom_imprp = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_imprp = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double)); } } // nothing to do for THR_KSPACE } /* ---------------------------------------------------------------------- Reduce per thread data into the regular structures Reduction of global properties is serialized with a "critical" directive, so that only one thread at a time will access the global variables. Since we are not synchronized, this should come with little overhead. The reduction of per-atom properties in contrast is parallelized over threads in the same way as forces. ---------------------------------------------------------------------- */ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, ThrData *const thr) { const int nlocal = lmp->atom->nlocal; const int nghost = lmp->atom->nghost; const int nall = nlocal + nghost; const int nfirst = lmp->atom->nfirst; const int nthreads = lmp->comm->nthreads; const int evflag = eflag | vflag; const int tid = thr->get_tid(); double **f = lmp->atom->f; double **x = lmp->atom->x; int need_force_reduce = 1; if (evflag) sync_threads(); switch (thr_style) { case THR_PAIR: { Pair * const pair = lmp->force->pair; if (pair->vflag_fdotr) { // this is a non-hybrid pair style. compute per thread fdotr if (fix->last_pair_hybrid == NULL) { if (lmp->neighbor->includegroup == 0) thr->virial_fdotr_compute(x, nlocal, nghost, -1); else thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); } else { if (style == fix->last_pair_hybrid) { // pair_style hybrid will compute fdotr for us // but we first need to reduce the forces data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + fix->did_reduce(); need_force_reduce = 0; } } } if (evflag) { #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { pair->eng_vdwl += thr->eng_vdwl; pair->eng_coul += thr->eng_coul; thr->eng_vdwl = 0.0; thr->eng_coul = 0.0; } if (vflag & 3) for (int i=0; i < 6; ++i) { pair->virial[i] += thr->virial_pair[i]; thr->virial_pair[i] = 0.0; } } if (eflag & 2) { data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); } } } break; case THR_BOND: if (evflag) { Bond * const bond = lmp->force->bond; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { bond->energy += thr->eng_bond; thr->eng_bond = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { bond->virial[i] += thr->virial_bond[i]; thr->virial_bond[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_ANGLE: if (evflag) { Angle * const angle = lmp->force->angle; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { angle->energy += thr->eng_angle; thr->eng_angle = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { angle->virial[i] += thr->virial_angle[i]; thr->virial_angle[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_DIHEDRAL: if (evflag) { Dihedral * const dihedral = lmp->force->dihedral; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { dihedral->energy += thr->eng_dihed; thr->eng_dihed = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { dihedral->virial[i] += thr->virial_dihed[i]; thr->virial_dihed[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals if (evflag) { Dihedral * const dihedral = lmp->force->dihedral; Pair * const pair = lmp->force->pair; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { dihedral->energy += thr->eng_dihed; pair->eng_vdwl += thr->eng_vdwl; pair->eng_coul += thr->eng_coul; thr->eng_dihed = 0.0; thr->eng_vdwl = 0.0; thr->eng_coul = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { dihedral->virial[i] += thr->virial_dihed[i]; pair->virial[i] += thr->virial_pair[i]; thr->virial_dihed[i] = 0.0; thr->virial_pair[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_IMPROPER: if (evflag) { Improper *improper = lmp->force->improper; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { improper->energy += thr->eng_imprp; thr->eng_imprp = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { improper->virial[i] += thr->virial_imprp[i]; thr->virial_imprp[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_KSPACE: - // nothing to do. XXX need to add support for per-atom info + // nothing to do. XXX may need to add support for per-atom info + break; + + case THR_INTGR: + // nothing to do break; default: printf("tid:%d unhandled thr_style case %d\n", tid, thr_style); break; } if (style == fix->last_omp_style) { - if (need_force_reduce) + if (need_force_reduce) { data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + fix->did_reduce(); + } if (lmp->atom->torque) data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); } } /* ---------------------------------------------------------------------- tally eng_vdwl and eng_coul into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, ThrData * const thr) { if (pair->eflag_global) { if (newton_pair) { thr->eng_vdwl += evdwl; thr->eng_coul += ecoul; } else { const double evdwlhalf = 0.5*evdwl; const double ecoulhalf = 0.5*ecoul; if (i < nlocal) { thr->eng_vdwl += evdwlhalf; thr->eng_coul += ecoulhalf; } if (j < nlocal) { thr->eng_vdwl += evdwlhalf; thr->eng_coul += ecoulhalf; } } } if (pair->eflag_atom) { const double epairhalf = 0.5 * (evdwl + ecoul); if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf; if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf; } } /* helper functions */ static void v_tally(double * const vout, const double * const vin) { vout[0] += vin[0]; vout[1] += vin[1]; vout[2] += vin[2]; vout[3] += vin[3]; vout[4] += vin[4]; vout[5] += vin[5]; } static void v_tally(double * const vout, const double scale, const double * const vin) { vout[0] += scale*vin[0]; vout[1] += scale*vin[1]; vout[2] += scale*vin[2]; vout[3] += scale*vin[3]; vout[4] += scale*vin[4]; vout[5] += scale*vin[5]; } /* ---------------------------------------------------------------------- tally virial into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double * const v, ThrData * const thr) { if (pair->vflag_global) { double * const va = thr->virial_pair; if (newton_pair) { v_tally(va,v); } else { if (i < nlocal) v_tally(va,0.5,v); if (j < nlocal) v_tally(va,0.5,v); } } if (pair->vflag_atom) { if (newton_pair || i < nlocal) { double * const va = thr->vatom_pair[i]; v_tally(va,0.5,v); } if (newton_pair || j < nlocal) { double * const va = thr->vatom_pair[j]; v_tally(va,0.5,v); } } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into per thread global and per-atom accumulators need i < nlocal test since called by bond_quartic and dihedral_charmm ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, const double fpair, const double delx, const double dely, const double delz, ThrData * const thr) { if (pair->eflag_either) e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators for virial, have delx,dely,delz and fx,fy,fz ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, const double fx, const double fy, const double fz, const double delx, const double dely, const double delz, ThrData * const thr) { if (pair->eflag_either) e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { double v[6]; v[0] = delx*fx; v[1] = dely*fy; v[2] = delz*fz; v[3] = delx*fy; v[4] = delx*fz; v[5] = dely*fz; v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators called by SW and hbond potentials, newton_pair is always on virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk ------------------------------------------------------------------------- */ void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k, const double evdwl, const double ecoul, const double * const fj, const double * const fk, const double * const drji, const double * const drki, ThrData * const thr) { if (pair->eflag_either) { if (pair->eflag_global) { thr->eng_vdwl += evdwl; thr->eng_coul += ecoul; } if (pair->eflag_atom) { const double epairthird = THIRD * (evdwl + ecoul); thr->eatom_pair[i] += epairthird; thr->eatom_pair[j] += epairthird; thr->eatom_pair[k] += epairthird; } } if (pair->vflag_either) { double v[6]; v[0] = drji[0]*fj[0] + drki[0]*fk[0]; v[1] = drji[1]*fj[1] + drki[1]*fk[1]; v[2] = drji[2]*fj[2] + drki[2]*fk[2]; v[3] = drji[0]*fj[1] + drki[0]*fk[1]; v[4] = drji[0]*fj[2] + drki[0]*fk[2]; v[5] = drji[1]*fj[2] + drki[1]*fk[2]; if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { v_tally(thr->vatom_pair[i],THIRD,v); v_tally(thr->vatom_pair[j],THIRD,v); v_tally(thr->vatom_pair[k],THIRD,v); } } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j, const int k, const int m, const double evdwl, const double * const fi, const double * const fj, const double * const fk, const double * const drim, const double * const drjm, const double * const drkm, ThrData * const thr) { double v[6]; if (pair->eflag_either) { if (pair->eflag_global) thr->eng_vdwl += evdwl; if (pair->eflag_atom) { const double epairfourth = 0.25 * evdwl; thr->eatom_pair[i] += epairfourth; thr->eatom_pair[j] += epairfourth; thr->eatom_pair[k] += epairfourth; thr->eatom_pair[m] += epairfourth; } } if (pair->vflag_atom) { v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); v_tally(thr->vatom_pair[m],v); } } /* ---------------------------------------------------------------------- tally ecoul and virial into each of n atoms in list called by TIP4P potential, newton_pair is always on changes v values by dividing by n ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_list_thr(Pair * const pair, const int key, const int * const list, const double * const v, const double ecoul, const double alpha, ThrData * const thr) { int i; if (pair->eflag_either) { if (pair->eflag_global) thr->eng_coul += ecoul; if (pair->eflag_atom) { if (key == 0) { thr->eatom_pair[list[0]] += 0.5*ecoul; thr->eatom_pair[list[1]] += 0.5*ecoul; } else if (key == 1) { thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[1]] += 0.25*ecoul*alpha; thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.5*ecoul; } else if (key == 2) { thr->eatom_pair[list[0]] += 0.5*ecoul; thr->eatom_pair[list[1]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.25*ecoul*alpha; } else { thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[1]] += 0.25*ecoul*alpha; thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[4]] += 0.25*ecoul*alpha; thr->eatom_pair[list[5]] += 0.25*ecoul*alpha; } } } if (pair->vflag_either) { if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { if (key == 0) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]; thr->vatom_pair[list[1]][i] += 0.5*v[i]; } } else if (key == 1) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.5*v[i]; } } else if (key == 2) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]; thr->vatom_pair[list[1]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.25*v[i]*alpha; } } else { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[4]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[5]][i] += 0.25*v[i]*alpha; } } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal, const int newton_bond, const double ebond, const double fbond, const double delx, const double dely, const double delz, ThrData * const thr) { if (bond->eflag_either) { const double ebondhalf = 0.5*ebond; if (newton_bond) { if (bond->eflag_global) thr->eng_bond += ebond; if (bond->eflag_atom) { thr->eatom_bond[i] += ebondhalf; thr->eatom_bond[j] += ebondhalf; } } else { if (bond->eflag_global) { if (i < nlocal) thr->eng_bond += ebondhalf; if (j < nlocal) thr->eng_bond += ebondhalf; } if (bond->eflag_atom) { if (i < nlocal) thr->eatom_bond[i] += ebondhalf; if (j < nlocal) thr->eatom_bond[j] += ebondhalf; } } } if (bond->vflag_either) { double v[6]; v[0] = delx*delx*fbond; v[1] = dely*dely*fbond; v[2] = delz*delz*fbond; v[3] = delx*dely*fbond; v[4] = delx*delz*fbond; v[5] = dely*delz*fbond; if (bond->vflag_global) { if (newton_bond) v_tally(thr->virial_bond,v); else { if (i < nlocal) v_tally(thr->virial_bond,0.5,v); if (j < nlocal) v_tally(thr->virial_bond,0.5,v); } } if (bond->vflag_atom) { v[0] *= 0.5; v[1] *= 0.5; v[2] *= 0.5; v[3] *= 0.5; v[4] *= 0.5; v[5] *= 0.5; if (newton_bond) { v_tally(thr->vatom_bond[i],v); v_tally(thr->vatom_bond[j],v); } else { if (j < nlocal) v_tally(thr->vatom_bond[i],v); if (j < nlocal) v_tally(thr->vatom_bond[j],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k, const int nlocal, const int newton_bond, const double eangle, const double * const f1, const double * const f3, const double delx1, const double dely1, const double delz1, const double delx2, const double dely2, const double delz2, ThrData * const thr) { if (angle->eflag_either) { const double eanglethird = THIRD*eangle; if (newton_bond) { if (angle->eflag_global) thr->eng_angle += eangle; if (angle->eflag_atom) { thr->eatom_angle[i] += eanglethird; thr->eatom_angle[j] += eanglethird; thr->eatom_angle[k] += eanglethird; } } else { if (angle->eflag_global) { if (i < nlocal) thr->eng_angle += eanglethird; if (j < nlocal) thr->eng_angle += eanglethird; if (k < nlocal) thr->eng_angle += eanglethird; } if (angle->eflag_atom) { if (i < nlocal) thr->eatom_angle[i] += eanglethird; if (j < nlocal) thr->eatom_angle[j] += eanglethird; if (k < nlocal) thr->eatom_angle[k] += eanglethird; } } } if (angle->vflag_either) { double v[6]; v[0] = delx1*f1[0] + delx2*f3[0]; v[1] = dely1*f1[1] + dely2*f3[1]; v[2] = delz1*f1[2] + delz2*f3[2]; v[3] = delx1*f1[1] + delx2*f3[1]; v[4] = delx1*f1[2] + delx2*f3[2]; v[5] = dely1*f1[2] + dely2*f3[2]; if (angle->vflag_global) { if (newton_bond) { v_tally(thr->virial_angle,v); } else { int cnt = 0; if (i < nlocal) ++cnt; if (j < nlocal) ++cnt; if (k < nlocal) ++cnt; v_tally(thr->virial_angle,cnt*THIRD,v); } } if (angle->vflag_atom) { v[0] *= THIRD; v[1] *= THIRD; v[2] *= THIRD; v[3] *= THIRD; v[4] *= THIRD; v[5] *= THIRD; if (newton_bond) { v_tally(thr->vatom_angle[i],v); v_tally(thr->vatom_angle[j],v); v_tally(thr->vatom_angle[k],v); } else { if (j < nlocal) v_tally(thr->vatom_angle[i],v); if (j < nlocal) v_tally(thr->vatom_angle[j],v); if (k < nlocal) v_tally(thr->vatom_angle[k],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial from 1-3 repulsion of SDK angle into accumulators ------------------------------------------------------------------------- */ void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3, const int nlocal, const int newton_bond, const double epair, const double fpair, const double delx, const double dely, const double delz, ThrData * const thr) { if (angle->eflag_either) { const double epairhalf = 0.5 * epair; if (angle->eflag_global) { if (newton_bond || i1 < nlocal) thr->eng_angle += epairhalf; if (newton_bond || i3 < nlocal) thr->eng_angle += epairhalf; } if (angle->eflag_atom) { if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf; if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf; } } if (angle->vflag_either) { double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; if (angle->vflag_global) { double * const va = thr->virial_angle; if (newton_bond || i1 < nlocal) v_tally(va,0.5,v); if (newton_bond || i3 < nlocal) v_tally(va,0.5,v); } if (angle->vflag_atom) { if (newton_bond || i1 < nlocal) { double * const va = thr->vatom_angle[i1]; v_tally(va,0.5,v); } if (newton_bond || i3 < nlocal) { double * const va = thr->vatom_angle[i3]; v_tally(va,0.5,v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2, const int i3, const int i4, const int nlocal, const int newton_bond, const double edihedral, const double * const f1, const double * const f3, const double * const f4, const double vb1x, const double vb1y, const double vb1z, const double vb2x, const double vb2y, const double vb2z, const double vb3x, const double vb3y, const double vb3z, ThrData * const thr) { if (dihed->eflag_either) { if (dihed->eflag_global) { if (newton_bond) { thr->eng_dihed += edihedral; } else { const double edihedralquarter = 0.25*edihedral; int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter; } } if (dihed->eflag_atom) { const double edihedralquarter = 0.25*edihedral; if (newton_bond) { thr->eatom_dihed[i1] += edihedralquarter; thr->eatom_dihed[i2] += edihedralquarter; thr->eatom_dihed[i3] += edihedralquarter; thr->eatom_dihed[i4] += edihedralquarter; } else { if (i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter; if (i2 < nlocal) thr->eatom_dihed[i2] += edihedralquarter; if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter; if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter; } } } if (dihed->vflag_either) { double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; if (dihed->vflag_global) { if (newton_bond) { v_tally(thr->virial_dihed,v); } else { int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v); } } v[0] *= 0.25; v[1] *= 0.25; v[2] *= 0.25; v[3] *= 0.25; v[4] *= 0.25; v[5] *= 0.25; if (dihed->vflag_atom) { if (newton_bond) { v_tally(thr->vatom_dihed[i1],v); v_tally(thr->vatom_dihed[i2],v); v_tally(thr->vatom_dihed[i3],v); v_tally(thr->vatom_dihed[i4],v); } else { if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v); if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v); if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v); if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2, const int i3, const int i4, const int nlocal, const int newton_bond, const double eimproper, const double * const f1, const double * const f3, const double * const f4, const double vb1x, const double vb1y, const double vb1z, const double vb2x, const double vb2y, const double vb2z, const double vb3x, const double vb3y, const double vb3z, ThrData * const thr) { if (imprp->eflag_either) { if (imprp->eflag_global) { if (newton_bond) { thr->eng_imprp += eimproper; } else { const double eimproperquarter = 0.25*eimproper; int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter; } } if (imprp->eflag_atom) { const double eimproperquarter = 0.25*eimproper; if (newton_bond) { thr->eatom_imprp[i1] += eimproperquarter; thr->eatom_imprp[i2] += eimproperquarter; thr->eatom_imprp[i3] += eimproperquarter; thr->eatom_imprp[i4] += eimproperquarter; } else { if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter; if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter; if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter; if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter; } } } if (imprp->vflag_either) { double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; if (imprp->vflag_global) { if (newton_bond) { v_tally(thr->virial_imprp,v); } else { int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v); } } v[0] *= 0.25; v[1] *= 0.25; v[2] *= 0.25; v[3] *= 0.25; v[4] *= 0.25; v[5] *= 0.25; if (imprp->vflag_atom) { if (newton_bond) { v_tally(thr->vatom_imprp[i1],v); v_tally(thr->vatom_imprp[i2],v); v_tally(thr->vatom_imprp[i3],v); v_tally(thr->vatom_imprp[i4],v); } else { if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v); if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v); if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v); if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v); } } } } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO potential, newton_pair is always on fpair is magnitude of force on atom I ------------------------------------------------------------------------- */ void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair, const double * const drij, ThrData * const thr) { double v[6]; v[0] = 0.5 * drij[0]*drij[0]*fpair; v[1] = 0.5 * drij[1]*drij[1]*fpair; v[2] = 0.5 * drij[2]*drij[2]*fpair; v[3] = 0.5 * drij[0]*drij[1]*fpair; v[4] = 0.5 * drij[0]*drij[2]*fpair; v[5] = 0.5 * drij[1]*drij[2]*fpair; v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO and Tersoff potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::v_tally3_thr(const int i, const int j, const int k, const double * const fi, const double * const fj, const double * const drik, const double * const drjk, ThrData * const thr) { double v[6]; v[0] = THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]); v[1] = THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]); v[2] = THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]); v[3] = THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]); v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m, const double * const fi, const double * const fj, const double * const fk, const double * const drim, const double * const drjm, const double * const drkm, ThrData * const thr) { double v[6]; v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); v_tally(thr->vatom_pair[m],v); } /* ---------------------------------------------------------------------- */ double ThrOMP::memory_usage_thr() { double bytes=0.0; return bytes; } diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h index 587c24f38..9348058d8 100644 --- a/src/USER-OMP/thr_omp.h +++ b/src/USER-OMP/thr_omp.h @@ -1,208 +1,208 @@ /* -*- c++ -*- ------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #ifndef LMP_THR_OMP_H #define LMP_THR_OMP_H #include "pointers.h" #include "error.h" #include "fix_omp.h" #include "thr_data.h" namespace LAMMPS_NS { // forward declarations class Pair; class Bond; class Angle; class Dihedral; class Improper; class KSpace; class Fix; class ThrOMP { protected: LAMMPS *lmp; // reference to base lammps object. FixOMP *fix; // pointer to fix_omp; const int thr_style; int thr_error; public: ThrOMP(LAMMPS *, int); virtual ~ThrOMP(); double memory_usage_thr(); inline void sync_threads() { #if defined(_OPENMP) #pragma omp barrier #endif { ; } }; enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2, THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5, - THR_CHARMM=1<<6, /*THR_PROXY=1<<7,*/ THR_HYBRID=1<<8, - THR_FIX=1<<9}; + THR_CHARMM=1<<6, /*THR_PROXY=1<<7,THR_HYBRID=1<<8, */ + THR_FIX=1<<9,THR_INTGR=1<<10}; protected: // extra ev_tally setup work for threaded styles void ev_setup_thr(int, int, int, double *, double **, ThrData *); // compute global per thread virial contribution from per-thread force void virial_fdotr_compute_thr(double * const, const double * const * const, const double * const * const, const int, const int, const int); // reduce per thread data as needed void reduce_thr(void * const style, const int eflag, const int vflag, ThrData * const thr); // thread safe variant error abort support. // signals an error condition in any thread by making // thr_error > 0, if condition "cond" is true. // will abort from thread 0 if thr_error is > 0 // otherwise return true. // returns false if no error found on any thread. // use return value to jump/return to end of threaded region. bool check_error_thr(const bool cond, const int tid, const char *fname, const int line, const char *errmsg) { if (cond) { #if defined(_OPENMP) #pragma omp atomic ++thr_error; #endif if (tid > 0) return true; else lmp->error->one(fname,line,errmsg); } else { if (thr_error > 0) { if (tid == 0) lmp->error->one(fname,line,errmsg); else return true; } else return false; } return false; }; protected: // threading adapted versions of the ev_tally infrastructure // style specific versions (need access to style class flags) // Pair void e_tally_thr(Pair * const, const int, const int, const int, const int, const double, const double, ThrData * const); void v_tally_thr(Pair * const, const int, const int, const int, const int, const double * const, ThrData * const); void ev_tally_thr(Pair * const, const int, const int, const int, const int, const double, const double, const double, const double, const double, const double, ThrData * const); void ev_tally_xyz_thr(Pair * const, const int, const int, const int, const int, const double, const double, const double, const double, const double, const double, const double, const double, ThrData * const); void ev_tally3_thr(Pair * const, const int, const int, const int, const double, const double, const double * const, const double * const, const double * const, const double * const, ThrData * const); void ev_tally4_thr(Pair * const, const int, const int, const int, const int, const double, const double * const, const double * const, const double * const, const double * const, const double * const, const double * const, ThrData * const); // Bond void ev_tally_thr(Bond * const, const int, const int, const int, const int, const double, const double, const double, const double, const double, ThrData * const); // Angle void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int, const double, const double * const, const double * const, const double, const double, const double, const double, const double, const double, ThrData * const thr); void ev_tally13_thr(Angle * const, const int, const int, const int, const int, const double, const double, const double, const double, const double, ThrData * const thr); // Dihedral void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int, const int, const double, const double * const, const double * const, const double * const, const double, const double, const double, const double, const double, const double, const double, const double, const double, ThrData * const); // Improper void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int, const int, const double, const double * const, const double * const, const double * const, const double, const double, const double, const double, const double, const double, const double, const double, const double, ThrData * const); // style independent versions void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const); void v_tally3_thr(const int, const int, const int, const double * const, const double * const, const double * const, const double * const, ThrData * const); void v_tally4_thr(const int, const int, const int, const int, const double * const, const double * const, const double * const, const double * const, const double * const, const double * const, ThrData * const); void ev_tally_list_thr(Pair * const, const int, const int * const, const double * const, const double, const double, ThrData * const); }; // set loop range thread id, and force array offset for threaded runs. static inline void loop_setup_thr(int &ifrom, int &ito, int &tid, int inum, int nthreads) { #if defined(_OPENMP) tid = omp_get_thread_num(); // each thread works on a fixed chunk of atoms. const int idelta = 1 + inum/nthreads; ifrom = tid*idelta; ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else tid = 0; ifrom = 0; ito = inum; #endif } // helpful definitions to help compilers optimizing code better typedef struct { double x,y,z; } dbl3_t; typedef struct { double x,y,z,w; } dbl4_t; typedef struct { int a,b,t; } int3_t; typedef struct { int a,b,c,t; } int4_t; typedef struct { int a,b,c,d,t; } int5_t; } #ifdef _noalias #undef _noalias #endif #if defined(__GNUC__) #define _noalias __restrict #else #define _noalias #endif #endif