diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh
index 6af859952..db0beb521 100644
--- a/src/USER-OMP/Install.sh
+++ b/src/USER-OMP/Install.sh
@@ -1,32 +1,20 @@
 # Install/unInstall package files in LAMMPS
 # do not install child files if parent does not exist
 
-if (test $1 = 1) then
-
-#  if (test -e ../pair_lj_cut_coul_long.cpp) then
-#    cp pair_lj_cut_coul_long_omp.cpp ..
-#    cp pair_lj_cut_coul_long_omp.h ..
-#  fi
-
-  cp pair_lj_cut_omp.cpp ..
-
-  cp thr_omp.cpp ..
-
-  cp pair_lj_cut_omp.h ..
-
-  cp thr_omp.h ..
-
-elif (test $1 = 0) then
-
-#  rm -f ../pair_lj_cut_coul_long_omp.cpp
-  rm -f ../pair_lj_cut_omp.cpp
-
-  rm -f ../thr_omp.cpp
-
-#  rm -f ../pair_lj_cut_coul_long_omp.h
-  rm -f ../pair_lj_cut_omp.h
-
-  rm -f ../thr_omp.h
-
-fi
-
+for file in *_omp.cpp *_omp.h; do
+  # let us see if the "rain man" can count the toothpicks...
+  ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
+
+  if (test "$1" = 1) then
+    if (test "$file" = "thr_omp.h") || (test "$file" = "thr_omp.cpp") then
+      : # always install those files.
+    elif (test ! -e "../$ofile") then
+      continue
+    fi
+
+    cp "$file" ..
+
+  elif (test "$1" = 0) then
+    rm -f "../$file"
+  fi
+done
diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh
index ecd2ebee1..5a004c918 100644
--- a/src/USER-OMP/Package.sh
+++ b/src/USER-OMP/Package.sh
@@ -1,21 +1,28 @@
-#/bin/sh
 # Update package files in LAMMPS
-# copy package file to src if it doesn't exists or is different
-# do not copy gayberne files if non-GPU version does not exist
+# copy package file to src if it doesn't exist or is different
+# do not copy certain files if non-OMP versions do not exist
+# do remove OpenMP style files that have no matching
+# non-OpenMP version installed, e.g. after a package has been removed
+
 for file in *_omp.cpp *_omp.h; do
   # let us see if the "rain man" can count the toothpicks...
   ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
 
   if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
-    : # do check for those files.
+    : # always check for those files.
   elif (test ! -e ../$ofile) then
+    if (test -e ../$file) then
+      echo " removing src/$file"
+      rm -f ../$file
+    fi
     continue
   fi
+
   if (test ! -e ../$file) then
     echo " creating src/$file"
     cp $file ..
   elif ! cmp -s $file ../$file ; then
     echo " updating src/$file"
     cp $file ..
   fi
 done
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp
new file mode 100644
index 000000000..63bfc4327
--- /dev/null
+++ b/src/USER-OMP/dihedral_charmm_omp.cpp
@@ -0,0 +1,328 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_charmm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "pair.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.001
+
+// Threaded driver: each thread gets its range [ifrom,ito) from loop_setup_thr(),
+// runs the matching eval<>() on it, and the per-thread results are then reduced.
+void DihedralCharmmOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  // ensure pair->ev_tally() will use 1-4 virial contribution
+
+  if (weightflag && vflag_global == 2)
+    force->pair->vflag_either = force->pair->vflag_global = 1;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // template arguments of eval are <EVFLAG,EFLAG,NEWTON_BOND>
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  // NOTE(review): the loop below also runs when evflag is 0 -- confirm that is intended
+  // reduce contributions to non-bonded energy terms
+  for (int n = 0; n < nthreads; ++n) {
+    force->pair->eng_vdwl += eng_vdwl_thr[n];
+    force->pair->eng_coul += eng_coul_thr[n];
+  }
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom <= n < nto; forces are accumulated into f
+  int i1,i2,i3,i4,i,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;
+  double c,s,p,sx2,sy2,sz2;
+  int itype,jtype;
+  double delx,dely,delz,rsq,r2inv,r6inv;
+  double forcecoul,forcelj,fpair,ecoul,evdwl;
+  // zero all energies: they are passed to ev_tally_thr() even when EFLAG is 0
+  edihedral = evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *atomtype = atom->type;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+  double qqrd2e = force->qqrd2e;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c,s calculation
+
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+                me,tid,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    m = multiplicity[type];
+    p = 1.0;
+    ddf1 = df1 = 0.0;   // ddf1 is read below even when m == 0 skips the loop
+
+    for (i = 0; i < m; i++) {
+      ddf1 = p*c - df1*s;
+      df1 = p*s + df1*c;
+      p = ddf1;
+    }
+
+    p = p*cos_shift[type] + df1*sin_shift[type];
+    df1 = df1*cos_shift[type] - ddf1*sin_shift[type];
+    df1 *= -m;
+    p += 1.0;
+
+    if (m == 0) {
+      p = 1.0 + cos_shift[type];
+      df1 = 0.0;
+    }
+
+    if (EFLAG) edihedral = k[type] * p;
+
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+
+    df = -k[type] * df1;
+
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+    // 1-4 LJ and Coulomb interactions
+    // tally energy/virial in pair, using newton_bond as newton flag
+
+    if (weight[type] > 0.0) {
+      itype = atomtype[i1];
+      jtype = atomtype[i4];
+
+      delx = x[i1][0] - x[i4][0];
+      dely = x[i1][1] - x[i4][1];
+      delz = x[i1][2] - x[i4][2];
+      domain->minimum_image(delx,dely,delz);
+      rsq = delx*delx + dely*dely + delz*delz;
+      r2inv = 1.0/rsq;
+      r6inv = r2inv*r2inv*r2inv;
+
+      if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv;
+      else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
+      forcelj = r6inv * (lj14_1[itype][jtype]*r6inv - lj14_2[itype][jtype]);
+      fpair = weight[type] * (forcelj+forcecoul)*r2inv;
+
+      if (EFLAG) {
+        ecoul = weight[type] * forcecoul;
+        evdwl = r6inv * (lj14_3[itype][jtype]*r6inv - lj14_4[itype][jtype]);
+        evdwl *= weight[type];
+      }
+
+      if (NEWTON_BOND || i1 < nlocal) {
+        f[i1][0] += delx*fpair;
+        f[i1][1] += dely*fpair;
+        f[i1][2] += delz*fpair;
+      }
+      if (NEWTON_BOND || i4 < nlocal) {
+        f[i4][0] -= delx*fpair;
+        f[i4][1] -= dely*fpair;
+        f[i4][2] -= delz*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND,
+                               evdwl,ecoul,fpair,delx,dely,delz,tid);
+    }
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h
new file mode 100644
index 000000000..a39ad83f7
--- /dev/null
+++
b/src/USER-OMP/dihedral_charmm_omp.h
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(charmm/omp,DihedralCharmmOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_CHARMM_OMP_H
+#define LMP_DIHEDRAL_CHARMM_OMP_H
+
+#include "dihedral_charmm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP variant of DihedralCharmm (style name charmm/omp); it overrides
+// compute() and additionally derives from ThrOMP.
+class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP {
+
+ public:
+  DihedralCharmmOMP(class LAMMPS *lmp) :
+    DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  // kernel for dihedrals [ifrom,ito); the three flags are compile-time constants
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp
new file mode 100644
index 000000000..734829664
--- /dev/null
+++ b/src/USER-OMP/dihedral_class2_omp.cpp
@@ -0,0 +1,532 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S.
Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.0000001
+
+// Threaded driver: each thread gets its range [ifrom,ito) from loop_setup_thr(),
+// runs the matching eval<>() on it, and the per-thread results are then reduced.
+void DihedralClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // template arguments of eval are <EVFLAG,EFLAG,NEWTON_BOND>
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom <= n < nto; forces are accumulated into f
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral;
+  double r1mag2,r1,r2mag2,r2,r3mag2,r3;
+  double sb1,rb1,sb2,rb2,sb3,rb3,c0,r12c1;
+  double r12c2,costh12,costh13,costh23,sc1,sc2,s1,s2,c;
+  double cosphi,phi,sinphi,a11,a22,a33,a12,a13,a23,sx1,sx2;
+  double sx12,sy1,sy2,sy12,sz1,sz2,sz12,dphi1,dphi2,dphi3;
+  double de_dihedral,t1,t2,t3,t4,cos2phi,cos3phi,bt1,bt2;
+  double bt3,sumbte,db,sumbtf,at1,at2,at3,da,da1,da2,r1_0;
+  double r3_0,dr1,dr2,tk1,tk2,s12,sin2;
+  double dcosphidr[4][3],dphidr[4][3],dbonddr[3][4][3],dthetadr[2][4][3];
+  double fabcd[4][3];
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // distances
+
+    r1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    r1 = sqrt(r1mag2);
+    r2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    r2 = sqrt(r2mag2);
+    r3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    r3 = sqrt(r3mag2);
+
+    sb1 = 1.0/r1mag2;
+    rb1 = 1.0/r1;
+    sb2 = 1.0/r2mag2;
+    rb2 = 1.0/r2;
+    sb3 = 1.0/r3mag2;
+    rb3 = 1.0/r3;
+
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // angles
+
+    r12c1 = rb1*rb2;
+    r12c2 = rb2*rb3;
+    costh12 = (vb1x*vb2x + vb1y*vb2y + vb1z*vb2z) * r12c1;
+    costh13 = c0;
+    costh23 = (vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z) * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sin2 = MAX(1.0 - costh12*costh12,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - costh23*costh23,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + costh12*costh23) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      // read the rank from comm (as the other OMP dihedral styles do): no MPI call in here
+      const int me = comm->me;
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d",
+                me,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+    cosphi = c;
+    phi = acos(c);
+
+    sinphi = sqrt(1.0 - c*c);
+    sinphi = MAX(sinphi,SMALL);
+
+    a11 = -c*sb1*s1;
+    a22 = sb2 * (2.0*costh13*s12 - c*(s1+s2));
+    a33 = -c*sb3*s2;
+    a12 = r12c1 * (costh12*c*s1 + costh23*s12);
+    a13 = rb1*rb3*s12;
+    a23 = r12c2 * (-costh23*c*s2 - costh12*s12);
+
+    sx1 = a11*vb1x + a12*vb2x + a13*vb3x;
+    sx2 = a12*vb1x + a22*vb2x + a23*vb3x;
+    sx12 = a13*vb1x + a23*vb2x + a33*vb3x;
+    sy1 = a11*vb1y + a12*vb2y + a13*vb3y;
+    sy2 = a12*vb1y + a22*vb2y + a23*vb3y;
+    sy12 = a13*vb1y + a23*vb2y + a33*vb3y;
+    sz1 = a11*vb1z + a12*vb2z + a13*vb3z;
+    sz2 = a12*vb1z + a22*vb2z + a23*vb3z;
+    sz12 = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    // set up d(cos(phi))/d(r) and dphi/dr arrays
+
+    dcosphidr[0][0] = -sx1;
+    dcosphidr[0][1] = -sy1;
+    dcosphidr[0][2] = -sz1;
+    dcosphidr[1][0] = sx2 + sx1;
+    dcosphidr[1][1] = sy2 + sy1;
+    dcosphidr[1][2] = sz2 + sz1;
+    dcosphidr[2][0] = sx12 - sx2;
+    dcosphidr[2][1] = sy12 - sy2;
+    dcosphidr[2][2] = sz12 - sz2;
+    dcosphidr[3][0] = -sx12;
+    dcosphidr[3][1] = -sy12;
+    dcosphidr[3][2] = -sz12;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        dphidr[i][j] = -dcosphidr[i][j] / sinphi;
+
+    // energy
+
+    dphi1 = phi - phi1[type];
+    dphi2 = 2.0*phi - phi2[type];
+    dphi3 = 3.0*phi - phi3[type];
+
+    if (EFLAG) edihedral = k1[type]*(1.0 - cos(dphi1)) +
+                 k2[type]*(1.0 - cos(dphi2)) +
+                 k3[type]*(1.0 - cos(dphi3));
+
+    de_dihedral = k1[type]*sin(dphi1) + 2.0*k2[type]*sin(dphi2) +
+      3.0*k3[type]*sin(dphi3);
+
+    // torsion forces on all 4 atoms
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] = de_dihedral*dphidr[i][j];
+
+    // set up d(bond)/d(r) array
+    // dbonddr(i,j,k) = bond i, atom j, coordinate k
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+        for (k = 0; k < 3; k++)
+          dbonddr[i][j][k] = 0.0;
+
+    // bond1
+
+    dbonddr[0][0][0] = vb1x / r1;
+    dbonddr[0][0][1] = vb1y / r1;
+    dbonddr[0][0][2] = vb1z / r1;
+    dbonddr[0][1][0] = -vb1x / r1;
+    dbonddr[0][1][1] = -vb1y / r1;
+    dbonddr[0][1][2] = -vb1z / r1;
+
+    // bond2
+
+    dbonddr[1][1][0] = vb2x / r2;
+    dbonddr[1][1][1] = vb2y / r2;
+    dbonddr[1][1][2] = vb2z / r2;
+    dbonddr[1][2][0] = -vb2x / r2;
+    dbonddr[1][2][1] = -vb2y / r2;
+    dbonddr[1][2][2] = -vb2z / r2;
+
+    // bond3
+
+    dbonddr[2][2][0] = vb3x / r3;
+    dbonddr[2][2][1] = vb3y / r3;
+    dbonddr[2][2][2] = vb3z / r3;
+    dbonddr[2][3][0] = -vb3x / r3;
+    dbonddr[2][3][1] = -vb3y / r3;
+    dbonddr[2][3][2] = -vb3z / r3;
+
+    // set up d(theta)/d(r) array
+    // dthetadr(i,j,k) = angle i, atom j, coordinate k
+
+    for (i = 0; i < 2; i++)
+      for (j = 0; j < 4; j++)
+        for (k = 0; k < 3; k++)
+          dthetadr[i][j][k] = 0.0;
+
+    t1 = costh12 / r1mag2;
+    t2 = costh23 / r2mag2;
+    t3 = costh12 / r2mag2;
+    t4 = costh23 / r3mag2;
+
+    // angle12
+
+    dthetadr[0][0][0] = sc1 * ((t1 * vb1x) - (vb2x * r12c1));
+    dthetadr[0][0][1] = sc1 * ((t1 * vb1y) - (vb2y * r12c1));
+    dthetadr[0][0][2] = sc1 * ((t1 * vb1z) - (vb2z * r12c1));
+
+    dthetadr[0][1][0] = sc1 * ((-t1 * vb1x) + (vb2x * r12c1) +
+                               (-t3 * vb2x) + (vb1x * r12c1));
+    dthetadr[0][1][1] = sc1 * ((-t1 * vb1y) + (vb2y * r12c1) +
+                               (-t3 * vb2y) + (vb1y * r12c1));
+    dthetadr[0][1][2] = sc1 * ((-t1 * vb1z) + (vb2z * r12c1) +
+                               (-t3 * vb2z) + (vb1z * r12c1));
+
+    dthetadr[0][2][0] = sc1 * ((t3 * vb2x) - (vb1x * r12c1));
+    dthetadr[0][2][1] = sc1 * ((t3 * vb2y) - (vb1y * r12c1));
+    dthetadr[0][2][2] = sc1 * ((t3 * vb2z) - (vb1z * r12c1));
+
+    // angle23
+
+    dthetadr[1][1][0] = sc2 * ((t2 * vb2x) + (vb3x * r12c2));
+    dthetadr[1][1][1] = sc2 * ((t2 * vb2y) + (vb3y * r12c2));
+    dthetadr[1][1][2] = sc2 * ((t2 * vb2z) + (vb3z * r12c2));
+
+    dthetadr[1][2][0] = sc2 * ((-t2 * vb2x) - (vb3x * r12c2) +
+                               (t4 * vb3x) + (vb2x * r12c2));
+    dthetadr[1][2][1] = sc2 * ((-t2 * vb2y) - (vb3y * r12c2) +
+                               (t4 * vb3y) + (vb2y * r12c2));
+    dthetadr[1][2][2] = sc2 * ((-t2 * vb2z) - (vb3z * r12c2) +
+                               (t4 * vb3z) + (vb2z * r12c2));
+
+    dthetadr[1][3][0] = -sc2 * ((t4 * vb3x) + (vb2x * r12c2));
+    dthetadr[1][3][1] = -sc2 * ((t4 * vb3y) + (vb2y * r12c2));
+    dthetadr[1][3][2] = -sc2 * ((t4 * vb3z) + (vb2z * r12c2));
+
+    // mid-bond/torsion coupling
+    // energy on bond2 (middle bond)
+
+    cos2phi = cos(2.0*phi);
+    cos3phi = cos(3.0*phi);
+
+    bt1 = mbt_f1[type] * cosphi;
+    bt2 = mbt_f2[type] * cos2phi;
+    bt3 = mbt_f3[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+    db = r2 - mbt_r0[type];
+    if (EFLAG) edihedral += db * sumbte;
+
+    // force on bond2
+
+    bt1 = -mbt_f1[type] * sinphi;
+    bt2 = -2.0 * mbt_f2[type] * sin(2.0*phi);
+    bt3 = -3.0 * mbt_f3[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[1][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond1 (first bond)
+
+    bt1 = ebt_f1_1[type] * cosphi;
+    bt2 = ebt_f2_1[type] * cos2phi;
+    bt3 = ebt_f3_1[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r1 - ebt_r0_1[type];
+    if (EFLAG) edihedral += db * (bt1+bt2+bt3);
+
+    // force on bond1
+
+    bt1 = ebt_f1_1[type] * sinphi;
+    bt2 = 2.0 * ebt_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * ebt_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] -= db*sumbtf*dphidr[i][j] + sumbte*dbonddr[0][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond3 (last bond)
+
+    bt1 = ebt_f1_2[type] * cosphi;
+    bt2 = ebt_f2_2[type] * cos2phi;
+    bt3 = ebt_f3_2[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r3 - ebt_r0_2[type];
+    if (EFLAG) edihedral += db * (bt1+bt2+bt3);
+
+    // force on bond3
+
+    bt1 = -ebt_f1_2[type] * sinphi;
+    bt2 = -2.0 * ebt_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * ebt_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[2][i][j];
+
+    // angle/torsion coupling
+    // energy on angle1
+
+    at1 = at_f1_1[type] * cosphi;
+    at2 = at_f2_1[type] * cos2phi;
+    at3 = at_f3_1[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh12) - at_theta0_1[type];
+    if (EFLAG) edihedral += da * (at1+at2+at3);
+
+    // force on angle1
+
+    bt1 = at_f1_1[type] * sinphi;
+    bt2 = 2.0 * at_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * at_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] -= da*sumbtf*dphidr[i][j] + sumbte*dthetadr[0][i][j];
+
+    // energy on angle2
+
+    at1 = at_f1_2[type] * cosphi;
+    at2 = at_f2_2[type] * cos2phi;
+    at3 = at_f3_2[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh23) - at_theta0_2[type];
+    if (EFLAG) edihedral += da * (at1+at2+at3);
+
+    // force on angle2
+
+    bt1 = -at_f1_2[type] * sinphi;
+    bt2 = -2.0 * at_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * at_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] += da*sumbtf*dphidr[i][j] + sumbte*dthetadr[1][i][j];
+
+    // angle/angle/torsion coupling
+
+    da1 = acos(costh12) - aat_theta0_1[type];
+    da2 = acos(costh23) - aat_theta0_2[type];
+
+    if (EFLAG) edihedral += aat_k[type]*da1*da2*cosphi;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+        fabcd[i][j] -= aat_k[type] *
+          (cosphi * (da2*dthetadr[0][i][j] - da1*dthetadr[1][i][j]) +
+           sinphi * da1*da2*dphidr[i][j]);
+
+    // bond1/bond3 coupling
+
+    if (fabs(bb13t_k[type]) > SMALL) {
+
+      r1_0 = bb13t_r10[type];
+      r3_0 = bb13t_r30[type];
+      dr1 = r1 - r1_0;
+      dr2 = r3 - r3_0;
+      tk1 = -bb13t_k[type] * dr1 / r3;
+      tk2 = -bb13t_k[type] * dr2 / r1;
+
+      if (EFLAG) edihedral += bb13t_k[type]*dr1*dr2;
+
+      fabcd[0][0] += tk2 * vb1x;
+      fabcd[0][1] += tk2 * vb1y;
+      fabcd[0][2] += tk2 * vb1z;
+
+      fabcd[1][0] -= tk2 * vb1x;
+      fabcd[1][1] -= tk2 * vb1y;
+      fabcd[1][2] -= tk2 * vb1z;
+
+      fabcd[2][0] -= tk1 * vb3x;
+      fabcd[2][1] -= tk1 * vb3y;
+      fabcd[2][2] -= tk1 * vb3z;
+
+      fabcd[3][0] += tk1 * vb3x;
+      fabcd[3][1] += tk1 * vb3y;
+      fabcd[3][2] += tk1 * vb3z;
+    }
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,
+                   fabcd[0],fabcd[2],fabcd[3],
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h
new file mode 100644
index
000000000..d26f2f871
--- /dev/null
+++ b/src/USER-OMP/dihedral_class2_omp.h
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(class2/omp,DihedralClass2OMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_CLASS2_OMP_H
+#define LMP_DIHEDRAL_CLASS2_OMP_H
+
+#include "dihedral_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP variant of DihedralClass2 (style name class2/omp); it overrides
+// compute() and additionally derives from ThrOMP.
+class DihedralClass2OMP : public DihedralClass2, public ThrOMP {
+
+ public:
+  DihedralClass2OMP(class LAMMPS *lmp) :
+    DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  // kernel for dihedrals [ifrom,ito); the three flags are compile-time constants
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
new file mode 100644
index 000000000..a6c027e92
--- /dev/null
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
@@ -0,0 +1,263 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.
Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_cosine_shift_exp_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.001
+
+// Threaded driver: each thread gets its range [ifrom,ito) from loop_setup_thr(),
+// runs the matching eval<>() on it, and the per-thread results are then reduced.
+void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // template arguments of eval are <EVFLAG,EFLAG,NEWTON_BOND>
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom <= n < nto; forces are accumulated into f
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;
+  double c,s,sx2,sy2,sz2;
+  double cccpsss,cssmscc,exp2;
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c,s calculation
+
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+                me,tid,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    double aa=a[type];
+    double uumin=umin[type];
+
+    cccpsss = c*cost[type]+s*sint[type];
+    cssmscc = c*sint[type]-s*cost[type];
+
+    if (doExpansion[type]) {
+      // |a|<0.001 so use expansions relative precision <1e-5
+      if (EFLAG) edihedral = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin;
+      df=0.5*uumin*( cssmscc + 0.5*aa*cccpsss);
+    } else {
+      exp2=exp(0.5*aa*(1+cccpsss));
+      if (EFLAG) edihedral = opt1[type]*(1-exp2);
+      df= 0.5*opt1[type]*aa* ( exp2*cssmscc );
+    }
+
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
new file mode 100644
index 000000000..eb906ab95
--- /dev/null
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(cosine/shift/exp/omp,DihedralCosineShiftExpOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H
+#define LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H
+
+#include "dihedral_cosine_shift_exp.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP {
+  // registered as dihedral style cosine/shift/exp/omp; overrides compute() only
+ public:
+  DihedralCosineShiftExpOMP(class LAMMPS *lmp) :
+    DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp
new file mode 100644
index 000000000..0fa24090a
--- /dev/null
+++ b/src/USER-OMP/dihedral_harmonic_omp.cpp
@@ -0,0 +1,270 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHarmonicOMP::compute(int eflag, int vflag)
+{
+  // tally setup is only done when energy or virial output is requested
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom..nto-1: adds forces to f, tallies if EVFLAG
+  int i1,i2,i3,i4,i,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;
+  double c,s,p,sx2,sy2,sz2;
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c,s calculation
+
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+                me,tid,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    m = multiplicity[type];
+    p = 1.0;
+    ddf1 = df1 = 0.0;  // ddf1 is read below even when m == 0 skips the loop
+
+    for (i = 0; i < m; i++) {
+      ddf1 = p*c - df1*s;
+      df1 = p*s + df1*c;
+      p = ddf1;
+    }
+
+    p = p*cos_shift[type] + df1*sin_shift[type];
+    df1 = df1*cos_shift[type] - ddf1*sin_shift[type];
+    df1 *= -m;
+    p += 1.0;
+
+    if (m == 0) {
+      p = 1.0 + cos_shift[type];
+      df1 = 0.0;
+    }
+
+    if (EFLAG) edihedral = k[type] * p;
+
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+
+    df = -k[type] * df1;
+
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h
new file mode 100644
index 000000000..2d7bae64e
--- /dev/null
+++ b/src/USER-OMP/dihedral_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(harmonic/omp,DihedralHarmonicOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_HARMONIC_OMP_H
+#define LMP_DIHEDRAL_HARMONIC_OMP_H
+
+#include "dihedral_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP {
+  // registered as dihedral style harmonic/omp; overrides compute() only
+ public:
+  DihedralHarmonicOMP(class LAMMPS *lmp) :
+    DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp
new file mode 100644
index 000000000..a3ca969ef
--- /dev/null
+++ b/src/USER-OMP/dihedral_helix_omp.cpp
@@ -0,0 +1,280 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_helix_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.001
+#define SMALLER 0.00001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHelixOMP::compute(int eflag, int vflag)
+{
+  // tally setup is only done when energy or virial output is requested
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom..nto-1: adds forces to f, tallies if EVFLAG
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+  double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2;
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle
+
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+
+    cx = vb1y*vb2z - vb1z*vb2y;
+    cy = vb1z*vb2x - vb1x*vb2z;
+    cz = vb1x*vb2y - vb1y*vb2x;
+    cmag = sqrt(cx*cx + cy*cy + cz*cz);
+    dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+                me,tid,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    phi = acos(c);
+    if (dx < 0.0) phi *= -1.0;
+    si = sin(phi);
+    if (fabs(si) < SMALLER) si = SMALLER;
+    siinv = 1.0/si;
+
+    pd = -aphi[type] + 3.0*bphi[type]*sin(3.0*phi)*siinv +
+      cphi[type]*sin(phi + 0.25*PI)*siinv;
+
+    if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) +
+      cphi[type]*(1.0 + cos(phi + 0.25*PI));
+
+    // scale c and s12 by pd before assembling the a** coefficients
+    a = pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
+
+    sx2 = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2 = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2 = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h
new file mode 100644
index 000000000..792319741
--- /dev/null
+++ b/src/USER-OMP/dihedral_helix_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(helix/omp,DihedralHelixOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_HELIX_OMP_H
+#define LMP_DIHEDRAL_HELIX_OMP_H
+
+#include "dihedral_helix.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralHelixOMP : public DihedralHelix, public ThrOMP {
+  // registered as dihedral style helix/omp; overrides compute() only
+ public:
+  DihedralHelixOMP(class LAMMPS *lmp) :
+    DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
new file mode 100644
index 000000000..bde958984
--- /dev/null
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
@@ -0,0 +1,269 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_multi_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
+{
+  // tally setup is only done when energy or virial output is requested
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->ndihedrallist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // handles dihedrals nfrom..nto-1: adds forces to f, tallies if EVFLAG
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+  double s2,sin2;
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle
+
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+      if (screen) {
+        char str[128];
+        sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+                me,tid,update->ntimestep,
+                atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+        error->warning(FLERR,str,0);
+        fprintf(screen," 1st atom: %d %g %g %g\n",
+                me,x[i1][0],x[i1][1],x[i1][2]);
+        fprintf(screen," 2nd atom: %d %g %g %g\n",
+                me,x[i2][0],x[i2][1],x[i2][2]);
+        fprintf(screen," 3rd atom: %d %g %g %g\n",
+                me,x[i3][0],x[i3][1],x[i3][2]);
+        fprintf(screen," 4th atom: %d %g %g %g\n",
+                me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+    // p = sum (i=1,5) a_i * c**(i-1)
+    // pd = dp/dc
+
+    pd = a2[type] + c*(2.0*a3[type] + c*(3.0*a4[type] + c*4.0*a5[type]));
+
+    if (EFLAG)
+      edihedral = a1[type] + c*(a2[type] + c*(a3[type] + c*(a4[type] + c*a5[type])));
+
+    a = pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1*(c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2*(c2mag*c*s2 + c1mag*s12);
+
+    sx2 = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2 = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2 = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+                   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h
new file mode 100644
index 000000000..da2322f03
--- /dev/null
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(multi/harmonic/omp,DihedralMultiHarmonicOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H
+#define LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H
+
+#include "dihedral_multi_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP {
+  // registered as dihedral style multi/harmonic/omp; overrides compute() only
+ public:
+  DihedralMultiHarmonicOMP(class LAMMPS *lmp) :
+    DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp
new file mode 100644
index 000000000..9f59e26d2
--- /dev/null
+++ b/src/USER-OMP/dihedral_opls_omp.cpp
@@ -0,0 +1,286 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "dihedral_opls_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 +#define SMALLER 0.00001 + +/* ---------------------------------------------------------------------- */ + +void DihedralOPLSOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+ if (evflag) ev_reduce_thr(this); +} + +template +void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid) +{ + + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2; + + edihedral = 0.0; + + double **x = atom->x; + int **dihedrallist = neighbor->dihedrallist; + int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + domain->minimum_image(vb1x,vb1y,vb1z); + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + domain->minimum_image(vb2x,vb2y,vb2z); + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + domain->minimum_image(vb2xm,vb2ym,vb2zm); + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + domain->minimum_image(vb3x,vb3y,vb3z); + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + 
vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + cx = vb1y*vb2z - vb1z*vb2y; + cy = vb1z*vb2x - vb1x*vb2z; + cz = vb1x*vb2y - vb1y*vb2x; + cmag = sqrt(cx*cx + cy*cy + cz*cz); + dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,tid,update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = sum (i=1,4) k_i * (1 + (-1)**(i+1)*cos(i*phi) ) + // pd = dp/dc + + phi = acos(c); + if (dx < 0.0) phi *= -1.0; + si = sin(phi); + if (fabs(si) < SMALLER) si = SMALLER; + siinv = 1.0/si; + + pd = k1[type] - 2.0*k2[type]*sin(2.0*phi)*siinv + + 3.0*k3[type]*sin(3.0*phi)*siinv - 4.0*k4[type]*sin(4.0*phi)*siinv; + + if (EFLAG) edihedral = k1[type]*(1.0 + c) + k2[type]*(1.0 - cos(2.0*phi)) + + k3[type]*(1.0 + cos(3.0*phi)) + k4[type]*(1.0 - cos(4.0*phi)); + + + a = pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2 * (c2mag*c*s2 + 
c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid); + } +} + diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h new file mode 100644 index 000000000..58b992053 --- /dev/null +++ b/src/USER-OMP/dihedral_opls_omp.h @@ -0,0 +1,48 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(opls/omp,DihedralOPLSOMP) + +#else + +#ifndef LMP_DIHEDRAL_OPLS_OMP_H +#define LMP_DIHEDRAL_OPLS_OMP_H + +#include "dihedral_opls.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP { + + public: + DihedralOPLSOMP(class LAMMPS *lmp) : + DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {}; + + virtual void compute(int, int); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_gravity_omp.cpp b/src/USER-OMP/fix_gravity_omp.cpp new file mode 100644 index 000000000..c4f4b39b6 --- /dev/null +++ b/src/USER-OMP/fix_gravity_omp.cpp @@ -0,0 +1,114 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "fix_gravity_omp.h" +#include "atom.h" +#include "update.h" +#include "domain.h" +#include "respa.h" +#include "error.h" + +using namespace LAMMPS_NS; + +enum{CHUTE,SPHERICAL,GRADIENT,VECTOR}; + +/* ---------------------------------------------------------------------- */ + +FixGravityOMP::FixGravityOMP(LAMMPS *lmp, int narg, char **arg) : + FixGravity(lmp, narg, arg) { } + +/* ---------------------------------------------------------------------- */ + +void FixGravityOMP::post_force(int vflag) +{ + // update direction of gravity vector if gradient style + + if (style == GRADIENT) { + if (domain->dimension == 3) { + double phi_current = degree2rad * + (phi + (update->ntimestep - time_origin)*dt*phigrad*360.0); + double theta_current = degree2rad * + (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0); + xgrav = sin(theta_current) * cos(phi_current); + ygrav = sin(theta_current) * sin(phi_current); + zgrav = cos(theta_current); + } else { + double theta_current = degree2rad * + (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0); + xgrav = sin(theta_current); + ygrav = cos(theta_current); + } + xacc = magnitude*xgrav; + yacc = magnitude*ygrav; + zacc = magnitude*zgrav; + } + + const double * const * const x = atom->x; + double * const * const f = atom->f; + double * const rmass = atom->rmass; + double * const mass = atom->mass; + int * const mask = atom->mask; + int * const type = atom->type; + const int nlocal = atom->nlocal; + const double xacc_thr = xacc; + const double yacc_thr = yacc; + const double zacc_thr = zacc; + double massone; + + int i; + eflag = 0; + double grav 
= 0.0; + + if (rmass) { +#if defined(_OPENMP) +#pragma omp parallel for private(i,massone) default(none) reduction(-:grav) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + massone = rmass[i]; + f[i][0] += massone*xacc_thr; + f[i][1] += massone*yacc_thr; + f[i][2] += massone*zacc_thr; + grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]); + } + } else { +#if defined(_OPENMP) +#pragma omp parallel for private(i,massone) default(none) reduction(-:grav) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + massone = mass[type[i]]; + f[i][0] += massone*xacc_thr; + f[i][1] += massone*yacc_thr; + f[i][2] += massone*zacc_thr; + grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]); + } + } + egrav = grav; +} + +/* ---------------------------------------------------------------------- */ + +void FixGravityOMP::post_force_respa(int vflag, int ilevel, int iloop) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} + diff --git a/src/USER-OMP/fix_gravity_omp.h b/src/USER-OMP/fix_gravity_omp.h new file mode 100644 index 000000000..dd0144410 --- /dev/null +++ b/src/USER-OMP/fix_gravity_omp.h @@ -0,0 +1,38 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(gravity/omp,FixGravityOMP)
+
+#else
+
+#ifndef LMP_FIX_GRAVITY_OMP_H
+#define LMP_FIX_GRAVITY_OMP_H
+
+#include "fix_gravity.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP variant of FixGravity: same constructor arguments, with
+// post_force() and post_force_respa() overridden.
+// NOTE(review): the FixStyle() line above presumably registers the
+// "gravity/omp" style name when FIX_CLASS is defined -- macro not visible here.
+class FixGravityOMP : public FixGravity {
+
+ public:
+  FixGravityOMP(class LAMMPS *, int, char **);
+  virtual void post_force(int);
+  virtual void post_force_respa(int, int, int);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp
new file mode 100644
index 000000000..a642b21f2
--- /dev/null
+++ b/src/USER-OMP/fix_nve_sphere_omp.cpp
@@ -0,0 +1,140 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "string.h" +#include "fix_nve_sphere_omp.h" +#include "atom.h" +#include "atom_vec.h" +#include "update.h" +#include "respa.h" +#include "force.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define INERTIA 0.4 // moment of inertia prefactor for sphere + +enum{NONE,DIPOLE}; + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ + +void FixNVESphereOMP::initial_integrate(int vflag) +{ + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + double **omega = atom->omega; + double **torque = atom->torque; + double *radius = atom->radius; + double *rmass = atom->rmass; + int *mask = atom->mask; + int nlocal = atom->nlocal; + int i; + + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + // set timestep here since dt may have changed or come via rRESPA + const double dtfrotate = dtf / INERTIA; + + // update v,x,omega for all particles + // d_omega/dt = torque / inertia +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(shared) +#endif + for (i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + const double dtfm = dtf / rmass[i]; + v[i][0] += dtfm * f[i][0]; + v[i][1] += dtfm * f[i][1]; + v[i][2] += dtfm * f[i][2]; + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + + const double dtirotate = dtfrotate / (radius[i]*radius[i]*rmass[i]); + omega[i][0] += dtirotate * torque[i][0]; + omega[i][1] += dtirotate * torque[i][1]; + omega[i][2] += dtirotate * torque[i][2]; + } + } + + // update mu for dipoles + // d_mu/dt = omega cross mu + // renormalize mu to dipole length + + if (extra == DIPOLE) { + double **mu = atom->mu; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(shared) +#endif + for (i = 0; i < nlocal; i++) { + double 
g0,g1,g2,msq,scale; + if (mask[i] & groupbit) { + if (mu[i][3] > 0.0) { + g0 = mu[i][0] + dtv * (omega[i][1]*mu[i][2]-omega[i][2]*mu[i][1]); + g1 = mu[i][1] + dtv * (omega[i][2]*mu[i][0]-omega[i][0]*mu[i][2]); + g2 = mu[i][2] + dtv * (omega[i][0]*mu[i][1]-omega[i][1]*mu[i][0]); + msq = g0*g0 + g1*g1 + g2*g2; + scale = mu[i][3]/sqrt(msq); + mu[i][0] = g0*scale; + mu[i][1] = g1*scale; + mu[i][2] = g2*scale; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixNVESphereOMP::final_integrate() +{ + double **v = atom->v; + double **f = atom->f; + double **omega = atom->omega; + double **torque = atom->torque; + double *rmass = atom->rmass; + double *radius = atom->radius; + int *mask = atom->mask; + int nlocal = atom->nlocal; + int i; + + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + // set timestep here since dt may have changed or come via rRESPA + + const double dtfrotate = dtf / INERTIA; + + // update v,omega for all particles + // d_omega/dt = torque / inertia + +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(shared) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + const double dtfm = dtf / rmass[i]; + v[i][0] += dtfm * f[i][0]; + v[i][1] += dtfm * f[i][1]; + v[i][2] += dtfm * f[i][2]; + + const double dtirotate = dtfrotate / (radius[i]*radius[i]*rmass[i]); + omega[i][0] += dtirotate * torque[i][0]; + omega[i][1] += dtirotate * torque[i][1]; + omega[i][2] += dtirotate * torque[i][2]; + } +} diff --git a/src/USER-OMP/fix_nve_sphere_omp.h b/src/USER-OMP/fix_nve_sphere_omp.h new file mode 100644 index 000000000..fe86039b1 --- /dev/null +++ b/src/USER-OMP/fix_nve_sphere_omp.h @@ -0,0 +1,39 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia 
Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(nve/sphere/omp,FixNVESphereOMP)
+
+#else
+
+#ifndef LMP_FIX_NVE_SPHERE_OMP_H
+#define LMP_FIX_NVE_SPHERE_OMP_H
+
+#include "fix_nve_sphere.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP variant of FixNVESphere: the constructor forwards to the base
+// class and both integration half-steps are overridden.
+// NOTE(review): the FixStyle() line above presumably registers the
+// "nve/sphere/omp" style name when FIX_CLASS is defined -- macro not visible here.
+class FixNVESphereOMP : public FixNVESphere {
+ public:
+  FixNVESphereOMP(class LAMMPS *lmp, int narg, char **arg) :
+    FixNVESphere(lmp, narg, arg) {};
+
+  virtual void initial_integrate(int);
+  virtual void final_integrate();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp
new file mode 100644
index 000000000..175bab898
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_comb_omp.cpp
@@ -0,0 +1,166 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include +#include "fix_qeq_comb_omp.h" +#include "atom.h" +#include "force.h" +#include "group.h" +#include "memory.h" +#include "error.h" +#include "respa.h" +#include "update.h" +#include "pair_comb_omp.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) + : FixQEQComb(lmp, narg, arg) +{ + if (narg < 5) error->all(FLERR,"Illegal fix qeq/comb/omp command"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::init() +{ + if (!atom->q_flag) + error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q"); + + comb = (PairComb *) force->pair_match("comb/omp",1); + if (comb == NULL) + comb = (PairComb *) force->pair_match("comb",1); + if (comb == NULL) error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb"); + + if (strstr(update->integrate_style,"respa")) + nlevels_respa = ((Respa *) update->integrate)->nlevels; + + ngroup = group->count(igroup); + if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::post_force(int vflag) +{ + int i,iloop,loopmax; + double heatpq,qmass,dtq,dtq2; + double enegchkall,enegmaxall; + + if (update->ntimestep % nevery) return; + + // reallocate work arrays if necessary + // qf = charge force + // q1 = charge displacement + // q2 = tmp storage of charge force for next iteration + + if (atom->nmax > nmax) { + memory->destroy(qf); + memory->destroy(q1); + memory->destroy(q2); + nmax = 
atom->nmax; + memory->create(qf,nmax,"qeq:qf"); + memory->create(q1,nmax,"qeq:q1"); + memory->create(q2,nmax,"qeq:q2"); + vector_atom = qf; + } + + // more loops for first-time charge equilibrium + + iloop = 0; + if (firstflag) loopmax = 5000; + else loopmax = 2000; + + // charge-equilibration loop + + if (me == 0 && fp) + fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n", + update->ntimestep); + + heatpq = 0.05; + qmass = 0.000548580; + dtq = 0.0006; + dtq2 = 0.5*dtq*dtq/qmass; + + double enegchk = 0.0; + double enegtot = 0.0; + double enegmax = 0.0; + + double *q = atom->q; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) + q1[i] = q2[i] = qf[i] = 0.0; + + for (iloop = 0; iloop < loopmax; iloop ++ ) { + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + q[i] += q1[i]; + } + + enegtot = comb->yasu_char(qf,igroup); + enegtot /= ngroup; + enegchk = enegmax = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(shared) +#endif + for (i = 0; i < nlocal ; i++) + if (mask[i] & groupbit) { + q2[i] = enegtot-qf[i]; + enegmax = MAX(enegmax,fabs(q2[i])); + enegchk += fabs(q2[i]); + qf[i] = q2[i]; + } + + MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world); + enegchk = enegchkall/ngroup; + MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world); + enegmax = enegmaxall; + + if (enegchk <= precision && enegmax <= 100.0*precision) break; + + if (me == 0 && fp) + fprintf(fp," iteration: %d, enegtot %.6g, " + "enegmax %.6g, fq deviation: %.6g\n", + iloop,enegtot,enegmax,enegchk); + +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(shared) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + } + + if (me == 0 && fp) { + if (iloop == loopmax) + fprintf(fp,"Charges did not converge in %d iterations\n",iloop); + else + fprintf(fp,"Charges converged in %d iterations to %.10f 
tolerance\n", + iloop,enegchk); + } +} + diff --git a/src/USER-OMP/fix_qeq_comb_omp.h b/src/USER-OMP/fix_qeq_comb_omp.h new file mode 100644 index 000000000..0febe6b0a --- /dev/null +++ b/src/USER-OMP/fix_qeq_comb_omp.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(qeq/comb/omp,FixQEQCombOMP) + +#else + +#ifndef LMP_FIX_QEQ_COMB_OMP_H +#define LMP_FIX_QEQ_COMB_OMP_H + +#include "fix_qeq_comb.h" + +namespace LAMMPS_NS { + +class FixQEQCombOMP : public FixQEQComb { + public: + FixQEQCombOMP(class LAMMPS *, int, char **); + virtual void init(); + virtual void post_force(int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp new file mode 100644 index 000000000..40781cb40 --- /dev/null +++ b/src/USER-OMP/fix_shear_history_omp.cpp @@ -0,0 +1,150 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "string.h" +#include "stdio.h" +#include "fix_shear_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "force.h" +#include "pair.h" +#include "update.h" +#include "modify.h" +#include "error.h" + +#if defined(_OPENMP) +#include +#endif + +using namespace LAMMPS_NS; + +#define MAXTOUCH 15 + +/* ---------------------------------------------------------------------- + copy shear partner info from neighbor lists to atom arrays + so can be exchanged with atoms +------------------------------------------------------------------------- */ + +void FixShearHistoryOMP::pre_exchange() +{ + + const int nlocal = atom->nlocal; + const int nghost = atom->nghost; + const int nall = nlocal + nghost; + const int nthreads = comm->nthreads; + + int flag = 0; +#if defined(_OPENMP) +#pragma omp parallel shared(flag) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + // each thread works on a fixed chunk of local and ghost atoms. + const int ldelta = 1 + nlocal/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > nlocal) ? nlocal : lmax; + + const int gdelta = 1 + nghost/nthreads; + const int gfrom = nlocal + tid*gdelta; + const int gmax = gfrom + gdelta; + const int gto = (gmax > nall) ? 
nall : gmax; + + + int i,j,ii,jj,m,inum,jnum; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *shear,*allshear,**firstshear; + + // zero npartners for all current atoms + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + // copy shear info from neighbor list atoms to atom arrays + + int *tag = atom->tag; + + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = list->listgranhistory->firstneigh; + firstshear = list->listgranhistory->firstdouble; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + allshear = firstshear[i]; + jnum = numneigh[i]; + touch = firsttouch[i]; + + for (jj = 0; jj < jnum; jj++) { + if (touch[jj]) { + j = jlist[jj]; + j &= NEIGHMASK; + shear = &allshear[3*jj]; + + if ((i >= lfrom) && (i < lto)) { + if (npartner[i] < MAXTOUCH) { + m = npartner[i]; + partner[i][m] = tag[j]; + shearpartner[i][m][0] = shear[0]; + shearpartner[i][m][1] = shear[1]; + shearpartner[i][m][2] = shear[2]; + } + npartner[i]++; + } + + if ((j >= lfrom) && (j < lto)) { + if (npartner[j] < MAXTOUCH) { + m = npartner[j]; + partner[j][m] = tag[i]; + shearpartner[j][m][0] = -shear[0]; + shearpartner[j][m][1] = -shear[1]; + shearpartner[j][m][2] = -shear[2]; + } + npartner[j]++; + } + + if ((j >= gfrom) && (j < gto)) { + npartner[j]++; + } + } + } + } + + // test for too many touching neighbors + int myflag = 0; + for (i = lfrom; i < lto; i++) + if (npartner[i] >= MAXTOUCH) myflag = 1; + + if (myflag) +#if defined(_OPENMP) +#pragma omp atomic +#endif + ++flag; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Too many touching neighbors - boost MAXTOUCH"); +} diff --git a/src/USER-OMP/fix_shear_history_omp.h b/src/USER-OMP/fix_shear_history_omp.h new file mode 100644 index 000000000..9a360b792 --- /dev/null +++ b/src/USER-OMP/fix_shear_history_omp.h @@ -0,0 
+1,38 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(SHEAR_HISTORY/omp,FixShearHistoryOMP)
+
+#else
+
+#ifndef LMP_FIX_SHEAR_HISTORY_OMP_H
+#define LMP_FIX_SHEAR_HISTORY_OMP_H
+
+#include "fix_shear_history.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP variant of FixShearHistory: the constructor forwards to the base
+// class and only pre_exchange() is overridden.
+// NOTE(review): the FixStyle() line above presumably registers the
+// "SHEAR_HISTORY/omp" style name when FIX_CLASS is defined -- macro not visible here.
+class FixShearHistoryOMP : public FixShearHistory {
+
+ public:
+  FixShearHistoryOMP(class LAMMPS *lmp, int narg, char **argv)
+    : FixShearHistory(lmp,narg,argv) {};
+  virtual void pre_exchange();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
new file mode 100644
index 000000000..e91642e6b
--- /dev/null
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -0,0 +1,404 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_adp_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairADPOMP::PairADPOMP(LAMMPS *lmp) : + PairADP(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairADPOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + memory->destroy(mu); + memory->destroy(lambda); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nmax,"pair:fp"); + memory->create(mu,nthreads*nmax,3,"pair:mu"); + memory->create(lambda,nthreads*nmax,6,"pair:lambda"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, *rho_t, **mu_t, **lambda_t; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + if (force->newton_pair) { + rho_t = rho + tid*nall; + mu_t = mu + tid*nall; + lambda_t = lambda + tid*nall; + } else { + rho_t = rho + tid*atom->nlocal; + mu_t = mu + tid*atom->nlocal; + lambda_t = lambda + tid*atom->nlocal; + } + + if (evflag) { + if (eflag) { + if 
(force->newton_pair) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + else eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + else eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + else eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, + double **lambda_t, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi; + double u2,u2p,w2,w2p,nu; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + double delmux,delmuy,delmuz,trdelmu,tradellam; + double adpx,adpy,adpz,fx,fy,fz; + double sumlamxx,sumlamyy,sumlamzz,sumlamyz,sumlamxz,sumlamxy; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // zero out density + + if (NEWTON_PAIR) { + memset(rho_t, 0, nall*sizeof(double)); + memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double)); + memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double)); + } else { + memset(rho_t, 0, nlocal*sizeof(double)); + memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double)); + memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double)); + } + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = 
ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[jtype][itype]][m]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + mu_t[i][0] += u2*delx; + mu_t[i][1] += u2*dely; + mu_t[i][2] += u2*delz; + coeff = w2r_spline[type2w2r[jtype][itype]][m]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + lambda_t[i][0] += w2*delx*delx; + lambda_t[i][1] += w2*dely*dely; + lambda_t[i][2] += w2*delz*delz; + lambda_t[i][3] += w2*dely*delz; + lambda_t[i][4] += w2*delx*delz; + lambda_t[i][5] += w2*delx*dely; + + if (NEWTON_PAIR || j < nlocal) { + // verify sign difference for mu and lambda + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[itype][jtype]][m]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + mu_t[j][0] -= u2*delx; + mu_t[j][1] -= u2*dely; + mu_t[j][2] -= u2*delz; + coeff = w2r_spline[type2w2r[itype][jtype]][m]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + lambda_t[j][0] += w2*delx*delx; + lambda_t[j][1] += w2*dely*dely; + lambda_t[j][2] += w2*delz*delz; + lambda_t[j][3] += w2*dely*delz; + lambda_t[j][4] += w2*delx*delz; + lambda_t[j][5] += w2*delx*dely; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(rho[0]), nall, 
comm->nthreads, 1, tid); + data_reduce_thr(&(mu[0][0]), nall, comm->nthreads, 3, tid); + data_reduce_thr(&(lambda[0][0]), nall, comm->nthreads, 6, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + // reduce per thread density + data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(&(mu[0][0]), nlocal, comm->nthreads, 3, tid); + data_reduce_thr(&(lambda[0][0]), nlocal, comm->nthreads, 6, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + p = rho[i]*rdrho + 1.0; + m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + coeff = frho_spline[type2frho[type[i]]][m]; + fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; + if (EFLAG) { + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + phi += 0.5*(mu[i][0]*mu[i][0]+mu[i][1]*mu[i][1]+mu[i][2]*mu[i][2]); + phi += 0.5*(lambda[i][0]*lambda[i][0]+lambda[i][1]* + lambda[i][1]+lambda[i][2]*lambda[i][2]); + phi += 1.0*(lambda[i][3]*lambda[i][3]+lambda[i][4]* + lambda[i][4]+lambda[i][5]*lambda[i][5]); + phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])* + (lambda[i][0]+lambda[i][1]+lambda[i][2]); + if (eflag_global) eng_vdwl_thr[tid] += phi; + if (eflag_atom) eatom_thr[tid][i] += phi; + } + } + + // wait until all theads are done with computation + sync_threads(); + + // communicate derivative of embedding function + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; 
ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // u2 = u + // u2p = u' + // w2 = w + // w2p = w' + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = z2r_spline[type2z2r[itype][jtype]][m]; + z2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[itype][jtype]][m]; + u2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = w2r_spline[type2w2r[itype][jtype]][m]; + w2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + + recip = 1.0/r; + phi = z2*recip; + phip = z2p*recip - phi*recip; + psip = fp[i]*rhojp + fp[j]*rhoip + phip; + fpair = -psip*recip; + + delmux = mu[i][0]-mu[j][0]; + delmuy = mu[i][1]-mu[j][1]; + delmuz = mu[i][2]-mu[j][2]; + trdelmu = delmux*delx+delmuy*dely+delmuz*delz; + sumlamxx = lambda[i][0]+lambda[j][0]; + sumlamyy 
= lambda[i][1]+lambda[j][1]; + sumlamzz = lambda[i][2]+lambda[j][2]; + sumlamyz = lambda[i][3]+lambda[j][3]; + sumlamxz = lambda[i][4]+lambda[j][4]; + sumlamxy = lambda[i][5]+lambda[j][5]; + tradellam = sumlamxx*delx*delx+sumlamyy*dely*dely+ + sumlamzz*delz*delz+2.0*sumlamxy*delx*dely+ + 2.0*sumlamxz*delx*delz+2.0*sumlamyz*dely*delz; + nu = sumlamxx+sumlamyy+sumlamzz; + + adpx = delmux*u2 + trdelmu*u2p*delx*recip + + 2.0*w2*(sumlamxx*delx+sumlamxy*dely+sumlamxz*delz) + + w2p*delx*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delx; + adpy = delmuy*u2 + trdelmu*u2p*dely*recip + + 2.0*w2*(sumlamxy*delx+sumlamyy*dely+sumlamyz*delz) + + w2p*dely*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*dely; + adpz = delmuz*u2 + trdelmu*u2p*delz*recip + + 2.0*w2*(sumlamxz*delx+sumlamyz*dely+sumlamzz*delz) + + w2p*delz*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delz; + adpx*=-1.0; adpy*=-1.0; adpz*=-1.0; + + fx = delx*fpair+adpx; + fy = dely*fpair+adpy; + fz = delz*fpair+adpz; + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + } + + if (EFLAG) evdwl = phi; + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fx,fy,fz,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairADPOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairADP::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h new file mode 100644 index 000000000..f7d2509cd --- /dev/null +++ b/src/USER-OMP/pair_adp_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(adp/omp,PairADPOMP) + +#else + +#ifndef LMP_PAIR_ADP_OMP_H +#define LMP_PAIR_ADP_OMP_H + +#include "pair_adp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairADPOMP : public PairADP, public ThrOMP { + + public: + PairADPOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double *rho_t, double **mu_t, double **lambda_t, + int iifrom, int iito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp similarity index 61% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_born_coul_long_omp.cpp index 8ed82c5e5..c277a080c 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -1,163 +1,199 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_born_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) : + PairBornCoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairBornCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forceborn,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + + if (rsq < cut_coulsq) { + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 
1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + } else forceborn = 0.0; + + fpair = (forcecoul + factor_lj*forceborn)*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } + } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairBornCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairBornCoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h new file mode 100644 index 000000000..d6ccbfc68 --- /dev/null +++ b/src/USER-OMP/pair_born_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - 
Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/coul/long/omp,PairBornCoulLongOMP) + +#else + +#ifndef LMP_PAIR_BORN_COUL_LONG_OMP_H +#define LMP_PAIR_BORN_COUL_LONG_OMP_H + +#include "pair_born_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP { + + public: + PairBornCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_born_omp.cpp similarity index 83% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_born_omp.cpp index 8ed82c5e5..c39d205c9 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_born_omp.cpp @@ -1,163 +1,163 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_born_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairBornOMP::PairBornOMP(LAMMPS *lmp) : + PairBorn(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairBornOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r2inv,r6inv,r,rexp,forceborn,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + fpair = factor_lj*forceborn*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, 
evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairBornOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairBorn::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h new file mode 100644 index 000000000..b24de4a57 --- /dev/null +++ b/src/USER-OMP/pair_born_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/omp,PairBornOMP) + +#else + +#ifndef LMP_PAIR_BORN_OMP_H +#define LMP_PAIR_BORN_OMP_H + +#include "pair_born.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBornOMP : public PairBorn, public ThrOMP { + + public: + PairBornOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp similarity index 69% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_buck_coul_cut_omp.cpp index 8ed82c5e5..ac47d478a 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -1,163 +1,182 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_buck_coul_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) : + PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairBuckCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + + if (rsq < cut_coulsq[itype][jtype]) + forcecoul = qqrd2e * qtmp*q[j]/r; + else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (forcecoul 
+ factor_lj*forcebuck)*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]/r; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } + } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairBuckCoulCutOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairBuckCoulCut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h new file mode 100644 index 000000000..a77f3bad2 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/cut/omp,PairBuckCoulCutOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_CUT_OMP_H +#define LMP_PAIR_BUCK_COUL_CUT_OMP_H + +#include "pair_buck_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP { + + public: + PairBuckCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp similarity index 62% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_buck_coul_long_omp.cpp index 8ed82c5e5..6e7398ca4 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -1,163 +1,198 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_buck_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) : + PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairBuckCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + + if (rsq < cut_coulsq) { + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 
1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (forcecoul + factor_lj*forcebuck)*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } + } else evdwl = 0.0; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairBuckCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairBuckCoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h new file mode 100644 index 000000000..2c87904de --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, 
Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/long/omp,PairBuckCoulLongOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_LONG_OMP_H +#define LMP_PAIR_BUCK_COUL_LONG_OMP_H + +#include "pair_buck_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP { + + public: + PairBuckCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp new file mode 100644 index 000000000..bd171f628 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_omp.cpp @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_buck_coul_omp.h" +#include "atom.h" +#include "comm.h" +#include "math_vector.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) : + PairBuckCoul(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  double evdwl,ecoul,fpair;
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  double *x0 = x[0];
+  double *f0 = f[0], *fi = f0;
+
+  int *ilist = list->ilist;
+
+  // loop over neighbors of my atoms
+
+  int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
+  int *jneigh, *jneighn, typei, typej, ni;
+  double qi, qri, *cutsqi, *cut_bucksqi,
+         *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
+  double r, rsq, r2inv, force_coul, force_buck;
+  double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2;
+  vector xi, d;
+
+  for (ii = iifrom; ii < iito; ++ii) {                  // loop over my atoms
+    i = ilist[ii]; fi = f0+3*i;
+    if (order1) qri = (qi = q[i])*qqrd2e;               // initialize constants
+    offseti = offset[typei = type[i]];
+    buck1i = buck1[typei]; buck2i = buck2[typei];
+    buckai = buck_a[typei]; buckci = buck_c[typei], rhoinvi = rhoinv[typei];
+    cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
+    memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
+
+    for (; jneigh<jneighn; ++jneigh) {                  // loop over neighbors; NOTE(review): loop head through the rsq test was lost in extraction and is reconstructed -- confirm against pair_buck_coul.cpp
+      j = *jneigh;
+      ni = sbmask(j);                                   // special-bond index, used below for special_coul[ni] / special_lj[ni]
+      j &= NEIGHMASK;
+
+      { register double *xj = x0+(j+(j<<1));
+        d[0] = xi[0] - xj[0];                           // pair vector
+        d[1] = xi[1] - xj[1];
+        d[2] = xi[2] - xj[2]; }
+
+      if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
+      r2inv = 1.0/rsq;
+      r = sqrt(rsq);
+
+      if (order1 && (rsq < cut_coulsq)) {               // coulombic
+        if (!ncoultablebits || rsq <= tabinnersq) {     // series real space
+          register double x = g_ewald*r;
+          register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
+          if (ni == 0) {
+            s *= g_ewald*exp(-x*x);
+            force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
+            if (EFLAG) ecoul = t;
+          } else {                                      // special case
+            register double f = s*(1.0-special_coul[ni])/r;
+            s *=
g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f; + if (EFLAG) ecoul = t-f; + } // table real space + } else { + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; + register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; + if (ni == 0) { + force_coul = qiqj*(ftable[k]+f*dftable[k]); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // special case + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } else force_coul = ecoul = 0.0; + + if (rsq < cut_bucksqi[typej]) { // buckingham + register double rn = r2inv*r2inv*r2inv, + expr = exp(-r*rhoinvi[typej]); + if (order6) { // long-range + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*buckci[typej]; + if (ni == 0) { + force_buck = + r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_buck = f*r*expr*buck1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej]; + if (EFLAG) evdwl = f*expr*buckai[typej] - + g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej]; + } + } else { // cut + if (ni == 0) { + force_buck = r*expr*buck1i[typej]-rn*buck2i[typej]; + if (EFLAG) evdwl = expr*buckai[typej] - + rn*buckci[typej]-offseti[typej]; + } else { // special case + register double f = special_lj[ni]; + force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]); + if (EFLAG) + evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]); + } + } + } else force_buck = evdwl = 0.0; + + fpair = (force_coul+force_buck)*r2inv; + + if (NEWTON_PAIR || j < nlocal) { + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } else { 
+ fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBuckCoul::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h new file mode 100644 index 000000000..dbff9b419 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/omp,PairBuckCoulOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_OMP_H +#define LMP_PAIR_BUCK_COUL_OMP_H + +#include "pair_buck_coul.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP { + + public: + PairBuckCoulOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp similarity index 83% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_buck_omp.cpp index 8ed82c5e5..66d8730ab 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_buck_omp.cpp @@ -1,163 +1,165 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_buck_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairBuckOMP::PairBuckOMP(LAMMPS *lmp) : + PairBuck(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairBuckOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }

 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r6inv,r,rexp,forcebuck,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;

   evdwl = 0.0;

   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;

   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;

   // loop over neighbors of my atoms

   for (ii = iifrom; ii < iito; ++ii) {

     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;

     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;

       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];

       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        r = sqrt(rsq);
+        r2inv = 1.0/rsq;                // NOTE(review): redundant, r2inv was assigned two lines up; dropping it also needs the hunk header updated
+        r6inv = r2inv*r2inv*r2inv;      // NOTE(review): redundant, same value as the context line above
+        r = sqrt(rsq);                  // NOTE(review): redundant, r was computed three lines up
+        rexp = exp(-r*rhoinv[itype][jtype]);
+        forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+        fpair = factor_lj*forcebuck*r2inv;

         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }

         if (EFLAG) {
-          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+            offset[itype][jtype];
           evdwl *= factor_lj;
         }

         if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
                                  evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] +=
fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairBuckOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairBuck::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h new file mode 100644 index 000000000..40b6702e6 --- /dev/null +++ b/src/USER-OMP/pair_buck_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/omp,PairBuckOMP) + +#else + +#ifndef LMP_PAIR_BUCK_OMP_H +#define LMP_PAIR_BUCK_OMP_H + +#include "pair_buck.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckOMP : public PairBuck, public ThrOMP { + + public: + PairBuckOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp new file mode 100644 index 000000000..01bd5f6ea --- /dev/null +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -0,0 +1,545 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_cdeam_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +// This is for debugging purposes. The ASSERT() macro is used in the code to check +// if everything runs as expected. 
Change this to #if 0 if you don't need the checking. +#if 0 + #define ASSERT(cond) ((!(cond)) ? my_failure(error,__FILE__,__LINE__) : my_noop()) + + inline void my_noop() {} + inline void my_failure(Error* error, const char* file, int line) { + char str[1024]; + sprintf(str,"Assertion failure: File %s, line %i", file, line); + error->one(FLERR,str); + } +#else + #define ASSERT(cond) +#endif + +/* ---------------------------------------------------------------------- */ + +PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) : + PairCDEAM(lmp,_cdeamVersion), PairEAM(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCDEAMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(rhoB); + memory->destroy(D_values); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(rhoB,nthreads*nmax,"pair:mu"); + memory->create(D_values,nthreads*nmax,"pair:D_values"); + memory->create(fp,nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, *rho_t, *rhoB_t, *D_values_t; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + if (force->newton_pair) { + rho_t = rho + tid*nall; + rhoB_t = rhoB + tid*nall; + D_values_t = D_values + tid*nall; + } else { + rho_t = rho + tid*atom->nlocal; + rhoB_t = rhoB + tid*atom->nlocal; + D_values_t = D_values + tid*atom->nlocal; + } + + switch (cdeamVersion) { + + case 1: + + if (evflag) { + if (eflag) { + if 
(force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } + break; + + case 2: + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid); + } + break; + + default: +#if defined(_OPENMP) +#pragma omp master +#endif + error->all(FLERR,"unsupported eam/cd pair style variant"); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
+void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
+                        double *D_values_t, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rhoip,rhojp,recip,phi;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // zero out density
+
+  if (NEWTON_PAIR) {
+    memset(rho_t, 0, nall*sizeof(double));       // with newton_pair on, each thread's slice spans local + ghost atoms
+    memset(rhoB_t, 0, nall*sizeof(double));
+    memset(D_values_t, 0, nall*sizeof(double));
+  } else {
+    memset(rho_t, 0, nlocal*sizeof(double));
+    memset(rhoB_t, 0, nlocal*sizeof(double));
+    memset(D_values_t, 0, nlocal*sizeof(double));
+  }
+
+  // Stage I
+
+  // Compute rho and rhoB at each local atom site.
+  // Additionally calculate the D_i values here if we are using the one-site formulation.
+  // For the two-site formulation we have to calculate the D values in an extra loop (Stage II).
+ + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + jtype = type[j]; + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + double localrho = RhoOfR(index, jtype, itype); + rho_t[i] += localrho; + if(jtype == speciesB) rhoB_t[i] += localrho; + if(NEWTON_PAIR || j < nlocal) { + localrho = RhoOfR(index, itype, jtype); + rho_t[j] += localrho; + if(itype == speciesB) rhoB_t[j] += localrho; + } + + if(CDEAMVERSION == 1 && itype != jtype) { + // Note: if the i-j interaction is not concentration dependent (because either + // i or j are not species A or B) then its contribution to D_i and D_j should + // be ignored. + // This if-clause is only required for a ternary. 
+ if((itype == speciesA && jtype == speciesB) + || (jtype == speciesA && itype == speciesB)) { + double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); + D_values_t[i] += Phi_AB; + if(NEWTON_PAIR || j < nlocal) + D_values_t[j] += Phi_AB; + } + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid); + if (CDEAMVERSION==1) + data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 1; + comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + // reduce per thread density + data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid); + if (CDEAMVERSION==1) + data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + EAMTableIndex index = rhoToTableIndex(rho[i]); + fp[i] = FPrimeOfRho(index, type[i]); + if(EFLAG) { + phi = FofRho(index, type[i]); + if (eflag_global) eng_vdwl_thr[tid] += phi; + if (eflag_atom) eatom_thr[tid][i] += phi; + } + } + + // wait until all theads are done with computation + sync_threads(); + + // Communicate derivative of embedding function and densities + // and D_values (this for one-site formulation only). 
+#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 2; + comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + + // The electron densities may not drop to zero because then the concentration would no longer be defined. + // But the concentration is not needed anyway if there is no interaction with another atom, which is the case + // if the electron density is exactly zero. That's why the following lines have been commented out. + // + //for(i = 0; i < nlocal + atom->nghost; i++) { + // if(rho[i] == 0 && (type[i] == speciesA || type[i] == speciesB)) + // error->one(FLERR,"CD-EAM potential routine: Detected atom with zero electron density."); + //} + + // Stage II + // This is only required for the original two-site formulation of the CD-EAM potential. + + if(CDEAMVERSION == 2) { + // Compute intermediate value D_i for each atom. + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // This code line is required for ternary alloys. + if(itype != speciesA && itype != speciesB) continue; + + double x_i = rhoB[i] / rho[i]; // Concentration at atom i. + + for(jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + if(itype == jtype) continue; + + // This code line is required for ternary alloys. + if(jtype != speciesA && jtype != speciesB) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + + // The concentration independent part of the cross pair potential. + double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); + + // Average concentration of two sites + double x_ij = 0.5 * (x_i + rhoB[j]/rho[j]); + + // Calculate derivative of h(x_ij) polynomial function. 
+ double h_prime = evalHprime(x_ij); + + D_values_t[i] += h_prime * Phi_AB / (2.0 * rho[i] * rho[i]); + if(NEWTON_PAIR || j < nlocal) + D_values_t[j] += h_prime * Phi_AB / (2.0 * rho[j] * rho[j]); + } + } + } + + if (NEWTON_PAIR) { + data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 3; + comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 4; + comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + } + + // Stage III + + // Compute force acting on each atom. + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // Concentration at site i + double x_i = -1.0; // The value -1 indicates: no concentration dependence for all interactions of atom i. + // It will be replaced by the concentration at site i if atom i is either A or B. + + double D_i, h_prime_i; + + // This if-clause is only required for ternary alloys. + if((itype == speciesA || itype == speciesB) && rho[i] != 0.0) { + + // Compute local concentration at site i. + x_i = rhoB[i]/rho[i]; + ASSERT(x_i >= 0 && x_i<=1.0); + + if(CDEAMVERSION == 1) { + // Calculate derivative of h(x_i) polynomial function. 
+ h_prime_i = evalHprime(x_i); + D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]); + } else if(CDEAMVERSION == 2) { + D_i = D_values[i]; + } else ASSERT(false); + } + + for(jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + jtype = type[j]; + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + rhoip = RhoPrimeOfR(index, itype, jtype); + rhojp = RhoPrimeOfR(index, jtype, itype); + fpair = fp[i]*rhojp + fp[j]*rhoip; + recip = 1.0/r; + + double x_j = -1; // The value -1 indicates: no concentration dependence for this i-j pair + // because atom j is not of species A nor B. + + // This code line is required for ternary alloy. + if(jtype == speciesA || jtype == speciesB) { + ASSERT(rho[i] != 0.0); + ASSERT(rho[j] != 0.0); + + // Compute local concentration at site j. + x_j = rhoB[j]/rho[j]; + ASSERT(x_j >= 0 && x_j<=1.0); + + double D_j; + if(CDEAMVERSION == 1) { + // Calculate derivative of h(x_j) polynomial function. + double h_prime_j = evalHprime(x_j); + D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]); + } else if(CDEAMVERSION == 2) { + D_j = D_values[j]; + } else ASSERT(false); + + double t2 = -rhoB[j]; + if(itype == speciesB) t2 += rho[j]; + fpair += D_j * rhoip * t2; + } + + // This if-clause is only required for a ternary alloy. + // Actually we don't need it at all because D_i should be zero anyway if + // atom i has no concentration dependent interactions (because it is not species A or B). 
+ if(x_i != -1.0) { + double t1 = -rhoB[i]; + if(jtype == speciesB) t1 += rho[i]; + fpair += D_i * rhojp * t1; + } + + double phip; + double phi = PhiOfR(index, itype, jtype, recip, phip); + if(itype == jtype || x_i == -1.0 || x_j == -1.0) { + // Case of no concentration dependence. + fpair += phip; + } else { + // We have a concentration dependence for the i-j interaction. + double h; + if(CDEAMVERSION == 1) { + // Calculate h(x_i) polynomial function. + double h_i = evalH(x_i); + // Calculate h(x_j) polynomial function. + double h_j = evalH(x_j); + h = 0.5 * (h_i + h_j); + } else if(CDEAMVERSION == 2) { + // Average concentration. + double x_ij = 0.5 * (x_i + x_j); + // Calculate h(x_ij) polynomial function. + h = evalH(x_ij); + } else ASSERT(false); + + fpair += h * phip; + phi *= h; + } + + // Divide by r_ij and negate to get forces from gradient. + fpair /= -r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if(NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if(EFLAG) evdwl = phi; + if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCDEAMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCDEAM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h new file mode 100644 index 000000000..85b124cb1 --- /dev/null +++ b/src/USER-OMP/pair_cdeam_omp.h @@ -0,0 +1,66 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/cd/omp,PairCDEAM_OneSiteOMP) +PairStyle(eam/cd/old/omp,PairCDEAM_TwoSiteOMP) + +#else + +#ifndef LMP_PAIR_CDEAM_OMP_H +#define LMP_PAIR_CDEAM_OMP_H + +#include "pair_cdeam.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCDEAMOMP : public PairCDEAM, public ThrOMP { + + public: + PairCDEAMOMP(class LAMMPS *, int); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, + int iifrom, int iito, int tid); +}; + + /// The one-site concentration formulation of CD-EAM. + class PairCDEAM_OneSiteOMP : public PairCDEAMOMP + { + public: + /// Constructor. + PairCDEAM_OneSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 1) {} + }; + + /// The two-site concentration formulation of CD-EAM. + class PairCDEAM_TwoSiteOMP : public PairCDEAMOMP + { + public: + /// Constructor. 
+ PairCDEAM_TwoSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 2) {} + }; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp similarity index 54% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_colloid_omp.cpp index 8ed82c5e5..c8bc74407 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -1,163 +1,223 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_colloid_omp.h" #include "atom.h" #include "comm.h" +#include "error.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairColloidOMP::PairColloidOMP(LAMMPS *lmp) : + PairColloid(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = 
list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r,r2inv,r6inv,forcelj,factor_lj; + double c1,c2,fR,dUR,dUA,K[9],h[4],g[4]; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - if (rsq < cutsq[itype][jtype]) { + if (rsq >= cutsq[itype][jtype]) 
continue; + + switch(form[itype][jtype]) { + case SMALL_SMALL: r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); fpair = factor_lj*forcelj*r2inv; - - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j][0] -= delx*fpair; - f[j][1] -= dely*fpair; - f[j][2] -= delz*fpair; - } - - if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } - - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + if (EFLAG) + evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + break; + + case SMALL_LARGE: + c2 = a2[itype][jtype]; + K[1] = c2*c2; + K[2] = rsq; + K[0] = K[1] - rsq; + K[4] = rsq*rsq; + K[3] = K[1] - K[2]; + K[3] *= K[3]*K[3]; + K[6] = K[3]*K[3]; + fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3]; + fpair = 4.0/15.0*fR*factor_lj * + (2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) * + sigma6[itype][jtype]/K[6]-5.0) / K[0]; + if (EFLAG) + evdwl = 2.0/9.0*fR * + (1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) * + sigma6[itype][jtype]/K[6]) - offset[itype][jtype]; + if (rsq <= K[1]) error->one(FLERR,"Overlapping small/large in pair colloid"); + break; + + case LARGE_LARGE: + r = sqrt(rsq); + c1 = a1[itype][jtype]; + c2 = a2[itype][jtype]; + K[0] = c1*c2; + K[1] = c1+c2; + K[2] = c1-c2; + K[3] = K[1]+r; + K[4] = K[1]-r; + K[5] = K[2]+r; + K[6] = K[2]-r; + K[7] = 1.0/(K[3]*K[4]); + K[8] = 1.0/(K[5]*K[6]); + g[0] = pow(K[3],-7.0); + g[1] = pow(K[4],-7.0); + g[2] = pow(K[5],-7.0); + g[3] = pow(K[6],-7.0); + h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0]; + h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1]; + h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2]; + h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3]; + g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3]; + g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4]; + g[2] *= 
-42.0*K[0]/K[5]+6.0*K[2]+K[5]; + g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6]; + + fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0; + evdwl = fR * (h[0]-h[1]-h[2]+h[3]); + dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]); + dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] + + (2.0*K[0]*K[8]-1.0)*K[8]); + fpair = factor_lj * (dUR+dUA)/r; + if (EFLAG) + evdwl += a12[itype][jtype]/6.0 * + (2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype]; + if (r <= K[1]) error->one(FLERR,"Overlapping large/large in pair colloid"); + break; } + + if (EFLAG) evdwl *= factor_lj; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,tid); } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairColloidOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairColloid::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h new file mode 100644 index 000000000..a0be13cbb --- /dev/null +++ b/src/USER-OMP/pair_colloid_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(colloid/omp,PairColloidOMP)
+
+#else
+
+#ifndef LMP_PAIR_COLLOID_OMP_H
+#define LMP_PAIR_COLLOID_OMP_H
+
+#include "pair_colloid.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Threaded variant of PairColloid: inherits the pair style from PairColloid
+// and the per-thread helpers from ThrOMP.
+class PairColloidOMP : public PairColloid, public ThrOMP {
+
+ public:
+  PairColloidOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // Force kernel for the atom index range [ifrom,ito) of thread tid.
+  // The three flags are compile-time switches; compute() instantiates
+  // eval<1,1,1> ... eval<0,0,0> from evflag / eflag / newton_pair.
+  // (The parameter list had been stripped from this declaration.)
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp
new file mode 100644
index 000000000..207c122e4
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.cpp
@@ -0,0 +1,540 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_comb_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "group.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCombOMP::PairCombOMP(LAMMPS *lmp) :
+  PairComb(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   threaded driver: sets up energy/virial tallying, grows NCo when
+   atom->nmax exceeds nmax, then lets every thread run eval<>() on its
+   own index range and reduces the per-thread forces afterwards.
+------------------------------------------------------------------------- */
+
+void PairCombOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow coordination array if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(NCo);
+    nmax = atom->nmax;
+    memory->create(NCo,nmax,"pair:NCo");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // template arguments are <EVFLAG, EFLAG, VFLAG_ATOM>
+    if (evflag) {
+      if (eflag) {
+        if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
+        else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+        if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
+        else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else eval<0,0,0>(f, ifrom, ito, tid);
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   per-thread kernel: handles atoms ilist[iifrom] .. ilist[iito-1] and
+   accumulates into the force array f handed in by compute().
+     EVFLAG     != 0 : tally energy/virial through ev_tally_thr()
+     EFLAG           : forwarded to repulsive() and force_zeta()
+     VFLAG_ATOM != 0 : additionally call v_tally3_thr() for 3-body terms
+   (the template parameter list and the '&' of every '&params[...]'
+   argument had been lost from this text and are restored here)
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairCombOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,ii,jj,kk,jnum,iparam_i;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int mr1,mr2,mr3;
+  int rsc,inty;
+  double elp_ij,filp[3],fjlp[3],fklp[3];
+  double iq,jq;
+  double yaself;
+  double potal,fac11,fac11e;
+  double vionij,fvionij,sr1,sr2,sr3,Eov,Fov;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  yaself = vionij = fvionij = Eov = Fov = 0.0;
+
+  double fxtmp,fytmp,fztmp;
+  double fjxtmp,fjytmp,fjztmp;
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    iq = q[i];
+    NCo[i] = 0;
+    iparam_i = elem2param[itype][itype][itype];
+
+    // self energy, only on i atom
+
+    yaself = self(&params[iparam_i],iq,potal);
+
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself,
+                             0.0,0.0,0.0,0.0,0.0,tid);
+
+    // two-body interactions (long and short repulsive)
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      // the list is a full list: pick each i-j pair exactly once,
+      // by tag parity, or by coordinates when both tags are equal
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      // Qj calculates 2-body Coulombic
+
+      jtype = map[type[j]];
+      jq = q[j];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // long range q-dependent
+
+      if (rsq > params[iparam_ij].lcutsq) continue;
+
+      inty = intype[itype][jtype];
+
+      // polynomial three-point interpolation
+
+      tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype);
+
+      // 1/r energy and forces
+
+      direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq,
+             potal,fac11,fac11e,vionij,fvionij);
+
+      // field correction to self energy
+
+      field(&params[iparam_ij],rsq,iq,jq,vionij,fvionij);
+
+      // polarization field
+      // sums up long range forces
+
+      fxtmp += delx*fvionij;
+      fytmp += dely*fvionij;
+      fztmp += delz*fvionij;
+      f[j][0] -= delx*fvionij;
+      f[j][1] -= dely*fvionij;
+      f[j][2] -= delz*fvionij;
+
+      if (EVFLAG)
+        ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+                     0.0,vionij,fvionij,delx,dely,delz,tid);
+
+      // short range q-independent
+
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq);
+
+      // repulsion is pure two-body, sums up pair repulsive forces
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG)
+        ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+                     evdwl,0.0,fpair,delx,dely,delz,tid);
+    }
+
+    // accumulate coordination number information
+
+    if (cor_flag) {
+      int numcoor = 0;
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        j &= NEIGHMASK;
+        jtype = map[type[j]];
+        iparam_ij = elem2param[itype][jtype][jtype];
+
+        if(params[iparam_ij].hfocor > 0.0 ) {
+          delr1[0] = x[j][0] - xtmp;
+          delr1[1] = x[j][1] - ytmp;
+          delr1[2] = x[j][2] - ztmp;
+          rsq1 = vec3_dot(delr1,delr1);
+
+          if (rsq1 > params[iparam_ij].cutsq) continue;
+          ++numcoor;
+        }
+        NCo[i] = numcoor;
+      }
+    }
+
+    // three-body interactions
+    // skip immediately if I-J is not within cutoff
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // this Qj for q-dependent BSi
+
+      jq = q[j];
+
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = vec3_dot(delr1,delr1);
+
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+      // NOTE(review): cuo_flag1/cuo_flag2/cuo_flag are not declared in this
+      // function and eval() runs inside the omp parallel region of
+      // compute(), so these writes look shared between threads -- confirm
+      // whether they need to be thread-local.
+      cuo_flag1 = 0; cuo_flag2 = 0;
+
+      for (kk = 0; kk < jnum; kk++) {
+        if (jj == kk) continue;
+        k = jlist[kk];
+        k &= NEIGHMASK;
+        ktype = map[type[k]];
+        iparam_ijk = elem2param[itype][jtype][ktype];
+
+        delr2[0] = x[k][0] - xtmp;
+        delr2[1] = x[k][1] - ytmp;
+        delr2[2] = x[k][2] - ztmp;
+        rsq2 = vec3_dot(delr2,delr2);
+
+        if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+        zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+
+        if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1;
+        if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1;
+      }
+
+      if (cuo_flag1 && cuo_flag2) cuo_flag = 1;
+      else cuo_flag = 0;
+
+      // pairwise force due to zeta
+
+      force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,
+                 prefactor,EFLAG,evdwl,iq,jq);
+
+      // over-coordination correction for HfO2
+
+      if (cor_flag && NCo[i] != 0)
+        Over_cor(&params[iparam_ij],rsq1,NCo[i],Eov, Fov);
+      evdwl += Eov;
+      fpair += Fov;
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+                               -fpair,-delr1[0],-delr1[1],-delr1[2],tid);
+
+      // attractive term via loop over k (3-body forces)
+
+      for (kk = 0; kk < jnum; kk++) {
+        if (jj == kk) continue;
+        k = jlist[kk];
+        k &= NEIGHMASK;
+        ktype = map[type[k]];
+        iparam_ijk = elem2param[itype][jtype][ktype];
+
+        delr2[0] = x[k][0] - xtmp;
+        delr2[1] = x[k][1] - ytmp;
+        delr2[2] = x[k][2] - ztmp;
+        rsq2 = vec3_dot(delr2,delr2);
+        if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+        for (rsc = 0; rsc < 3; rsc++)
+          fi[rsc] = fj[rsc] = fk[rsc] = 0.0;
+
+        attractive(&params[iparam_ijk],prefactor,
+                   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+        // 3-body LP and BB correction and forces
+
+        elp_ij = elp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+        flp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp);
+
+        fxtmp += fi[0] + filp[0];
+        fytmp += fi[1] + filp[1];
+        fztmp += fi[2] + filp[2];
+        fjxtmp += fj[0] + fjlp[0];
+        fjytmp += fj[1] + fjlp[1];
+        fjztmp += fj[2] + fjlp[2];
+        f[k][0] += fk[0] + fklp[0];
+        f[k][1] += fk[1] + fklp[1];
+        f[k][2] += fk[2] + fklp[2];
+
+        if (EVFLAG)
+          ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+                       elp_ij,0.0,0.0,0.0,0.0,0.0, tid);
+        if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid);
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+
+    // NOTE(review): params[] is written here from every thread -- confirm
+    // this is safe when several threads hit the same iparam_i.
+    if (cuo_flag) params[iparam_i].cutsq *= 0.65;
+  }
+  cuo_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   charge-force evaluation: zeroes qf[] for the atoms of group igroup,
+   accumulates the charge forces in a threaded loop over the full
+   neighbor list (updates of qf[] are guarded by omp atomic), and
+   returns the sum of qf[] over the group atoms of all MPI ranks.
+------------------------------------------------------------------------- */
+
+double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
+{
+  int ii;
+  double potal,fac11,fac11e;
+
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+
+  const int inum = list->inum;
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  const int * const mask = atom->mask;
+  const int groupbit = group->bitmask[igroup];
+
+  qf = qf_fix;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      qf[i] = 0.0;
+  }
+
+  // communicating charge force to all nodes, first forward then reverse
+
+  comm->forward_comm_pair(this);
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+#if defined(_OPENMP)
+#pragma omp parallel for private(ii) default(none) shared(potal,fac11e)
+#endif
+  for (ii = 0; ii < inum; ii ++) {
+    double fqi,fqj,fqij,fqji,fqjj,delr1[3],delr2[3];
+    double sr1,sr2,sr3;
+    int mr1,mr2,mr3;
+
+    const int i = ilist[ii];
+
+    if (mask[i] & groupbit) {
+      fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed.
+      int itype = map[type[i]];
+      const double xtmp = x[i][0];
+      const double ytmp = x[i][1];
+      const double ztmp = x[i][2];
+      const double iq = q[i];
+      const int iparam_i = elem2param[itype][itype][itype];
+
+      // charge force from self energy
+
+      fqi = qfo_self(&params[iparam_i],iq,potal);
+
+      // two-body interactions
+
+      const int * const jlist = firstneigh[i];
+      const int jnum = numneigh[i];
+
+      for (int jj = 0; jj < jnum; jj++) {
+        const int j = jlist[jj] & NEIGHMASK;
+        const int jtype = map[type[j]];
+        double jq = q[j];
+
+        delr1[0] = x[j][0] - xtmp;
+        delr1[1] = x[j][1] - ytmp;
+        delr1[2] = x[j][2] - ztmp;
+        double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        // long range q-dependent
+
+        if (rsq1 > params[iparam_ij].lcutsq) continue;
+
+        const int inty = intype[itype][jtype];
+
+        // polynomial three-point interpolation
+
+        tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype);
+
+        // 1/r charge forces
+
+        qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij);
+
+        // field correction to self energy and charge force
+
+        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
+        fqi += jq * fqij + fqji;
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+        qf[j] += (iq * fqij + fqjj);
+
+        // polarization field charge force
+        // three-body interactions
+
+        if (rsq1 > params[iparam_ij].cutsq) continue;
+
+        double zeta_ij = 0.0;
+
+        for (int kk = 0; kk < jnum; kk++) {
+          if (jj == kk) continue;
+          const int k = jlist[kk] & NEIGHMASK;
+          const int ktype = map[type[k]];
+          const int iparam_ijk = elem2param[itype][jtype][ktype];
+
+          delr2[0] = x[k][0] - xtmp;
+          delr2[1] = x[k][1] - ytmp;
+          delr2[2] = x[k][2] - ztmp;
+          const double rsq2 = vec3_dot(delr2,delr2);
+
+          if (rsq2 > params[iparam_ijk].cutsq) continue;
+          zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+        }
+
+        // charge force in Aij and Bij
+
+        qfo_short(&params[iparam_ij],rsq1,zeta_ij,iq,jq,fqij,fqjj);
+        fqi += fqij;
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+        qf[j] += fqjj;
+      }
+
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+      qf[i] += fqi;
+
+    }
+  }
+
+  comm->reverse_comm_pair(this);
+
+  // sum charge force on each node and return it
+
+  double eneg = 0.0;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      eneg += qf[i];
+  }
+  double enegtot;
+  MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
+  return enegtot;
+}
+
+/* ----------------------------------------------------------------------
+   memory reported by the threading helper plus that of the base class
+------------------------------------------------------------------------- */
+
+double PairCombOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairComb::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_comb_omp.h b/src/USER-OMP/pair_comb_omp.h
new file mode 100644
index 000000000..6f020ea9a
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(comb/omp,PairCombOMP)
+
+#else
+
+#ifndef LMP_PAIR_COMB_OMP_H
+#define LMP_PAIR_COMB_OMP_H
+
+#include "pair_comb.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Threaded variant of PairComb: inherits the pair style from PairComb and
+// the per-thread helpers from ThrOMP.
+class PairCombOMP : public PairComb, public ThrOMP {
+
+ public:
+  PairCombOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+  virtual double yasu_char(double *, int &);
+
+ private:
+  // Force kernel for the atom index range [ifrom,ito) of thread tid.
+  // The three flags are compile-time switches; compute() instantiates
+  // eval<1,1,1> ... eval<0,0,0> from evflag / eflag / vflag_atom.
+  // (The parameter list had been stripped from this declaration.)
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp
similarity index 77%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_coul_cut_omp.cpp
index 8ed82c5e5..bb19db3d2 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_omp.cpp
@@ -1,163 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_coul_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) : + PairCoulCut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) /* per-thread kernel: handles ilist entries [iifrom,iito) for thread tid; the three template flags are fixed by the caller in compute() */ { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r2inv,rinv,forcecoul,factor_coul; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + double *special_coul = force->special_coul; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + rinv = sqrt(r2inv); + forcecoul = qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; + fpair = factor_coul*forcecoul * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } - if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - 
offset[itype][jtype]; - evdwl *= factor_lj; - } + if (EFLAG) + ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairCoulCutOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairCoulCut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h new file mode 100644 index 000000000..eca9958ff --- /dev/null +++ b/src/USER-OMP/pair_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/cut/omp,PairCoulCutOMP) + +#else + +#ifndef LMP_PAIR_COUL_CUT_OMP_H +#define LMP_PAIR_COUL_CUT_OMP_H + +#include "pair_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulCutOMP : public PairCoulCut, public ThrOMP { + + public: + PairCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); /* templated so the three flags are compile-time constants */ +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp similarity index 76% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_coul_debye_omp.cpp index 8ed82c5e5..1c2e7b8e0 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -1,163 +1,163 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_coul_debye_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) : + PairCoulDebye(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) /* per-thread kernel: handles ilist entries [iifrom,iito) for thread tid; the three template flags are fixed by the caller in compute() */ { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r2inv,r,rinv,forcecoul,factor_coul,screening; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + double *special_coul = force->special_coul; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv); + fpair = factor_coul*forcecoul * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } - if (EFLAG) { - evdwl = 
r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - offset[itype][jtype]; - evdwl *= factor_lj; - } + if (EFLAG) + ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ - -double PairLJCutOMP::memory_usage() +double PairCoulDebyeOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairCoulDebye::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h new file mode 100644 index 000000000..7ad599bb1 --- /dev/null +++ b/src/USER-OMP/pair_coul_debye_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/debye/omp,PairCoulDebyeOMP) + +#else + +#ifndef LMP_PAIR_COUL_DEBYE_OMP_H +#define LMP_PAIR_COUL_DEBYE_OMP_H + +#include "pair_coul_debye.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP { + + public: + PairCoulDebyeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); /* templated so the three flags are compile-time constants */ +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp similarity index 58% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_coul_long_omp.cpp index 8ed82c5e5..3a2e05159 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -1,163 +1,201 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) : + PairCoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) /* per-thread kernel: handles ilist entries [iifrom,iito) for thread tid; the three template flags are fixed by the caller in compute() */ { - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + int i,j,ii,jj,jnum,itable,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double fraction,table; + double r,r2inv,rsq,forcecoul,factor_coul; + double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; - double *special_lj = force->special_lj; + double *special_coul = force->special_coul; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - if (rsq < cutsq[itype][jtype]) { + if (rsq < cut_coulsq) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + if (!ncoultablebits || rsq <= tabinnersq) { + 
r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = scale[itype][jtype] * qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = scale[itype][jtype] * qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + fpair = forcecoul * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = scale[itype][jtype] * qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 0.0,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairCoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h new file mode 100644 index 
000000000..7b63f762f --- /dev/null +++ b/src/USER-OMP/pair_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/long/omp,PairCoulLongOMP) + +#else + +#ifndef LMP_PAIR_COUL_LONG_OMP_H +#define LMP_PAIR_COUL_LONG_OMP_H + +#include "pair_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulLongOMP : public PairCoulLong, public ThrOMP { + + public: + PairCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); /* templated so the three flags are compile-time constants */ +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp new file mode 100644 index 000000000..9ba93b19b --- /dev/null +++ b/src/USER-OMP/pair_dipole_cut_omp.cpp @@ -0,0 +1,288 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dipole_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) : + PairDipoleCut(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairDipoleCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); + else eval<1,1,0>(f, torque, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); + else eval<1,0,0>(f, torque, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); + else eval<0,0,0>(f, torque, ifrom, ito, tid); + } + + // reduce per thread forces and torques into global arrays. 
+ data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) /* per-thread kernel: handles ilist entries [iifrom,iito) for thread tid; the three template flags are fixed by the caller in compute() */ +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; /* both are handed to the tally call even when EFLAG is 0 */ + + double **x = atom->x; + double *q = atom->q; + double **mu = atom->mu; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + qtmp = q[i]; /* charge of atom i; tested and used by the charge-dependent terms below */ + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = 
forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + pre1 = qtmp*q[j]*r3inv; + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + r7inv = r5inv*r2inv; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; + pre2 = 3.0*r5inv*pjdotr; + pre3 = 3.0*r5inv*pidotr; + pre4 = -1.0*r3inv; + + forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0]; + forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1]; + forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2]; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pre1 = 3.0*q[j]*r5inv * pidotr; + pre2 = q[j]*r3inv; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp 
!= 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + pre1 = 3.0*qtmp*r5inv * pjdotr; + pre2 = qtmp*r3inv; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp*q[j]*rinv; + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j]*r3inv*pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv*pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* 
---------------------------------------------------------------------- */ + +double PairDipoleCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDipoleCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h new file mode 100644 index 000000000..832bd4d3b --- /dev/null +++ b/src/USER-OMP/pair_dipole_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dipole/cut/omp,PairDipoleCutOMP) + +#else + +#ifndef LMP_PAIR_DIPOLE_CUT_OMP_H +#define LMP_PAIR_DIPOLE_CUT_OMP_H + +#include "pair_dipole_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP { + + public: + PairDipoleCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, double **torque, int ifrom, int ito, int tid); /* templated so the three flags are compile-time constants */ +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp new file mode 100644 index 000000000..9ebc72d41 --- /dev/null +++ b/src/USER-OMP/pair_dipole_sf_omp.cpp @@ -0,0 +1,320 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dipole_sf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) : + PairDipoleSF(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairDipoleSFOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); + else eval<1,1,0>(f, torque, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); + else eval<1,0,0>(f, torque, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); + else 
eval<0,0,0>(f, torque, ifrom, ito, tid); + } + + // reduce per thread forces and torques into global arrays. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) /* per-thread kernel: handles ilist entries [iifrom,iito) for thread tid; the three template flags are fixed by the caller in compute() */ +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; + double aforcecoulx,aforcecouly,aforcecoulz; + double bforcecoulx,bforcecouly,bforcecoulz; + double rcutlj2inv, rcutcoul2inv,rcutlj6inv; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; /* both are handed to the tally call even when EFLAG is 0 */ + + double **x = atom->x; + double *q = atom->q; + double **mu = atom->mu; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + qtmp = q[i]; /* charge of atom i; tested and used by the charge-dependent terms below */ + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; 
+ rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + // every term below carries a factor built from cut_coulsq[itype][jtype] + // (see afac, bfac, pqfac, qpfac and the charge-charge pre1) + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; + pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); + aforcecoulx = pre1*delx; + aforcecouly = pre1*dely; + aforcecoulz = pre1*delz; + + bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + + 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; + presf = 2.0 * r2inv * pidotr * pjdotr; + bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx); + bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely); + bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz); + + forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); + forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); + forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); + + pre2 = 3.0 * bfac * r5inv * pjdotr; + pre3 = 3.0 * bfac * r5inv * pidotr; + pre4 = -bfac * r3inv; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - 
mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); + pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = q[j] * r3inv * pqfac; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); + qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = qtmp * r3inv * qpfac; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; + + rcutlj2inv = 1.0 / 
cut_ljsq[itype][jtype]; + rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; + forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * + rcutlj6inv * rcutlj2inv; + + forcelj = factor_lj * (forceljcut - forceljsf); + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp * q[j] * rinv * + pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2); + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j] * r3inv * pqfac * pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp * r3inv * qpfac * pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+ + rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* + rsq*rcutlj2inv+ + rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairDipoleSFOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDipoleSF::memory_usage(); + + return bytes; +} diff --git 
a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h new file mode 100644 index 000000000..e601e2d56 --- /dev/null +++ b/src/USER-OMP/pair_dipole_sf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dipole/sf/omp,PairDipoleSFOMP) + +#else + +#ifndef LMP_PAIR_DIPOLE_SF_OMP_H +#define LMP_PAIR_DIPOLE_SF_OMP_H + +#include "pair_dipole_sf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP { + + public: + PairDipoleSFOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp similarity index 62% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_dpd_omp.cpp index 8ed82c5e5..be1e32f37 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -1,163 +1,212 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator 
http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_dpd_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" +#include "update.h" +#include "random_mars.h" using namespace LAMMPS_NS; +#define EPSILON 1.0e-10 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairDPDOMP::PairDPDOMP(LAMMPS *lmp) : + PairDPD(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairDPDOMP::~PairDPDOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairDPDOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + random_thr[0] = random; + #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + 
if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double rsq,r,rinv,dot,wd,randnum,factor_dpd; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; + double **v = atom->v; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; + double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; + RanMars &rng = *random_thr[tid]; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; + vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor_dpd = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - 
x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx*delvx + dely*delvy + delz*delvz; + wd = 1.0 - r/cut[itype][jtype]; + randnum = rng.gaussian(); + + // conservative force = a0 * wd + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + fpair = a0[itype][jtype]*wd; + fpair -= gamma[itype][jtype]*wd*wd*dot*rinv; + fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt; + fpair *= factor_dpd*rinv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd; + evdwl *= factor_dpd; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairDPDOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairDPD::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); return bytes; } diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h new file mode 100644 index 000000000..9385e5444 --- /dev/null 
+++ b/src/USER-OMP/pair_dpd_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/omp,PairDPDOMP) + +#else + +#ifndef LMP_PAIR_DPD_OMP_H +#define LMP_PAIR_DPD_OMP_H + +#include "pair_dpd.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDPDOMP : public PairDPD, public ThrOMP { + + public: + PairDPDOMP(class LAMMPS *); + virtual ~PairDPDOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp similarity index 59% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_dpd_tstat_omp.cpp index 8ed82c5e5..7e3fb8b39 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -1,163 +1,214 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed 
under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_dpd_tstat_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" +#include "update.h" +#include "random_mars.h" using namespace LAMMPS_NS; +#define EPSILON 1.0e-10 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) : + PairDPDTstat(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; + random_thr = NULL; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +PairDPDTstatOMP::~PairDPDTstatOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDTstatOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + random_thr[0] = random; + #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + 
comm->me + + comm->nprocs*tid); + if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double rsq,r,rinv,dot,wd,randnum,factor_dpd; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; + double **v = atom->v; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; + double dtinvsqrt = 1.0/sqrt(update->dt); double fxtmp,fytmp,fztmp; + RanMars &rng = *random_thr[tid]; + + // adjust sigma if target T is changing + + if (t_start != t_stop) { + double delta = update->ntimestep - update->beginstep; + delta /= update->endstep - update->beginstep; + temperature = t_start + delta * (t_stop-t_start); + double boltz = force->boltz; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) + sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]); + } ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; + 
vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor_dpd = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx*delvx + dely*delvy + delz*delvz; + wd = 1.0 - r/cut[itype][jtype]; + randnum = rng.gaussian(); + + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + fpair = -gamma[itype][jtype]*wd*wd*dot*rinv; + fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt; + fpair *= factor_dpd*rinv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } - if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 0.0,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairDPDTstatOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairDPDTstat::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); return bytes; } diff --git 
a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h new file mode 100644 index 000000000..14f640a92 --- /dev/null +++ b/src/USER-OMP/pair_dpd_tstat_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/tstat/omp,PairDPDTstatOMP) + +#else + +#ifndef LMP_PAIR_DPD_TSTAT_OMP_H +#define LMP_PAIR_DPD_TSTAT_OMP_H + +#include "pair_dpd_tstat.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP { + + public: + PairDPDTstatOMP(class LAMMPS *); + virtual ~PairDPDTstatOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eam_alloy_omp.cpp b/src/USER-OMP/pair_eam_alloy_omp.cpp new file mode 100644 index 000000000..54be571b7 --- /dev/null +++ b/src/USER-OMP/pair_eam_alloy_omp.cpp @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National 
Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_alloy_omp.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMAlloyOMP::PairEAMAlloyOMP(LAMMPS *lmp) : PairEAMOMP(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM setfl file + + if (setfl) { + for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i]; + delete [] setfl->elements; + delete [] setfl->mass; + memory->destroy(setfl->frho); + memory->destroy(setfl->rhor); + memory->destroy(setfl->z2r); + delete setfl; + } + setfl = new Setfl(); + read_file(arg[2]); + + // read args that map atom types to elements in 
potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < setfl->nelements; j++) + if (strcmp(arg[i],setfl->elements[j]) == 0) break; + if (j < setfl->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,setfl->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::read_file(char *filename) +{ + Setfl *file = setfl; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + 
+ char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); + memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, + "pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); + MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::file2array() +{ + int i,j,m,n; + int ntypes = 
atom->ntypes; + + // set function params directly from setfl file + + nrho = setfl->nrho; + nr = setfl->nr; + drho = setfl->drho; + dr = setfl->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of setfl elements + 1 for zero array + + nfrho = setfl->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = # of setfl elements + + nrhor = setfl->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element's rhor to global rhor + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for setfl files, I,J mapping only depends on I + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i]; + + // 
------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of setfl elements + + nz2r = setfl->nelements * (setfl->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < setfl->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/USER-OMP/pair_eam_alloy_omp.h b/src/USER-OMP/pair_eam_alloy_omp.h new file mode 100644 index 000000000..7a71fbc17 --- /dev/null +++ b/src/USER-OMP/pair_eam_alloy_omp.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/alloy/omp,PairEAMAlloyOMP) + +#else + +#ifndef LMP_PAIR_EAM_ALLOY_OMP_H +#define LMP_PAIR_EAM_ALLOY_OMP_H + +#include "pair_eam_omp.h" + +namespace LAMMPS_NS { + +// need virtual public b/c of how eam/alloy/opt inherits from it + +class PairEAMAlloyOMP : virtual public PairEAMOMP { + public: + PairEAMAlloyOMP(class LAMMPS *); + virtual ~PairEAMAlloyOMP() {} + void coeff(int, char **); + + protected: + void read_file(char *); + void file2array(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eam_fs_omp.cpp b/src/USER-OMP/pair_eam_fs_omp.cpp new file mode 100644 index 000000000..d0963fa62 --- /dev/null +++ b/src/USER-OMP/pair_eam_fs_omp.cpp @@ -0,0 +1,332 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Tim Lau (MIT) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_fs_omp.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMFSOMP::PairEAMFSOMP(LAMMPS *lmp) : PairEAMOMP(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read EAM Finnis-Sinclair file +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM Finnis-Sinclair file + + if (fs) { + for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; + delete [] fs->elements; + delete [] fs->mass; + memory->destroy(fs->frho); + memory->destroy(fs->rhor); + memory->destroy(fs->z2r); + delete fs; + } + fs = new Fs(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < fs->nelements; j++) + if (strcmp(arg[i],fs->elements[j]) == 0) break; + if (j < fs->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since 
coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,fs->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::read_file(char *filename) +{ + Fs *file = fs; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + 
sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1, + "pair:frho"); + memory->create(file->rhor,file->nelements,file->nelements, + file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements, + file->nr+1,"pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + + for (j = 0; j < file->nelements; j++) { + if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); + MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::file2array() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from fs file + + nrho = fs->nrho; + nr = fs->nr; + drho = fs->drho; + dr = fs->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of fs 
elements + 1 for zero array + + nfrho = fs->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < fs->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = square of # of fs elements + + nrhor = fs->nelements * fs->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element pair rhor to global rhor + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j < fs->nelements; j++) { + for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; + n++; + } + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for fs files, there is a full NxN set of rhor arrays + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i] * fs->nelements + map[j]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of fs elements + + nz2r = fs->nelements * (fs->nelements+1) / 2; + memory->destroy(z2r); + 
memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/USER-OMP/pair_eam_fs_omp.h b/src/USER-OMP/pair_eam_fs_omp.h new file mode 100644 index 000000000..bee6cef76 --- /dev/null +++ b/src/USER-OMP/pair_eam_fs_omp.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */

// This header has two modes, selected by PAIR_CLASS:
//  - PAIR_CLASS defined: only the PairStyle(...) line below is emitted.
//    NOTE(review): PairStyle is defined elsewhere; presumably it registers
//    the "eam/fs/omp" style name for class PairEAMFSOMP -- confirm.
//  - otherwise: the regular include-guarded class declaration is exposed.

#ifdef PAIR_CLASS

PairStyle(eam/fs/omp,PairEAMFSOMP)

#else

#ifndef LMP_PAIR_EAM_FS_OMP_H
#define LMP_PAIR_EAM_FS_OMP_H

#include "pair_eam_omp.h"

namespace LAMMPS_NS {

// need virtual public b/c of how eam/fs/opt inherits from it

// Finnis-Sinclair variant of PairEAMOMP.  It only overrides the coefficient
// parsing and potential-file handling declared below.
class PairEAMFSOMP : virtual public PairEAMOMP {
 public:
  PairEAMFSOMP(class LAMMPS *);
  virtual ~PairEAMFSOMP() {}
  // (narg, arg) of the pair_coeff command: "* *", the potential file name,
  // then one element name (or NULL) per atom type
  void coeff(int, char **);

 protected:
  // reads the named potential file on MPI rank 0 and broadcasts its contents
  void read_file(char *);
  // copies the read-in tables into the frho/rhor/z2r arrays and builds the
  // type2frho/type2rhor/type2z2r index maps
  void file2array();
};

}

#endif
#endif
diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp
new file mode 100644
index 000000000..0ae4d54fb
--- /dev/null
+++ b/src/USER-OMP/pair_eam_omp.cpp
@@ -0,0 +1,303 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   This software is distributed under the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_eam_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairEAMOMP::PairEAMOMP(LAMMPS *lmp) : + PairEAM(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, *rho_t; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + if (force->newton_pair) + rho_t = rho + tid*nall; + else rho_t = rho + tid*atom->nlocal; + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid); + else eval<1,1,0>(f, rho_t, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid); + else eval<1,0,0>(f, rho_t, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid); + else 
eval<0,0,0>(f, rho_t, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairEAMOMP::eval(double **f, double *rho_t, + int iifrom, int iito, int tid) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // zero out density + + if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double)); + else memset(rho_t, 0, nlocal*sizeof(double)); + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (NEWTON_PAIR || j < nlocal) { + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum 
densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + p = rho[i]*rdrho + 1.0; + m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + coeff = frho_spline[type2frho[type[i]]][m]; + fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; + if (EFLAG) { + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (eflag_global) eng_vdwl_thr[tid] += phi; + if (eflag_atom) eatom_thr[tid][i] += phi; + } + } + + // wait until all theads are done with computation + sync_threads(); + + // communicate derivative of embedding function + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative 
of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = z2r_spline[type2z2r[itype][jtype]][m]; + z2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + + recip = 1.0/r; + phi = z2*recip; + phip = z2p*recip - phi*recip; + psip = fp[i]*rhojp + fp[j]*rhoip + phip; + fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = phi; + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairEAM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h new file mode 100644 index 000000000..1184cb34b --- /dev/null +++ b/src/USER-OMP/pair_eam_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/omp,PairEAMOMP) + +#else + +#ifndef LMP_PAIR_EAM_OMP_H +#define LMP_PAIR_EAM_OMP_H + +#include "pair_eam.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairEAMOMP : public PairEAM, public ThrOMP { + + public: + PairEAMOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double *rho_t, int iifrom, int iito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp new file mode 100644 index 000000000..65b05c814 --- /dev/null +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -0,0 +1,485 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_edip_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) : + PairEDIP(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEDIPOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else eval<0,0,0>(f, ifrom, ito, tid); + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+ if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,k,ii,inum,jnum; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp,evdwl; + int *ilist,*jlist,*numneigh,**firstneigh; + register int preForceCoord_counter; + + double invR_ij; + double invR_ik; + double directorCos_ij_x; + double directorCos_ij_y; + double directorCos_ij_z; + double directorCos_ik_x; + double directorCos_ik_y; + double directorCos_ik_z; + double cosTeta; + + int interpolIDX; + double interpolTMP; + double interpolDeltaX; + double interpolY1; + double interpolY2; + + double invRMinusCutoffA; + double sigmaInvRMinusCutoffA; + double gammInvRMinusCutoffA; + double cosTetaDiff; + double cosTetaDiffCosTetaDiff; + double cutoffFunction_ij; + double exp2B_ij; + double exp2BDerived_ij; + double pow2B_ij; + double pow2BDerived_ij; + double exp3B_ij; + double exp3BDerived_ij; + double exp3B_ik; + double exp3BDerived_ik; + double qFunction; + double qFunctionDerived; + double tauFunction; + double tauFunctionDerived; + double expMinusBetaZeta_iZeta_i; + double qFunctionCosTetaDiffCosTetaDiff; + double expMinusQFunctionCosTetaDiffCosTetaDiff; + double zeta_i; + double zeta_iDerived; + double zeta_iDerivedInvR_ij; + + double forceModCoord_factor; + double forceModCoord; + double forceModCoord_ij; + double forceMod2B; + double forceMod3B_factor1_ij; + double forceMod3B_factor2_ij; + double forceMod3B_factor2; + double forceMod3B_factor1_ik; + double forceMod3B_factor2_ik; + double potentia3B_factor; + double potential2B_factor; + + double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList; + double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList; + double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList; + double *pre_thrExp2B_ij = preExp2B_ij + tid * leadDimInteractionList; + double *pre_thrExp2BDerived_ij = 
preExp2BDerived_ij + tid * leadDimInteractionList; + double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList; + double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ii++) { + zeta_i = 0.0; + int numForceCoordPairs = 0; + + i = ilist[ii]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // pre-loop to compute environment coordination f(Z) + + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + double dr_ij[3], r_ij; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = 1.0 / r_ij; + pre_thrInvR_ij[neighbor_j] = invR_ij; + + invRMinusCutoffA = 1.0 / (r_ij - cutoffA); + sigmaInvRMinusCutoffA = sigma * invRMinusCutoffA; + gammInvRMinusCutoffA = gamm * invRMinusCutoffA; + + interpolDeltaX = r_ij - GRIDSTART; + interpolTMP = (interpolDeltaX * GRIDDENSITY); + interpolIDX = (int) interpolTMP; + + interpolY1 = exp3B[interpolIDX]; + interpolY2 = exp3B[interpolIDX+1]; + exp3B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + exp3BDerived_ij = - exp3B_ij * gammInvRMinusCutoffA * invRMinusCutoffA; + + pre_thrExp3B_ij[neighbor_j] = exp3B_ij; + pre_thrExp3BDerived_ij[neighbor_j] = exp3BDerived_ij; + + interpolY1 = exp2B[interpolIDX]; + interpolY2 = exp2B[interpolIDX+1]; + exp2B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + 
exp2BDerived_ij = - exp2B_ij * sigmaInvRMinusCutoffA * invRMinusCutoffA; + + pre_thrExp2B_ij[neighbor_j] = exp2B_ij; + pre_thrExp2BDerived_ij[neighbor_j] = exp2BDerived_ij; + + interpolY1 = pow2B[interpolIDX]; + interpolY2 = pow2B[interpolIDX+1]; + pow2B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + pre_thrPow2B_ij[neighbor_j] = pow2B_ij; + + // zeta and its derivative + + if (r_ij < cutoffC) zeta_i += 1.0; + else { + interpolY1 = cutoffFunction[interpolIDX]; + interpolY2 = cutoffFunction[interpolIDX+1]; + cutoffFunction_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + zeta_i += cutoffFunction_ij; + + interpolY1 = cutoffFunctionDerived[interpolIDX]; + interpolY2 = cutoffFunctionDerived[interpolIDX+1]; + zeta_iDerived = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + zeta_iDerivedInvR_ij = zeta_iDerived * invR_ij; + + preForceCoord_counter=numForceCoordPairs*5; + pre_thrForceCoord[preForceCoord_counter+0]=zeta_iDerivedInvR_ij; + pre_thrForceCoord[preForceCoord_counter+1]=dr_ij[0]; + pre_thrForceCoord[preForceCoord_counter+2]=dr_ij[1]; + pre_thrForceCoord[preForceCoord_counter+3]=dr_ij[2]; + pre_thrForceCoord[preForceCoord_counter+4]=j; + numForceCoordPairs++; + } + } + + // quantities depending on zeta_i + + interpolDeltaX = zeta_i; + interpolTMP = (interpolDeltaX * GRIDDENSITY); + interpolIDX = (int) interpolTMP; + + interpolY1 = expMinusBetaZeta_iZeta_iGrid[interpolIDX]; + interpolY2 = expMinusBetaZeta_iZeta_iGrid[interpolIDX+1]; + expMinusBetaZeta_iZeta_i = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + interpolY1 = qFunctionGrid[interpolIDX]; + interpolY2 = qFunctionGrid[interpolIDX+1]; + qFunction = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + interpolY1 = tauFunctionGrid[interpolIDX]; + interpolY2 = tauFunctionGrid[interpolIDX+1]; + tauFunction = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + 
+ interpolY1 = tauFunctionDerivedGrid[interpolIDX]; + interpolY2 = tauFunctionDerivedGrid[interpolIDX+1]; + tauFunctionDerived = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + qFunctionDerived = -mu * qFunction; + + forceModCoord_factor = 2.0 * beta * zeta_i * expMinusBetaZeta_iZeta_i; + + forceModCoord = 0.0; + + // two-body interactions, skip half of them + + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + double dr_ij[3], r_ij, f_ij[3]; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = x[j][0] - xtmp; + dr_ij[1] = x[j][1] - ytmp; + dr_ij[2] = x[j][2] - ztmp; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = pre_thrInvR_ij[neighbor_j]; + pow2B_ij = pre_thrPow2B_ij[neighbor_j]; + + potential2B_factor = pow2B_ij - expMinusBetaZeta_iZeta_i; + + exp2B_ij = pre_thrExp2B_ij[neighbor_j]; + + pow2BDerived_ij = - rho * invR_ij * pow2B_ij; + + forceModCoord += (forceModCoord_factor*exp2B_ij); + + exp2BDerived_ij = pre_thrExp2BDerived_ij[neighbor_j]; + forceMod2B = exp2BDerived_ij * potential2B_factor + + exp2B_ij * pow2BDerived_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + exp3B_ij = pre_thrExp3B_ij[neighbor_j]; + exp3BDerived_ij = pre_thrExp3BDerived_ij[neighbor_j]; + + f_ij[0] = forceMod2B * directorCos_ij_x; + f_ij[1] = forceMod2B * directorCos_ij_y; + f_ij[2] = forceMod2B * directorCos_ij_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[i][0] += f_ij[0]; + f[i][1] += f_ij[1]; + f[i][2] += f_ij[2]; + + // potential energy + + evdwl = (exp2B_ij * potential2B_factor); + + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0, + -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + + // three-body Forces + + for 
(int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = x[k][0] - xtmp; + dr_ik[1] = x[k][1] - ytmp; + dr_ik[2] = x[k][2] - ztmp; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = pre_thrInvR_ij[neighbor_k]; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + cosTetaDiff = cosTeta + tauFunction; + cosTetaDiffCosTetaDiff = cosTetaDiff * cosTetaDiff; + qFunctionCosTetaDiffCosTetaDiff = cosTetaDiffCosTetaDiff * qFunction; + expMinusQFunctionCosTetaDiffCosTetaDiff = + exp(-qFunctionCosTetaDiffCosTetaDiff); + + potentia3B_factor = lambda * + ((1.0 - expMinusQFunctionCosTetaDiffCosTetaDiff) + + eta * qFunctionCosTetaDiffCosTetaDiff); + + exp3B_ik = pre_thrExp3B_ij[neighbor_k]; + exp3BDerived_ik = pre_thrExp3BDerived_ij[neighbor_k]; + + forceMod3B_factor1_ij = - exp3BDerived_ij * exp3B_ik * + potentia3B_factor; + forceMod3B_factor2 = 2.0 * lambda * exp3B_ij * exp3B_ik * + qFunction * cosTetaDiff * + (eta + expMinusQFunctionCosTetaDiffCosTetaDiff); + forceMod3B_factor2_ij = forceMod3B_factor2 * invR_ij; + + f_ij[0] = forceMod3B_factor1_ij * directorCos_ij_x + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_x - directorCos_ik_x); + f_ij[1] = forceMod3B_factor1_ij * directorCos_ij_y + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_y - directorCos_ik_y); + f_ij[2] = forceMod3B_factor1_ij * directorCos_ij_z + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_z - directorCos_ik_z); + + forceMod3B_factor1_ik = - exp3BDerived_ik * exp3B_ij 
* + potentia3B_factor; + forceMod3B_factor2_ik = forceMod3B_factor2 * invR_ik; + + f_ik[0] = forceMod3B_factor1_ik * directorCos_ik_x + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_x - directorCos_ij_x); + f_ik[1] = forceMod3B_factor1_ik * directorCos_ik_y + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_y - directorCos_ij_y); + f_ik[2] = forceMod3B_factor1_ik * directorCos_ik_z + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_z - directorCos_ij_z); + + forceModCoord += (forceMod3B_factor2 * + (tauFunctionDerived - 0.5 * mu * cosTetaDiff)); + + f[j][0] += f_ij[0]; + f[j][1] += f_ij[1]; + f[j][2] += f_ij[2]; + + f[k][0] += f_ik[0]; + f[k][1] += f_ik[1]; + f[k][2] += f_ik[2]; + + f[i][0] -= f_ij[0] + f_ik[0]; + f[i][1] -= f_ij[1] + f_ik[1]; + f[i][2] -= f_ij[2] + f_ik[2]; + + // potential energy + + evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor); + + if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik); + } + } + + // forces due to environment coordination f(Z) + + for (int idx = 0; idx < numForceCoordPairs; idx++) { + double dr_ij[3], f_ij[3]; + + preForceCoord_counter = idx * 5; + zeta_iDerivedInvR_ij=pre_thrForceCoord[preForceCoord_counter+0]; + dr_ij[0]=pre_thrForceCoord[preForceCoord_counter+1]; + dr_ij[1]=pre_thrForceCoord[preForceCoord_counter+2]; + dr_ij[2]=pre_thrForceCoord[preForceCoord_counter+3]; + j = static_cast (pre_thrForceCoord[preForceCoord_counter+4]); + + forceModCoord_ij = forceModCoord * zeta_iDerivedInvR_ij; + + f_ij[0] = forceModCoord_ij * dr_ij[0]; + f_ij[1] = forceModCoord_ij * dr_ij[1]; + f_ij[2] = forceModCoord_ij * dr_ij[2]; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[i][0] += f_ij[0]; + f[i][1] += f_ij[1]; + f[i][2] += f_ij[2]; + + // potential energy + + evdwl = 0.0; + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0, + forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid); + } + } +} + +/* 
---------------------------------------------------------------------- */
+
+// Memory used by this pair style: the value reported by
+// memory_usage_thr() plus the value reported by the serial PairEDIP
+// base class.
+double PairEDIPOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairEDIP::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h
new file mode 100644
index 000000000..55c34db34
--- /dev/null
+++ b/src/USER-OMP/pair_edip_omp.h
@@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(edip/omp,PairEDIPOMP)
+
+#else
+
+#ifndef LMP_PAIR_EDIP_OMP_H
+#define LMP_PAIR_EDIP_OMP_H
+
+#include "pair_edip.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// "edip/omp" pair style: derives from the serial PairEDIP class and from
+// the ThrOMP helper class.
+class PairEDIPOMP : public PairEDIP, public ThrOMP {
+
+ public:
+  PairEDIPOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // NOTE(review): the template parameter list after "template" appears to
+  // have been lost from this text; it must match the definition in
+  // pair_edip_omp.cpp -- TODO confirm and restore.
+  template
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp
new file mode 100644
index 000000000..d31ad2012
--- /dev/null
+++ b/src/USER-OMP/pair_eim_omp.cpp
@@ -0,0 +1,365 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_eim_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairEIMOMP::PairEIMOMP(LAMMPS *lmp) : + PairEIM(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEIMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nthreads*nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, *rho_t, *fp_t; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + if (force->newton_pair) { + rho_t = rho + tid*nall; + fp_t = fp + tid*nall; + } else { + rho_t = rho + tid*atom->nlocal; + fp_t = fp + tid*atom->nlocal; + } + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid); + else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, 
ifrom, ito, tid); + else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid); + else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t, + int iifrom, int iito, int tid) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // zero out density and fp + + if (NEWTON_PAIR) { + memset(rho_t, 0, nall*sizeof(double)); + memset(fp_t, 0, nall*sizeof(double)); + } else { + memset(rho_t, 0, nlocal*sizeof(double)); + memset(fp_t, 0, nlocal*sizeof(double)); + } + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = Fij_spline[type2Fij[itype][jtype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p 
+ coeff[5])*p + coeff[6]; + if (NEWTON_PAIR || j < nlocal) { + coeff = Fij_spline[type2Fij[jtype][itype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 1; + comm->reverse_comm_pair(this); + } + + } else { + data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 1; + comm->forward_comm_pair(this); + } + + // wait until master is finished communicating + sync_threads(); + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = Gij_spline[type2Gij[itype][jtype]][m]; + fp_t[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]); + if (NEWTON_PAIR || j < nlocal) { + fp_t[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + + coeff[6]); + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum modified densities + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp 
master +#endif + { + rhofp = 2; + comm->reverse_comm_pair(this); + } + + } else { + data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 2; + comm->forward_comm_pair(this); + } + + // wait until master is finished communicating + sync_threads(); + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + if (EFLAG) { + phi = 0.5*rho[i]*fp[i]; + if (eflag_global) eng_vdwl_thr[tid] += phi; + if (eflag_atom) eatom_thr[tid][i] += phi; + } + } + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + + coeff = Fij_spline[type2Fij[jtype][itype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = Fij_spline[type2Fij[itype][jtype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = phiij_spline[type2phiij[itype][jtype]][m]; + phip = (coeff[0]*p + coeff[1])*p + coeff[2]; + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = Gij_spline[type2Gij[itype][jtype]][m]; + coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coulp = (coeff[0]*p + coeff[1])*p + coeff[2]; + psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp + + fp[i]*rhojp 
+ fp[j]*rhoip; + recip = 1.0/r; + fpair = -psip*recip; + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul; + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairEIMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairEIM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h new file mode 100644 index 000000000..3693492e0 --- /dev/null +++ b/src/USER-OMP/pair_eim_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eim/omp,PairEIMOMP) + +#else + +#ifndef LMP_PAIR_EIM_OMP_H +#define LMP_PAIR_EIM_OMP_H + +#include "pair_eim.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairEIMOMP : public PairEIM, public ThrOMP { + + public: + PairEIMOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp similarity index 80% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_gauss_omp.cpp index 8ed82c5e5..e8b255d0b 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -1,163 +1,170 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_gauss_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EPSILON 1.0e-10 /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairGaussOMP::PairGaussOMP(LAMMPS *lmp) : + PairGauss(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairGaussOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,rsq,r2inv,forcelj,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; + int occ = 0; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; + // define a Gaussian well to be occupied if + // the site it interacts with is within the force maximum + + if (EFLAG) + if (eflag_global && rsq < 0.5/b[itype][jtype]) occ++; + if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + r = sqrt(rsq); + forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq * + exp(-b[itype][jtype]*rsq); fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) - + offset[itype][jtype]); evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, 
i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } + if (eflag_global) pvector[0] = occ; } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairGaussOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairGauss::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h new file mode 100644 index 000000000..7f8fc9a85 --- /dev/null +++ b/src/USER-OMP/pair_gauss_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gauss/omp,PairGaussOMP) + +#else + +#ifndef LMP_PAIR_GAUSS_OMP_H +#define LMP_PAIR_GAUSS_OMP_H + +#include "pair_gauss.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGaussOMP : public PairGauss, public ThrOMP { + + public: + PairGaussOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp new file mode 100644 index 000000000..ff115e8ef --- /dev/null +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -0,0 +1,227 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gayberne_omp.h" +#include "math_extra.h" +#include "atom.h" +#include "comm.h" +#include "atom_vec_ellipsoid.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) : + PairGayBerne(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGayBerneOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); + else eval<1,1,0>(f, torque, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); + else eval<1,0,0>(f, torque, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); + else eval<0,0,0>(f, torque, ifrom, ito, tid); + } + + // reduce per thread forces and torques into global arrays. 
+ data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; + double fforce[3],ttor[3],rtor[3],r12[3]; + double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3]; + int *ilist,*jlist,*numneigh,**firstneigh; + double *iquat,*jquat; + + double **x = atom->x; + int *ellipsoid = atom->ellipsoid; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + + AtomVecEllipsoid::Bonus *bonus = avec->bonus; + + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + + if (form[itype][itype] == ELLIPSE_ELLIPSE) { + iquat = bonus[ellipsoid[i]].quat; + MathExtra::quat_to_mat_trans(iquat,a1); + MathExtra::diag_times3(well[itype],a1,temp); + MathExtra::transpose_times3(a1,temp,b1); + MathExtra::diag_times3(shape2[itype],a1,temp); + MathExtra::transpose_times3(a1,temp,g1); + } + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + // r12 = center to center vector + + r12[0] = x[j][0]-x[i][0]; + r12[1] = x[j][1]-x[i][1]; + r12[2] = x[j][2]-x[i][2]; + rsq = MathExtra::dot3(r12,r12); + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + + switch (form[itype][jtype]) { + case SPHERE_SPHERE: + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + 
forcelj *= -r2inv; + if (EFLAG) + one_eng = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + fforce[0] = r12[0]*forcelj; + fforce[1] = r12[1]*forcelj; + fforce[2] = r12[2]*forcelj; + ttor[0] = ttor[1] = ttor[2] = 0.0; + rtor[0] = rtor[1] = rtor[2] = 0.0; + break; + + case SPHERE_ELLIPSE: + jquat = bonus[ellipsoid[j]].quat; + MathExtra::quat_to_mat_trans(jquat,a2); + MathExtra::diag_times3(well[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,b2); + MathExtra::diag_times3(shape2[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,g2); + one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor); + ttor[0] = ttor[1] = ttor[2] = 0.0; + break; + + case ELLIPSE_SPHERE: + one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor); + rtor[0] = rtor[1] = rtor[2] = 0.0; + break; + + default: + jquat = bonus[ellipsoid[j]].quat; + MathExtra::quat_to_mat_trans(jquat,a2); + MathExtra::diag_times3(well[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,b2); + MathExtra::diag_times3(shape2[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,g2); + one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq, + fforce,ttor,rtor); + break; + } + + fforce[0] *= factor_lj; + fforce[1] *= factor_lj; + fforce[2] *= factor_lj; + ttor[0] *= factor_lj; + ttor[1] *= factor_lj; + ttor[2] *= factor_lj; + + f[i][0] += fforce[0]; + f[i][1] += fforce[1]; + f[i][2] += fforce[2]; + tor[i][0] += ttor[0]; + tor[i][1] += ttor[1]; + tor[i][2] += ttor[2]; + + if (NEWTON_PAIR || j < nlocal) { + rtor[0] *= factor_lj; + rtor[1] *= factor_lj; + rtor[2] *= factor_lj; + f[j][0] -= fforce[0]; + f[j][1] -= fforce[1]; + f[j][2] -= fforce[2]; + tor[j][0] += rtor[0]; + tor[j][1] += rtor[1]; + tor[j][2] += rtor[2]; + } + + if (EFLAG) evdwl = factor_lj*one_eng; + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fforce[0],fforce[1],fforce[2], + -r12[0],-r12[1],-r12[2],tid); + } + } + } +} + +/* 
---------------------------------------------------------------------- */ + +double PairGayBerneOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGayBerne::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h new file mode 100644 index 000000000..737b4ec67 --- /dev/null +++ b/src/USER-OMP/pair_gayberne_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gayberne/omp,PairGayBerneOMP) + +#else + +#ifndef LMP_PAIR_GAYBERNE_OMP_H +#define LMP_PAIR_GAYBERNE_OMP_H + +#include "pair_gayberne.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGayBerneOMP : public PairGayBerne, public ThrOMP { + + public: + PairGayBerneOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp new file mode 100644 index 000000000..1866833af --- /dev/null +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -0,0 +1,298 @@ 
+/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hertz_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) : + PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHertzHistoryOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int shearupdate = (update->ntimestep > laststep) ? 
1 : 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) + if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); + else eval<1,0>(f, torque, ifrom, ito, tid); + else + if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); + else eval<0,0>(f, torque, ifrom, ito, tid); + + // reduce per thread forces and torque into global arrays. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + + laststep = update->ntimestep; +} + +template +void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,fs1,fs2,fs3; + double shrmag,rsht,polyhertz; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *shear,*allshear,**firstshear; + + double **x = atom->x; + double **v = atom->v; + double **omega = atom->omega; + double *radius = atom->radius; + double *rmass = atom->rmass; + double *mass = atom->mass; + int *type = atom->type; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = list->listgranhistory->firstneigh; + firstshear = list->listgranhistory->firstdouble; + + 
// loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + touch = firsttouch[i]; + allshear = firstshear[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq >= radsum*radsum) { + + // unset non-touching neighbors + + touch[jj] = 0; + shear = &allshear[3*jj]; + shear[0] = 0.0; + shear[1] = 0.0; + shear[2] = 0.0; + + } else { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // normal force = Hertzian contact + normal velocity damping + + if (rmass) { + meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]); + if (mask[i] & freeze_group_bit) meff = rmass[j]; + if (mask[j] & freeze_group_bit) meff = rmass[i]; + } else { + itype = type[i]; + jtype = type[j]; + meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]); + if (mask[i] & freeze_group_bit) meff = mass[jtype]; + if (mask[j] & freeze_group_bit) meff = mass[itype]; + } + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + polyhertz = sqrt((radsum-r)*radi*radj / radsum); + ccel *= polyhertz; + + // relative velocities + + vtr1 = vt1 - 
(delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + touch[jj] = 1; + shear = &allshear[3*jj]; + + if (SHEARUPDATE) { + shear[0] += vtr1*dt; + shear[1] += vtr2*dt; + shear[2] += vtr3*dt; + } + shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + + shear[2]*shear[2]); + + // rotate shear displacements + + rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear[0] -= rsht*delx; + shear[1] -= rsht*dely; + shear[2] -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = -polyhertz * (kt*shear[0] + meff*gammat*vtr1); + fs2 = -polyhertz * (kt*shear[1] + meff*gammat*vtr2); + fs3 = -polyhertz * (kt*shear[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + const double fnfs = fn/fs; + const double mgkt = meff*gammat/kt; + shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1; + shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2; + shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3; + fs1 *= fnfs; + fs2 *= fnfs; + fs3 *= fnfs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, + 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] 
+= fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHertzHistoryOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHertzHistory::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h new file mode 100644 index 000000000..66d7bc0fa --- /dev/null +++ b/src/USER-OMP/pair_gran_hertz_history_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hertz/history/omp,PairGranHertzHistoryOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H +#define LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H + +#include "pair_gran_hertz_history.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP { + + public: + PairGranHertzHistoryOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int SHEARUPDATE> + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp new file mode 100644 index 000000000..ad0537b51 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -0,0 +1,301 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hooke_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include "string.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) : + PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; + // trigger use of OpenMP version of FixShearHistory + suffix = new char[4]; + memcpy(suffix,"omp",4); +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHookeHistoryOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int shearupdate = (update->ntimestep > laststep) ? 1 : 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) + if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid); + else eval<1,0>(f, torque, ifrom, ito, tid); + else + if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid); + else eval<0,0>(f, torque, ifrom, ito, tid); + + // reduce per thread forces and torque into global arrays. 
+ data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + + laststep = update->ntimestep; +} + +template <int EVFLAG, int SHEARUPDATE> +void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,fs1,fs2,fs3; + double shrmag,rsht; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *shear,*allshear,**firstshear; + + double **x = atom->x; + double **v = atom->v; + double **omega = atom->omega; + double *radius = atom->radius; + double *rmass = atom->rmass; + double *mass = atom->mass; + int *type = atom->type; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = listgranhistory->firstneigh; + firstshear = listgranhistory->firstdouble; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + touch = firsttouch[i]; + allshear = firstshear[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq >= radsum*radsum) { + + // unset non-touching neighbors + + touch[jj] = 0; + shear = &allshear[3*jj]; + shear[0] = 
0.0; + shear[1] = 0.0; + shear[2] = 0.0; + + } else { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // normal forces = Hookian contact + normal velocity damping + + if (rmass) { + meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]); + if (mask[i] & freeze_group_bit) meff = rmass[j]; + if (mask[j] & freeze_group_bit) meff = rmass[i]; + } else { + itype = type[i]; + jtype = type[j]; + meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]); + if (mask[i] & freeze_group_bit) meff = mass[jtype]; + if (mask[j] & freeze_group_bit) meff = mass[itype]; + } + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + vtr1 = vt1 - (delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + touch[jj] = 1; + shear = &allshear[3*jj]; + + if (SHEARUPDATE) { + shear[0] += vtr1*dt; + shear[1] += vtr2*dt; + shear[2] += vtr3*dt; + } + shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + + shear[2]*shear[2]); + + // rotate shear displacements + + rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear[0] -= rsht*delx; + shear[1] -= rsht*dely; + shear[2] -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = - (kt*shear[0] + meff*gammat*vtr1); + fs2 = - (kt*shear[1] + 
meff*gammat*vtr2); + fs3 = - (kt*shear[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + const double fnfs = fn/fs; + const double mgkt = meff*gammat/kt; + shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1; + shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2; + shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3; + fs1 *= fnfs; + fs2 *= fnfs; + fs3 *= fnfs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, + 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHookeHistoryOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHookeHistory::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h new file mode 100644 index 000000000..33325025f --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_history_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, 
sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/history/omp,PairGranHookeHistoryOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H +#define LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H + +#include "pair_gran_hooke_history.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP { + + public: + PairGranHookeHistoryOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int SHEARUPDATE> + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp new file mode 100644 index 000000000..d6991fa45 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -0,0 +1,240 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hooke_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) : + PairGranHooke(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHookeOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (evflag) + if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid); + else eval<1,0>(f, torque, ifrom, ito, tid); + else + if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid); + else eval<0,0>(f, torque, ifrom, ito, tid); + + // reduce per thread forces and torque into global arrays. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+ if (evflag) ev_reduce_thr(this); +} + +template <int EVFLAG, int NEWTON_PAIR> +void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,ft,fs1,fs2,fs3; + int *ilist,*jlist,*numneigh,**firstneigh; + + double **x = atom->x; + double **v = atom->v; + double **omega = atom->omega; + double *radius = atom->radius; + double *rmass = atom->rmass; + double *mass = atom->mass; + int *type = atom->type; + int *mask = atom->mask; + int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq < radsum*radsum) { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = 
(radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // normal forces = Hookian contact + normal velocity damping + + if (rmass) { + meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]); + if (mask[i] & freeze_group_bit) meff = rmass[j]; + if (mask[j] & freeze_group_bit) meff = rmass[i]; + } else { + itype = type[i]; + jtype = type[j]; + meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]); + if (mask[i] & freeze_group_bit) meff = mass[jtype]; + if (mask[j] & freeze_group_bit) meff = mass[itype]; + } + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + vtr1 = vt1 - (delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // force normalization + + fn = xmu * fabs(ccel*r); + fs = meff*gammat*vrel; + if (vrel != 0.0) ft = MIN(fn,fs) / vrel; + else ft = 0.0; + + // tangential force due to tangential velocity damping + + fs1 = -ft*vtr1; + fs2 = -ft*vtr2; + fs3 = -ft*vtr3; + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHookeOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHooke::memory_usage(); 
+ + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h new file mode 100644 index 000000000..f2b093778 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/omp,PairGranHookeOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_OMP_H +#define LMP_PAIR_GRAN_HOOKE_OMP_H + +#include "pair_gran_hooke.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHookeOMP : public PairGranHooke, public ThrOMP { + + public: + PairGranHookeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int NEWTON_PAIR> + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp new file mode 100644 index 000000000..012fd596b --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -0,0 +1,299 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia 
National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_hbond_dreiding_lj_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) : + PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; + hbcount_thr = hbeng_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingLJOMP::~PairHbondDreidingLJOMP() +{ + respa_enable = 0; + if (hbcount_thr) { + delete[] hbcount_thr; + delete[] hbeng_thr; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairHbondDreidingLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!hbcount_thr) { + hbcount_thr = new double[nthreads]; + hbeng_thr = new double[nthreads]; + } + + for (int i=0; i < nthreads; ++i) { + hbcount_thr[i] = 0.0; + hbeng_thr[i] = 0.0; + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + 
if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); + + // reduce per thread hbond data + if (eflag_global) { + pvector[0] = 0.0; + pvector[1] = 0.0; + for (int i=0; i < nthreads; ++i) { + pvector[0] += hbcount_thr[i]; + pvector[1] += hbeng_thr[i]; + } + } +} + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; + double factor_hb,force_angle,force_kernel,evdwl,eng_lj; + double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; + double fi[3],fj[3],delr1[3],delr2[3]; + double r2inv,r10inv; + double switch1,switch2; + int *ilist,*jlist,*klist,*numneigh,**firstneigh; + Param *pm; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int **special = atom->special; + int **nspecial = atom->nspecial; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + + // ii = loop over donors + // jj = loop over acceptors + // kk = loop over hydrogens bonded to donor + + int hbcount = 0; + double hbeng = 0.0; + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + if (!donor[itype]) continue; + + klist = special[i]; + knum = nspecial[i][0]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; 
+ + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_hb = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + jtype = type[j]; + if (!acceptor[jtype]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + for (kk = 0; kk < knum; kk++) { + k = atom->map(klist[kk]); + if (k < 0) continue; + ktype = type[k]; + m = type2param[itype][jtype][ktype]; + if (m < 0) continue; + pm = &params[m]; + + if (rsq < pm->cut_outersq) { + delr1[0] = xtmp - x[k][0]; + delr1[1] = ytmp - x[k][1]; + delr1[2] = ztmp - x[k][2]; + domain->minimum_image(delr1); + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + r1 = sqrt(rsq1); + + delr2[0] = x[j][0] - x[k][0]; + delr2[1] = x[j][1] - x[k][1]; + delr2[2] = x[j][2] - x[k][2]; + domain->minimum_image(delr2); + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + ac = acos(c); + + if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // LJ-specific kernel + + r2inv = 1.0/rsq; + r10inv = r2inv*r2inv*r2inv*r2inv*r2inv; + force_kernel = r10inv*(pm->lj1*r2inv - pm->lj2)*r2inv * + pow(c,pm->ap); + force_angle = pm->ap * r10inv*(pm->lj3*r2inv - pm->lj4) * + pow(c,pm->ap-1)*s; + + eng_lj = r10inv*(pm->lj3*r2inv - pm->lj4); + if (rsq > pm->cut_innersq) { + switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * + (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / + pm->denom_vdw; + switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * + (rsq-pm->cut_innersq) / pm->denom_vdw; + force_kernel = force_kernel*switch1 + eng_lj*switch2; + eng_lj *= switch1; + } + + if (EFLAG) { + evdwl = eng_lj * pow(c,pm->ap); + evdwl *= factor_hb; + } + + a = 
factor_hb*force_angle/s; + b = factor_hb*force_kernel; + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + vx1 = a11*delr1[0] + a12*delr2[0]; + vx2 = a22*delr2[0] + a12*delr1[0]; + vy1 = a11*delr1[1] + a12*delr2[1]; + vy2 = a22*delr2[1] + a12*delr1[1]; + vz1 = a11*delr1[2] + a12*delr2[2]; + vz2 = a22*delr2[2] + a12*delr1[2]; + + fi[0] = vx1 + b*delx; + fi[1] = vy1 + b*dely; + fi[2] = vz1 + b*delz; + fj[0] = vx2 - b*delx; + fj[1] = vy2 - b*dely; + fj[2] = vz2 - b*delz; + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + + f[k][0] -= vx1 + vx2; + f[k][1] -= vy1 + vy2; + f[k][2] -= vz1 + vz2; + + // KIJ instead of IJK b/c delr1/delr2 are both with respect to k + + if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EFLAG) { + hbcount++; + hbeng += evdwl; + } + } + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + hbcount_thr[tid] = static_cast<double>(hbcount); + hbeng_thr[tid] = hbeng; +} + +/* ---------------------------------------------------------------------- */ + +double PairHbondDreidingLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += comm->nthreads * 2 * sizeof(double); + bytes += PairHbondDreidingLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h new file mode 100644 index 000000000..1aef78490 --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hbond/dreiding/lj/omp,PairHbondDreidingLJOMP) + +#else + +#ifndef LMP_PAIR_HBOND_DREIDING_LJ_OMP_H +#define LMP_PAIR_HBOND_DREIDING_LJ_OMP_H + +#include "pair_hbond_dreiding_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP { + + public: + PairHbondDreidingLJOMP(class LAMMPS *); + virtual ~PairHbondDreidingLJOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double *hbcount_thr, *hbeng_thr; + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp new file mode 100644 index 000000000..b6c966f8c --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -0,0 +1,297 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_hbond_dreiding_morse_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) : + PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; + hbcount_thr = hbeng_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingMorseOMP::~PairHbondDreidingMorseOMP() +{ + respa_enable = 0; + if (hbcount_thr) { + delete[] hbcount_thr; + delete[] hbeng_thr; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!hbcount_thr) { + hbcount_thr = new double[nthreads]; + hbeng_thr = new double[nthreads]; + } + + for (int i=0; i < nthreads; ++i) { + hbcount_thr[i] = 0.0; + hbeng_thr[i] = 0.0; + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) 
eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); + + // reduce per thread hbond data + if (eflag_global) { + pvector[0] = 0.0; + pvector[1] = 0.0; + for (int i=0; i < nthreads; ++i) { + pvector[0] += hbcount_thr[i]; + pvector[1] += hbeng_thr[i]; + } + } +} + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; + double factor_hb,force_angle,force_kernel,evdwl; + double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; + double fi[3],fj[3],delr1[3],delr2[3]; + double r,dr,dexp,eng_morse,switch1,switch2; + int *ilist,*jlist,*klist,*numneigh,**firstneigh; + Param *pm; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int **special = atom->special; + int **nspecial = atom->nspecial; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + + // ii = loop over donors + // jj = loop over acceptors + // kk = loop over hydrogens bonded to donor + + int hbcount = 0; + double hbeng = 0.0; + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + if (!donor[itype]) continue; + + klist = special[i]; + knum = nspecial[i][0]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_hb = special_lj[sbmask(j)]; + j &= NEIGHMASK; + 
jtype = type[j]; + if (!acceptor[jtype]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + for (kk = 0; kk < knum; kk++) { + k = atom->map(klist[kk]); + if (k < 0) continue; + ktype = type[k]; + m = type2param[itype][jtype][ktype]; + if (m < 0) continue; + pm = &params[m]; + + if (rsq < pm->cut_outersq) { + delr1[0] = xtmp - x[k][0]; + delr1[1] = ytmp - x[k][1]; + delr1[2] = ztmp - x[k][2]; + domain->minimum_image(delr1); + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + r1 = sqrt(rsq1); + + delr2[0] = x[j][0] - x[k][0]; + delr2[1] = x[j][1] - x[k][1]; + delr2[2] = x[j][2] - x[k][2]; + domain->minimum_image(delr2); + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + ac = acos(c); + + if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // Morse-specific kernel + + r = sqrt(rsq); + dr = r - pm->r0; + dexp = exp(-pm->alpha * dr); + // eng_morse must be assigned before force_angle, which reads it + eng_morse = pm->d0 * (dexp*dexp - 2.0*dexp); + force_kernel = pm->morse1*(dexp*dexp - dexp)/r * pow(c,pm->ap); + force_angle = pm->ap * eng_morse * pow(c,pm->ap-1)*s; + if (rsq > pm->cut_innersq) { + switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * + (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / + pm->denom_vdw; + switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * + (rsq-pm->cut_innersq) / pm->denom_vdw; + force_kernel = force_kernel*switch1 + eng_morse*switch2; + eng_morse *= switch1; + } + + if (EFLAG) { + evdwl = eng_morse * pow(c,params[m].ap); + evdwl *= factor_hb; + } + + a = factor_hb*force_angle/s; + b = factor_hb*force_kernel; + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + vx1 = a11*delr1[0] + a12*delr2[0]; + vx2 = a22*delr2[0] + a12*delr1[0]; + 
vy1 = a11*delr1[1] + a12*delr2[1]; + vy2 = a22*delr2[1] + a12*delr1[1]; + vz1 = a11*delr1[2] + a12*delr2[2]; + vz2 = a22*delr2[2] + a12*delr1[2]; + + fi[0] = vx1 + b*delx; + fi[1] = vy1 + b*dely; + fi[2] = vz1 + b*delz; + fj[0] = vx2 - b*delx; + fj[1] = vy2 - b*dely; + fj[2] = vz2 - b*delz; + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + + f[k][0] -= vx1 + vx2; + f[k][1] -= vy1 + vy2; + f[k][2] -= vz1 + vz2; + + // KIJ instead of IJK b/c delr1/delr2 are both with respect to k + + if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid); + if (EFLAG) { + hbcount++; + hbeng += evdwl; + } + } + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + hbcount_thr[tid] = static_cast<double>(hbcount); + hbeng_thr[tid] = hbeng; +} + +/* ---------------------------------------------------------------------- */ + +double PairHbondDreidingMorseOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += comm->nthreads * 2 * sizeof(double); + bytes += PairHbondDreidingMorse::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h new file mode 100644 index 000000000..2a13c618c --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hbond/dreiding/morse/omp,PairHbondDreidingMorseOMP) + +#else + +#ifndef LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H +#define LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H + +#include "pair_hbond_dreiding_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP { + + public: + PairHbondDreidingMorseOMP(class LAMMPS *); + virtual ~PairHbondDreidingMorseOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double *hbcount_thr, *hbeng_thr; + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp similarity index 86% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj96_cut_omp.cpp index 8ed82c5e5..f0998363e 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -1,163 +1,162 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj96_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) : + PairLJ96Cut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJ96CutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + r3inv = sqrt(r6inv); + + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() 
+double PairLJ96CutOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJ96Cut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h new file mode 100644 index 000000000..333212303 --- /dev/null +++ b/src/USER-OMP/pair_lj96_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj96/cut/omp,PairLJ96CutOMP) + +#else + +#ifndef LMP_PAIR_LJ96_CUT_OMP_H +#define LMP_PAIR_LJ96_CUT_OMP_H + +#include "pair_lj96_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP { + + public: + PairLJ96CutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp similarity index 54% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp index 8ed82c5e5..32ad05acd 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp 
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -1,163 +1,213 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_charmm_coul_charmm_implicit_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) : + PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else 
eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double philj,switch1,switch2; + double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; + invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + invdenom_lj = (denom_lj != 0.0) ? 
1.0/denom_lj : 0.0; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - if (rsq < cutsq[itype][jtype]) { + if (rsq < cut_bothsq) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + if (rsq < cut_coulsq) { + forcecoul = 2.0 * qqrd2e * qtmp*q[j]*r2inv; + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul; + switch2 = 12.0*rsq * (cut_coulsq-rsq) * + (rsq-cut_coul_innersq) * invdenom_coul; + forcecoul *= switch1 + switch2; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * invdenom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + 
ecoul = qqrd2e * qtmp*q[j]*r2inv; + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * + invdenom_coul; + ecoul *= switch1; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCharmmCoulCharmmImplicitOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCharmmCoulCharmmImplicit::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h new file mode 100644 index 000000000..ba016d7d3 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/charmm/implicit/omp,PairLJCharmmCoulCharmmImplicitOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H + +#include "pair_lj_charmm_coul_charmm_implicit.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit, public ThrOMP { + + public: + PairLJCharmmCoulCharmmImplicitOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp similarity index 55% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp index 8ed82c5e5..6dac7a17f 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -1,163 +1,213 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_charmm_coul_charmm_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) : + PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double philj,switch1,switch2; + double invdenom_coul,invdenom_lj; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; + invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + invdenom_lj = (denom_lj != 0.0) ? 
1.0/denom_lj : 0.0; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; - if (rsq < cutsq[itype][jtype]) { + if (rsq < cut_bothsq) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + if (rsq < cut_coulsq) { + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul; + switch2 = 12.0*rsq * (cut_coulsq-rsq) * + (rsq-cut_coul_innersq) * invdenom_coul; + forcecoul *= switch1 + switch2; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * invdenom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + 
ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * + invdenom_coul; + ecoul *= switch1; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCharmmCoulCharmmOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCharmmCoulCharmm::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h new file mode 100644 index 000000000..f2889b05f --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/charmm/omp,PairLJCharmmCoulCharmmOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H + +#include "pair_lj_charmm_coul_charmm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP { + + public: + PairLJCharmmCoulCharmmOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp similarity index 50% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp index 8ed82c5e5..c99f27f2e 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -1,163 +1,234 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_charmm_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) : + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) { - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double philj,switch1,switch2; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + 
+ if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) / denom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq) { + 
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCharmmCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCharmmCoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h new file mode 100644 index 000000000..b14e4c1fe --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/long/omp,PairLJCharmmCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H + +#include "pair_lj_charmm_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP { + + public: + PairLJCharmmCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp similarity index 68% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp index 8ed82c5e5..032188279 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -1,163 +1,185 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_class2_coul_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) : + PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj; + double factor_coul,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + rinv = sqrt(r2inv); + + if (rsq < cut_coulsq[itype][jtype]) { + forcecoul = qqrd2e * qtmp*q[j]*rinv; + forcecoul *= 
factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJClass2CoulCutOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJClass2CoulCut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h new file mode 100644 index 000000000..5fe489569 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/coul/cut/omp,PairLJClass2CoulCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H +#define LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H + +#include "pair_lj_class2_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP { + + public: + PairLJClass2CoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp similarity index 62% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_class2_coul_long_omp.cpp index 8ed82c5e5..84d26ceb1 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -1,163 +1,201 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_class2_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) : + PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = 
exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJClass2CoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJClass2CoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h new file mode 100644 index 000000000..da4ac3680 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular 
Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/coul/long/omp,PairLJClass2CoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H + +#include "pair_lj_class2_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP { + + public: + PairLJClass2CoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp similarity index 86% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_class2_omp.cpp index 8ed82c5e5..4f5d2550f 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -1,163 +1,162 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_class2_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) : + PairLJClass2(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJClass2OMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + r3inv = sqrt(r6inv); + + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double 
PairLJCutOMP::memory_usage() +double PairLJClass2OMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJClass2::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h new file mode 100644 index 000000000..cfe24bb71 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/omp,PairLJClass2OMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_OMP_H +#define LMP_PAIR_LJ_CLASS2_OMP_H + +#include "pair_lj_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2OMP : public PairLJClass2, public ThrOMP { + + public: + PairLJClass2OMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp new file mode 100644 index 000000000..23e2a8d90 --- /dev/null +++ b/src/USER-OMP/pair_lj_coul_omp.cpp @@ -0,0 +1,234 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_coul_omp.h" +#include "atom.h" +#include "comm.h" +#include "math_vector.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) : + PairLJCoul(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCoulOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, 
ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- */ + +template <int EVFLAG, int EFLAG, int NEWTON_PAIR> +void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid) +{ + double evdwl,ecoul,fpair; + evdwl = ecoul = 0.0; + + double **x = atom->x; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + double *x0 = x[0]; + double *f0 = f[0], *fi = f0; + + int *ilist = list->ilist; + + // loop over neighbors of my atoms + + int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); + int *jneigh, *jneighn, typei, typej, ni; + double qi, qri, *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double rsq, r2inv, force_coul, force_lj; + double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2; + vector xi, d; + + for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + if (order1) qri = (qi = q[i])*qqrd2e; // initialize constants + offseti = offset[typei = type[i]]; + lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; + cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + + for (; jneigh<jneighn; ++jneigh) { + j = *jneigh; + ni = sbmask(j); + j &= NEIGHMASK; + + { register double *xj = x0+(j+(j<<1)); + d[0] = xi[0] - xj[0]; + d[1] = xi[1] - xj[1]; + d[2] = xi[2] - xj[2]; } + + if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue; + r2inv = 1.0/rsq; + + if (order1 && (rsq < cut_coulsq)) { // coulombic + if (!ncoultablebits || rsq <= tabinnersq) { // series real space + register double r = sqrt(rsq), x = g_ewald*r; + register double s = 
qri*q[j], t = 1.0/(1.0+EWALD_P*x); + if (ni == 0) { + s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s; + if (EFLAG) ecoul = t; + } + else { // special case + r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r; + if (EFLAG) ecoul = t-r; + } + } // table real space + else { + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask)>>ncoulshiftbits; + register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; + if (ni == 0) { + force_coul = qiqj*(ftable[k]+f*dftable[k]); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // special case + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } + else force_coul = ecoul = 0.0; + + if (rsq < cut_ljsqi[typej]) { // lj + if (order6) { // long-range lj + register double rn = r2inv*r2inv*r2inv; + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[typej]; + if (ni == 0) { + force_lj = + (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) + evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]; + if (EFLAG) + evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; + } + } + else { // cut lj + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + force_lj = rn*(rn*lj1i[typej]-lj2i[typej]); + if (EFLAG) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]; + } + else { // special case + register double f = special_lj[ni]; + force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]); + if (EFLAG) + evdwl = f * (rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]); + } + } + } + else force_lj = evdwl = 0.0; + + fpair = (force_coul+force_lj)*r2inv; + 
+ if (NEWTON_PAIR || j < nlocal) { + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } + else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,d[0],d[1],d[2],tid); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCoulOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCoul::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h new file mode 100644 index 000000000..619e609ba --- /dev/null +++ b/src/USER-OMP/pair_lj_coul_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/coul/omp,PairLJCoulOMP) + +#else + +#ifndef LMP_PAIR_LJ_COUL_OMP_H +#define LMP_PAIR_LJ_COUL_OMP_H + +#include "pair_lj_coul.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCoulOMP : public PairLJCoul, public ThrOMP { + + public: + PairLJCoulOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp similarity index 79% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_cubic_omp.cpp index 8ed82c5e5..4f806bd71 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -1,163 +1,173 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_cubic_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +using namespace PairLJCubicConstants; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) : + PairLJCubic(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCubicOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,rmin; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq <= cut_inner_sq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + } else { + r = sqrt(rsq); + rmin = sigma[itype][jtype]*RT6TWO; + t = (r - cut_inner[itype][jtype])/rmin; + forcelj = epsilon[itype][jtype]*(-DPHIDS + A3*t*t/2.0)*r/rmin; + } fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = 
r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + if (rsq <= cut_inner_sq[itype][jtype]) + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + else + evdwl = epsilon[itype][jtype]* + (PHIS + DPHIDS*t - A3*t*t*t/6.0); + evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCubicOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCubic::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h new file mode 100644 index 000000000..559a6125a --- /dev/null +++ b/src/USER-OMP/pair_lj_cubic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cubic/omp,PairLJCubicOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUBIC_OMP_H +#define LMP_PAIR_LJ_CUBIC_OMP_H + +#include "pair_lj_cubic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCubicOMP : public PairLJCubic, public ThrOMP { + + public: + PairLJCubicOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp similarity index 69% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp index 8ed82c5e5..be98ec38f 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -1,163 +1,183 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_cut_coul_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) : + PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCutCoulCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rinv,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + if (rsq < cut_coulsq[itype][jtype]) { + rinv = sqrt(r2inv); + forcecoul = qqrd2e * qtmp*q[j]*rinv; + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + 
fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; /* beyond the LJ cutoff: do not tally a stale evdwl from an earlier pair */ + } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCutCoulCutOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCutCoulCut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h new file mode 100644 index 000000000..c8c34e259 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/cut/omp,PairLJCutCoulCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H + +#include "pair_lj_cut_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP { + + public: + PairLJCutCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp similarity index 67% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp index 8ed82c5e5..13a4a1906 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -1,163 +1,186 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_cut_coul_debye_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) : + PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double r,rinv,screening; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + + if (rsq < cut_coulsq[itype][jtype]) { + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forcecoul 
= qqrd2e * qtmp*q[j] * screening * (kappa + rinv); + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCutCoulDebyeOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCutCoulDebye::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h new file mode 100644 index 000000000..00cf540be --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/debye/omp,PairLJCutCoulDebyeOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H + +#include "pair_lj_cut_coul_debye.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP { + + public: + PairLJCutCoulDebyeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp similarity index 54% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_cut_coul_long_omp.cpp index 8ed82c5e5..1d8f977c9 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -1,163 +1,220 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_cut_coul_long_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) : + PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJCutCoulLongOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. 
- force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid) { - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + if (rsq < cut_coulsq) { + if 
(!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* 
---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJCutCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJCutCoulLong::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h new file mode 100644 index 000000000..ac408ba88 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/omp,PairLJCutCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H + +#include "pair_lj_cut_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP { + + public: + PairLJCutCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp new file mode 100644 index 000000000..6ada944c5 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp @@ -0,0 +1,462 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_long_tip4p_omp.h" +#include "atom.h" +#include "domain.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "error.h" +#include "memory.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) : + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; + + // for caching m-shift corrected positions + maxmpos = 0; + h1idx = h2idx = NULL; + mpos = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongTIP4POMP::~PairLJCutCoulLongTIP4POMP() +{ + memory->destroy(h1idx); + memory->destroy(h2idx); + memory->destroy(mpos); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + // reallocate per-atom arrays, if necessary + if (nall > maxmpos) { + maxmpos = nall; + memory->grow(mpos,maxmpos,3,"pair:mpos"); + memory->grow(h1idx,maxmpos,"pair:h1idx"); + memory->grow(h2idx,maxmpos,"pair:h2idx"); + } + + // cache corrected M positions in mpos[] + double **x = atom->x; + int *type = atom->type; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + 
find_M(i,h1idx[i],h2idx[i],mpos[i]); + } else { + mpos[i][0] = x[i][0]; + mpos[i][1] = x[i][1]; + mpos[i][2] = x[i][2]; + } + } + for (int i = nlocal; i < nall; i++) { + if (type[i] == typeO) { + find_M_permissive(i,h1idx[i],h2idx[i],mpos[i]); + } else { + mpos[i][0] = x[i][0]; + mpos[i][1] = x[i][1]; + mpos[i][2] = x[i][2]; + } + } + + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (vflag) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (vflag) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+ if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- */ + +template <int EVFLAG, int EFLAG, int VFLAG> +void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + int n,vlist[6]; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double fraction,table; + double delxOM,delyOM,delzOM; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc,ddotf; + double v[6],xH1[3],xH2[3]; + double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz; + double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + double **x = atom->x; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + x1 = mpos[i]; + iH1 = h1idx[i]; + iH2 = h2idx[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + + r2inv = 1.0/rsq; + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + 
f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, + evdwl,0.0,forcelj,delx,dely,delz,tid); + } + + // adjust rsq and delxyz for off-site O charge(s) + + if (itype == typeO || jtype == typeO) { + x2 = mpos[j]; + jH1 = h1idx[j]; + jH2 = h2idx[j]; + if (jtype == typeO && ( jH1 < 0 || jH2 < 0 )) + error->one(FLERR,"TIP4P hydrogen is missing"); + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // test current rsq against cutoff and compute Coulombic force + + if (rsq < cut_coulsq) { + r2inv = 1 / rsq; + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + cforce = forcecoul * r2inv; + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = 
sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + if (VFLAG) { + v[0] = x[i][0] * delx * cforce; + v[1] = x[i][1] * dely * cforce; + v[2] = x[i][2] * delz * cforce; + v[3] = x[i][0] * dely * cforce; + v[4] = x[i][0] * delz * cforce; + v[5] = x[i][1] * delz * cforce; + vlist[n++] = i; + } + + } else { + + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + delxOM = x[i][0] - x1[0]; + delyOM = x[i][1] - x1[1]; + delzOM = x[i][2] - x1[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1][0] += fHx; + f[iH1][1] += fHy; + f[iH1][2] += fHz; + + f[iH2][0] += fHx; + f[iH2][1] += fHy; + f[iH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[i],x[iH1],xH1); + domain->closest_image(x[i],x[iH2],xH2); + + v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (VFLAG) { + v[0] -= x[j][0] * delx * cforce; + v[1] -= x[j][1] * dely * cforce; + v[2] -= x[j][2] * delz * cforce; + v[3] -= x[j][0] * dely * cforce; + v[4] -= x[j][0] * delz * cforce; + v[5] -= x[j][1] * delz * cforce; + vlist[n++] = j; + } + + } else { + + fdx = -delx*cforce; + fdy = 
-dely*cforce; + fdz = -delz*cforce; + + delxOM = x[j][0] - x2[0]; + delyOM = x[j][1] - x2[1]; + delzOM = x[j][2] - x2[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + f[j][0] += fOx; + f[j][1] += fOy; + f[j][2] += fOz; + + f[jH1][0] += fHx; + f[jH1][1] += fHy; + f[jH1][2] += fHz; + + f[jH2][0] += fHx; + f[jH2][1] += fHy; + f[jH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[j],x[jH1],xH1); + domain->closest_image(x[j],x[jH2],xH2); + + v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + } + + if (EFLAG) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid); + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongTIP4POMP::find_M_permissive(int i, int &iH1, int &iH2, double *xM) +{ + // test that O is correctly bonded to 2 successive H atoms + + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) + return; + else + find_M(i,iH1,iH2,xM); +} + +/* ---------------------------------------------------------------------- */ + +double 
PairLJCutCoulLongTIP4POMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLongTIP4P::memory_usage(); + bytes += 2 * maxmpos * sizeof(int); + bytes += 3 * maxmpos * sizeof(double); + bytes += maxmpos * sizeof(double *); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h new file mode 100644 index 000000000..093fc0216 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h @@ -0,0 +1,57 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/tip4p/omp,PairLJCutCoulLongTIP4POMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H + +#include "pair_lj_cut_coul_long_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { + + public: + PairLJCutCoulLongTIP4POMP(class LAMMPS *); + virtual ~PairLJCutCoulLongTIP4POMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + + // this is to cache m-shift corrected positions. 
+ int maxmpos; // size of the following arrays + int *h1idx, *h2idx; // local index of hydrogen atoms + double **mpos; // coordinates corrected for m-shift. + void find_M_permissive(int, int &, int &, double *); + + private: + template <int EVFLAG, int EFLAG, int VFLAG> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp index 8ed82c5e5..3d82149fe 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -1,163 +1,160 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_lj_cut_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : PairLJCut(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ void PairLJCutOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, 
tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= 
delz*fpair; } if (EFLAG) { evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ double PairLJCutOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairLJCut::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp similarity index 87% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_expand_omp.cpp index 8ed82c5e5..7b06503ee 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -1,163 +1,164 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_expand_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) : + PairLJExpand(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJExpandOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,rshift,rshiftsq; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; + r = sqrt(rsq); + rshift = r - shift[itype][jtype]; + rshiftsq = rshift*rshift; + r2inv = 1.0/rshiftsq; r6inv = r2inv*r2inv*r2inv; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + fpair = factor_lj*forcelj/rshift/r; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double 
PairLJExpandOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJExpand::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h new file mode 100644 index 000000000..29488deae --- /dev/null +++ b/src/USER-OMP/pair_lj_expand_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/expand/omp,PairLJExpandOMP) + +#else + +#ifndef LMP_PAIR_LJ_EXPAND_OMP_H +#define LMP_PAIR_LJ_EXPAND_OMP_H + +#include "pair_lj_expand.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJExpandOMP : public PairLJExpand, public ThrOMP { + + public: + PairLJExpandOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp similarity index 57% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp index 8ed82c5e5..2e97fa1b5 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ 
b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -1,163 +1,210 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_gromacs_coul_gromacs_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) : + PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if 
(force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double r,tlj,tc,fswitch,fswitchcoul,eswitch,ecoulswitch; int *ilist,*jlist,*numneigh,**firstneigh; - evdwl = 0.0; + evdwl = ecoul = 0.0; double **x = atom->x; + double *q = atom->q; int *type = atom->type; int nlocal = atom->nlocal; + double *special_coul = force->special_coul; double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; + qtmp = q[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq 
< cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + + // skip if qi or qj = 0.0 since this potential may be used as + // coarse-grain model with many uncharged atoms + + if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) { + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + r = sqrt(rsq); + tc = r - cut_coul_inner; + fswitchcoul = qqrd2e * qtmp*q[j]*r*tc*tc*(coulsw1 + coulsw2*tc); + forcecoul += fswitchcoul; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + r = sqrt(rsq); + tlj = r - cut_lj_inner; + fswitch = r*tlj*tlj*(ljsw1[itype][jtype] + + ljsw2[itype][jtype]*tlj); + forcelj += fswitch; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + if (rsq < cut_coulsq) { + ecoul = qqrd2e * qtmp*q[j] * (sqrt(r2inv) - coulsw5); + if (rsq > cut_coul_innersq) { + ecoulswitch = tc*tc*tc * (coulsw3 + coulsw4*tc); + ecoul += qqrd2e*qtmp*q[j]*ecoulswitch; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + evdwl += ljsw5[itype][jtype]; + if (rsq > cut_lj_innersq) { + eswitch = tlj*tlj*tlj * + (ljsw3[itype][jtype] + ljsw4[itype][jtype]*tlj); + evdwl += eswitch; + } + evdwl *= factor_lj; + } else evdwl = 0.0; } - + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,tid); + 
evdwl,ecoul,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJGromacsCoulGromacsOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJGromacsCoulGromacs::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h new file mode 100644 index 000000000..d789bd679 --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/gromacs/coul/gromacs/omp,PairLJGromacsCoulGromacsOMP) + +#else + +#ifndef LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H +#define LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H + +#include "pair_lj_gromacs_coul_gromacs.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrOMP { + + public: + PairLJGromacsCoulGromacsOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp similarity index 80% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_gromacs_omp.cpp index 8ed82c5e5..f1c7d2faf 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -1,163 +1,172 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_gromacs_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) : + PairLJGromacs(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJGromacsOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,fswitch,eswitch; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_inner_sq[itype][jtype]) { + r = sqrt(rsq); + t = r - cut_inner[itype][jtype]; + fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t); + forcelj += fswitch; + } + fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + evdwl += ljsw5[itype][jtype]; + if (rsq > cut_inner_sq[itype][jtype]) { + eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t); + evdwl += eswitch; + } evdwl *= factor_lj; } if (EVFLAG) 
ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJGromacsOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJGromacs::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h new file mode 100644 index 000000000..d192a414e --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/gromacs/omp,PairLJGromacsOMP) + +#else + +#ifndef LMP_PAIR_LJ_GROMACS_OMP_H +#define LMP_PAIR_LJ_GROMACS_OMP_H + +#include "pair_lj_gromacs.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP { + + public: + PairLJGromacsOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp similarity index 83% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_sf_omp.cpp index 8ed82c5e5..55ee908e4 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_omp.cpp @@ -1,163 +1,163 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_sf_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) : + PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJShiftedForceOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double t,rsq,r2inv,r6inv,forcelj,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; r6inv = r2inv*r2inv*r2inv; + t = sqrt(r2inv*cutsq[itype][jtype]); + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) - + t*foffset[itype][jtype]; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + + (t-1.0)*foffset[itype][jtype] - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* 
---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJShiftedForceOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJShiftedForce::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h new file mode 100644 index 000000000..6fba43fb8 --- /dev/null +++ b/src/USER-OMP/pair_lj_sf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sf/omp,PairLJShiftedForceOMP) + +#else + +#ifndef LMP_PAIR_LJ_SF_OMP_H +#define LMP_PAIR_LJ_SF_OMP_H + +#include "pair_lj_sf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP { + + public: + PairLJShiftedForceOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp similarity index 76% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_lj_smooth_omp.cpp index 8ed82c5e5..1ad88044a 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -1,163 +1,176 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_lj_smooth_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) : + PairLJSmooth(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairLJSmoothOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,tsq,fskin; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq < cut_inner_sq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]); + } else { + r = sqrt(rsq); + t = r - cut_inner[itype][jtype]; + tsq = t*t; + fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t + + ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t; + forcelj = fskin*r; + } + fpair = factor_lj*forcelj*r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + if (rsq < cut_inner_sq[itype][jtype]) + evdwl = r6inv * (lj3[itype][jtype]*r6inv - + lj4[itype][jtype]) - 
offset[itype][jtype]; + else + evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t - + ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 - + ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairLJSmoothOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairLJSmooth::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h new file mode 100644 index 000000000..de27a4008 --- /dev/null +++ b/src/USER-OMP/pair_lj_smooth_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/smooth/omp,PairLJSmoothOMP) + +#else + +#ifndef LMP_PAIR_LJ_SMOOTH_OMP_H +#define LMP_PAIR_LJ_SMOOTH_OMP_H + +#include "pair_lj_smooth.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP { + + public: + PairLJSmoothOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp new file mode 100644 index 000000000..d45e0bf1b --- /dev/null +++ b/src/USER-OMP/pair_lubricate_omp.cpp @@ -0,0 +1,328 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lubricate_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "update.h" +#include "neighbor.h" +#include "random_mars.h" +#include "neigh_list.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) : + PairLubricate(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairLubricateOMP::~PairLubricateOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLubricateOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + random_thr[0] = random; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f, **torque; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + torque = atom->torque + tid*nall; + + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid); + else eval<1,1,0>(f, 
torque, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid); + else eval<1,0,0>(f, torque, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid); + else eval<0,0,0>(f, torque, ifrom, ito, tid); + } + + // reduce per thread forces and torques into global arrays. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairLubricateOMP::eval(double **f, double **torque, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fpair,fx,fy,fz,tx,ty,tz; + double rsq,r,h_sep,radi,tfmag; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3; + double vt1,vt2,vt3,w1,w2,w3,v_shear1,v_shear2,v_shear3; + double omega_t_1,omega_t_2,omega_t_3; + double n_cross_omega_t_1,n_cross_omega_t_2,n_cross_omega_t_3; + double wr1,wr2,wr3,wnnr,wn1,wn2,wn3; + double P_dot_wrel_1,P_dot_wrel_2,P_dot_wrel_3; + double a_squeeze,a_shear,a_pump,a_twist; + int *ilist,*jlist,*numneigh,**firstneigh; + + double **x = atom->x; + double **v = atom->v; + double **omega = atom->omega; + double *radius = atom->radius; + int *type = atom->type; + int nlocal = atom->nlocal; + double vxmu2f = force->vxmu2f; + RanMars &rng = *random_thr[tid]; + + double prethermostat = sqrt(2.0 * force->boltz * t_target / update->dt); + prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e); + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + a_squeeze = a_shear = a_pump = a_twist = 0.0; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + itype = 
type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + + r = sqrt(rsq); + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component N.(v1-v2) = nn.(v1-v2) + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vnnr /= r; + vn1 = delx*vnnr / r; + vn2 = dely*vnnr / r; + vn3 = delz*vnnr / r; + + // tangential component -P.(v1-v2) + // P = (I - nn) where n is vector between centers + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // additive rotational velocity = omega_1 + omega_2 + + w1 = omega[i][0] + omega[j][0]; + w2 = omega[i][1] + omega[j][1]; + w3 = omega[i][2] + omega[j][2]; + + // relative velocities n X P . (v1-v2) = n X (I-nn) . 
(v1-v2) + + v_shear1 = (dely*vt3 - delz*vt2) / r; + v_shear2 = -(delx*vt3 - delz*vt1) / r; + v_shear3 = (delx*vt2 - dely*vt1) / r; + + // relative rotation rate P.(omega1 + omega2) + + omega_t_1 = w1 - delx*(delx*w1) / rsq; + omega_t_2 = w2 - dely*(dely*w2) / rsq; + omega_t_3 = w3 - delz*(delz*w3) / rsq; + + // n X omega_t + + n_cross_omega_t_1 = (dely*omega_t_3 - delz*omega_t_2) / r; + n_cross_omega_t_2 = -(delx*omega_t_3 - delz*omega_t_1) / r; + n_cross_omega_t_3 = (delx*omega_t_2 - dely*omega_t_1) / r; + + // N.(w1-w2) and P.(w1-w2) + + wr1 = omega[i][0] - omega[j][0]; + wr2 = omega[i][1] - omega[j][1]; + wr3 = omega[i][2] - omega[j][2]; + + wnnr = wr1*delx + wr2*dely + wr3*delz; + wn1 = delx*wnnr / rsq; + wn2 = dely*wnnr / rsq; + wn3 = delz*wnnr / rsq; + + P_dot_wrel_1 = wr1 - delx*(delx*wr1)/rsq; + P_dot_wrel_2 = wr2 - dely*(dely*wr2)/rsq; + P_dot_wrel_3 = wr3 - delz*(delz*wr3)/rsq; + + // compute components of pair-hydro + + h_sep = r - 2.0*radi; + + if (flag1) + a_squeeze = (3.0*MY_PI*mu*2.0*radi/2.0) * (2.0*radi/4.0/h_sep); + if (flag2) + a_shear = (MY_PI*mu*2.*radi/2.0) * + log(2.0*radi/2.0/h_sep)*(2.0*radi+h_sep)*(2.0*radi+h_sep)/4.0; + if (flag3) + a_pump = (MY_PI*mu*pow(2.0*radi,4)/8.0) * + ((3.0/20.0) * log(2.0*radi/2.0/h_sep) + + (63.0/250.0) * (h_sep/2.0/radi) * log(2.0*radi/2.0/h_sep)); + if (flag4) + a_twist = (MY_PI*mu*pow(2.0*radi,4)/4.0) * + (h_sep/2.0/radi) * log(2.0/(2.0*h_sep)); + + if (h_sep >= cut_inner[itype][jtype]) { + fx = -a_squeeze*vn1 - a_shear*(2.0/r)*(2.0/r)*vt1 + + (2.0/r)*a_shear*n_cross_omega_t_1; + fy = -a_squeeze*vn2 - a_shear*(2.0/r)*(2.0/r)*vt2 + + (2.0/r)*a_shear*n_cross_omega_t_2; + fz = -a_squeeze*vn3 - a_shear*(2.0/r)*(2.0/r)*vt3 + + (2.0/r)*a_shear*n_cross_omega_t_3; + fx *= vxmu2f; + fy *= vxmu2f; + fz *= vxmu2f; + + // add in thermostat force + + tfmag = prethermostat*sqrt(a_squeeze)*(rng.uniform()-0.5); + fx -= tfmag * delx/r; + fy -= tfmag * dely/r; + fz -= tfmag * delz/r; + + tx = -(2.0/r)*a_shear*v_shear1 - 
a_shear*omega_t_1 - + a_pump*P_dot_wrel_1 - a_twist*wn1; + ty = -(2.0/r)*a_shear*v_shear2 - a_shear*omega_t_2 - + a_pump*P_dot_wrel_2 - a_twist*wn2; + tz = -(2.0/r)*a_shear*v_shear3 - a_shear*omega_t_3 - + a_pump*P_dot_wrel_3 - a_twist*wn3; + torque[i][0] += vxmu2f * tx; + torque[i][1] += vxmu2f * ty; + torque[i][2] += vxmu2f * tz; + + } else { + a_squeeze = (3.0*MY_PI*mu*2.0*radi/2.0) * + (2.0*radi/4.0/cut_inner[itype][jtype]); + fpair = -a_squeeze*vnnr; + fpair *= vxmu2f; + + // add in thermostat force + + fpair -= prethermostat*sqrt(a_squeeze)*(rng.uniform()-0.5); + + fx = fpair * delx/r; + fy = fpair * dely/r; + fz = fpair * delz/r; + } + + f[i][0] += fx; + f[i][1] += fy; + f[i][2] += fz; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + + if (h_sep >= cut_inner[itype][jtype]) { + tx = -(2.0/r)*a_shear*v_shear1 - a_shear*omega_t_1 + + a_pump*P_dot_wrel_1 + a_twist*wn1; + ty = -(2.0/r)*a_shear*v_shear2 - a_shear*omega_t_2 + + a_pump*P_dot_wrel_2 + a_twist*wn2; + tz = -(2.0/r)*a_shear*v_shear3 - a_shear*omega_t_3 + + a_pump*P_dot_wrel_3 + a_twist*wn3; + torque[j][0] += vxmu2f * tx; + torque[j][1] += vxmu2f * ty; + torque[j][2] += vxmu2f * tz; + } + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + 0.0,0.0,fx,fy,fz,delx,dely,delz,tid); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLubricateOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLubricate::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lubricate_omp.h b/src/USER-OMP/pair_lubricate_omp.h new file mode 100644 index 000000000..d36d19046 --- /dev/null +++ b/src/USER-OMP/pair_lubricate_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + 
Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lubricate/omp,PairLubricateOMP) + +#else + +#ifndef LMP_PAIR_LUBRICATE_OMP_H +#define LMP_PAIR_LUBRICATE_OMP_H + +#include "pair_lubricate.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLubricateOMP : public PairLubricate, public ThrOMP { + + public: + PairLubricateOMP(class LAMMPS *); + virtual ~PairLubricateOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(double **f, double **torque, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp similarity index 83% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_morse_omp.cpp index 8ed82c5e5..a53e35a97 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -1,163 +1,160 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_morse_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairMorseOMP::PairMorseOMP(LAMMPS *lmp) : + PairMorse(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairMorseOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r,dr,dexp,factor_lj; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; factor_lj = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + dr = r - r0[itype][jtype]; + dexp = exp(-alpha[itype][jtype] * dr); + fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) - + offset[itype][jtype]; evdwl *= factor_lj; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* 
---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairMorseOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairMorse::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h new file mode 100644 index 000000000..a966e6f11 --- /dev/null +++ b/src/USER-OMP/pair_morse_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(morse/omp,PairMorseOMP) + +#else + +#ifndef LMP_PAIR_MORSE_OMP_H +#define LMP_PAIR_MORSE_OMP_H + +#include "pair_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairMorseOMP : public PairMorse, public ThrOMP { + + public: + PairMorseOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp new file mode 100644 index 000000000..7cb1e8308 --- /dev/null +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -0,0 +1,456 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "float.h" +#include "pair_peri_lps_omp.h" +#include "fix.h" +#include "fix_peri_neigh.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "lattice.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) : + PairPeriLPS(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairPeriLPSOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow bond forces array if necessary + + if (atom->nmax > nmax) { + memory->destroy(s0_new); + memory->destroy(theta); + nmax = atom->nmax; + memory->create(s0_new,nmax,"pair:s0_new"); + memory->create(theta,nmax,"pair:theta"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // 
reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0; + double rsq,r,dr,rk,evdwl,fpair,fbond; + int *ilist,*jlist,*numneigh,**firstneigh; + double d_ij,delta,stretch; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + + double *vfrac = atom->vfrac; + double *s0 = atom->s0; + double **x0 = atom->x0; + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume; + + // lc = lattice constant + // init_style guarantees it's the same in x, y, and z + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + double vfrac_scale = 1.0; + + // short-range forces + + int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + // need minimg() for x0 difference since not ghosted + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + + rsq = delx*delx + 
dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0; + jtype = type[j]; + + r = sqrt(rsq); + + // short-range interaction distance based on initial particle position + // 0.9 and 1.35 are constants + + d_ij = MIN(0.9*sqrt(rsq0),1.35*lc); + + // short-range contact forces + // 15 is constant taken from the EMU Theory Manual + // Silling, 12 May 2005, p 18 + + if (r < d_ij) { + dr = r - d_ij; + + // kshort based upon short-range force constant + // of the bond-based theory used in PMB model + + double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) / + (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]); + rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]); + + if (r > 0.0) fpair = -(rk/r); + else fpair = 0.0; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair*vfrac[i],delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + + // wait until all threads are done since we + // need to distribute the work differently. + sync_threads(); + +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. 
+ const int idelta = 1 + nlocal/comm->nthreads; + iifrom = tid*idelta; + iito = iifrom + idelta; + if (iito > nlocal) + iito = nlocal; +#else + iifrom = 0; + iito = nlocal; +#endif + + // Compute the dilatation on each particle + compute_dilatation_thr(iifrom, iito); + + // wait until all threads are done before communication + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { // communicate dilatation (theta) of each particle + comm->forward_comm_pair(this); + // communicate wighted volume (wvolume) upon every reneighbor + if (neighbor->ago == 0) + comm->forward_comm_fix(modify->fix[ifix_peri]); + } + + sync_threads(); + + // Volume-dependent part of the energy + if (EFLAG) { + for (i = iifrom; i < iito; i++) { + itype = type[i]; + if (eflag_global) + eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); + if (eflag_atom) + eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]); + } + } + + // loop over my particles and their partners + // partner list contains all bond partners, so I-J appears twice + // if bond already broken, skip this partner + // first = true if this is first neighbor of particle i + + bool first; + double omega_minus, omega_plus; + + for (i = iifrom; i < iito; ++i) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jnum = npartner[i]; + first = true; + + for (jj = 0; jj < jnum; jj++) { + if (partner[i][jj] == 0) continue; + j = atom->map(partner[i][jj]); + + // check if lost a partner without first breaking bond + + if (j < 0) { + partner[i][jj] = 0; + continue; + } + + // compute force density, add to PD equation of motion + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if 
(periodic) domain->minimum_image(delx0,dely0,delz0); + jtype = type[j]; + delta = cut[itype][jtype]; + r = sqrt(rsq); + dr = r - r0[i][jj]; + + // avoid roundoff errors + + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + omega_plus = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0); + omega_minus = influence_function(delx0,dely0,delz0); + rk = ( (3.0 * bulkmodulus[itype][itype]) - + (5.0 * shearmodulus[itype][itype]) ) * vfrac[j] * vfrac_scale * + ( (omega_plus * theta[i] / wvolume[i]) + + ( omega_minus * theta[j] / wvolume[j] ) ) * r0[i][jj]; + rk += 15.0 * ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) * + ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * dr; + + if (r > 0.0) fbond = -(rk/r); + else fbond = 0.0; + + f[i][0] += delx*fbond; + f[i][1] += dely*fbond; + f[i][2] += delz*fbond; + + // since I-J is double counted, set newton off & use 1/2 factor and I,I + + double deviatoric_extension = dr - (theta[i]* r0[i][jj] / 3.0); + if (EFLAG) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) * + omega_plus*(deviatoric_extension * deviatoric_extension) * + vfrac[j] * vfrac_scale; + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, + 0.5*fbond*vfrac[i],delx,dely,delz,tid); + + // find stretch in bond I-J and break if necessary + // use s0 from previous timestep + + stretch = dr / r0[i][jj]; + if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0; + + // update s0 for next timestep + + if (first) + s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); + else + s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + + first = false; + } + } + + sync_threads(); + + // store new s0 (in parallel) + for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; +} + +/* 
---------------------------------------------------------------------- */ + +void PairPeriLPSOMP::compute_dilatation_thr(int ifrom, int ito) +{ + int i,j,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0; + double rsq,r,dr; + double delta; + + double **x = atom->x; + int *type = atom->type; + double **x0 = atom->x0; + double *vfrac = atom->vfrac; + double vfrac_scale = 1.0; + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume; + + int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic; + + // compute the dilatation theta + + for (i = ifrom; i < ito; i++) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + jnum = npartner[i]; + theta[i] = 0.0; + itype = type[i]; + + for (jj = 0; jj < jnum; jj++) { + + // if bond already broken, skip this partner + if (partner[i][jj] == 0) continue; + + // Look up local index of this partner particle + j = atom->map(partner[i][jj]); + + // Skip if particle is "lost" + if (j < 0) continue; + + // Compute force density and add to PD equation of motion + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + + r = sqrt(rsq); + dr = r - r0[i][jj]; + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + jtype = type[j]; + delta = cut[itype][jtype]; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = 
(-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr * + vfrac[j] * vfrac_scale; + } + + // if wvolume[i] is zero, then particle i has no bonds + // therefore, the dilatation is set to + + if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i]; + else theta[i] = 0; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairPeriLPSOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairPeriLPS::memory_usage(); + + return bytes; +} + diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h new file mode 100644 index 000000000..2068830ca --- /dev/null +++ b/src/USER-OMP/pair_peri_lps_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(peri/lps/omp,PairPeriLPSOMP) + +#else + +#ifndef LMP_PAIR_PERI_LPS_OMP_H +#define LMP_PAIR_PERI_LPS_OMP_H + +#include "pair_peri_lps.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP { + + public: + PairPeriLPSOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + void compute_dilatation_thr(int ifrom, int ito); + + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp new file mode 100644 index 000000000..4e46d142d --- /dev/null +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -0,0 +1,312 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "float.h" +#include "pair_peri_pmb_omp.h" +#include "fix.h" +#include "fix_peri_neigh.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "lattice.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) : + PairPeriPMB(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairPeriPMBOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow bond forces array if necessary + + if (atom->nmax > nmax) { + memory->destroy(s0_new); + nmax = atom->nmax; + memory->create(s0_new,nmax,"pair:s0_new"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else { + if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); + else eval<0,0,0>(f, ifrom, ito, tid); + } + + // reduce per thread forces into global force array. 
+ data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. + if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template +void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0; + double rsq,r,dr,rk,evdwl,fpair,fbond; + int *ilist,*jlist,*numneigh,**firstneigh; + double d_ij,delta,stretch; + + evdwl = 0.0; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + + double *vfrac = atom->vfrac; + double *s0 = atom->s0; + double **x0 = atom->x0; + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + + // lc = lattice constant + // init_style guarantees it's the same in x, y, and z + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + double vfrac_scale = 1.0; + + // short-range forces + + int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + // need minimg() for x0 difference since not ghosted + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) 
domain->minimum_image(delx0,dely0,delz0); + rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0; + jtype = type[j]; + + r = sqrt(rsq); + + // short-range interaction distance based on initial particle position + // 0.9 and 1.35 are constants + + d_ij = MIN(0.9*sqrt(rsq0),1.35*lc); + + // short-range contact forces + // 15 is constant taken from the EMU Theory Manual + // Silling, 12 May 2005, p 18 + + if (r < d_ij) { + dr = r - d_ij; + + rk = (15.0 * kspring[itype][jtype] * vfrac[j]) * + (dr / cut[itype][jtype]); + if (r > 0.0) fpair = -(rk/r); + else fpair = 0.0; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair*vfrac[i],delx,dely,delz,tid); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + + // wait until all threads are done since we + // need to distribute the work differently. + sync_threads(); + +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. 
+ const int idelta = 1 + nlocal/comm->nthreads; + iifrom = tid*idelta; + iito = iifrom + idelta; + if (iito > nlocal) + iito = nlocal; +#else + iifrom = 0; + iito = nlocal; +#endif + + // loop over my particles and their partners + // partner list contains all bond partners, so I-J appears twice + // if bond already broken, skip this partner + // first = true if this is first neighbor of particle i + + bool first; + + for (i = iifrom; i < iito; ++i) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jnum = npartner[i]; + s0_new[i] = DBL_MAX; + first = true; + + for (jj = 0; jj < jnum; jj++) { + if (partner[i][jj] == 0) continue; + j = atom->map(partner[i][jj]); + + // check if lost a partner without first breaking bond + + if (j < 0) { + partner[i][jj] = 0; + continue; + } + + // compute force density, add to PD equation of motion + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + delta = cut[itype][jtype]; + r = sqrt(rsq); + dr = r - r0[i][jj]; + + // avoid roundoff errors + + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + stretch = dr / r0[i][jj]; + rk = (kspring[itype][jtype] * vfrac[j]) * vfrac_scale * stretch; + if (r > 0.0) fbond = -(rk/r); + else fbond = 0.0; + + f[i][0] += delx*fbond; + f[i][1] += dely*fbond; + f[i][2] += delz*fbond; + + // since I-J is double counted, set newton off & use 1/2 factor and I,I + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) + ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, + 0.5*fbond*vfrac[i],delx,dely,delz,tid); + + // find stretch in bond I-J and break if necessary + // use s0 from previous timestep + + if (stretch > MIN(s0[i],s0[j])) 
partner[i][jj] = 0; + + // update s0 for next timestep + + if (first) + s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); + else + s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + first = false; + } + } + + sync_threads(); + + // store new s0 + for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; +} + +/* ---------------------------------------------------------------------- */ + +double PairPeriPMBOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairPeriPMB::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h new file mode 100644 index 000000000..9940e5ed1 --- /dev/null +++ b/src/USER-OMP/pair_peri_pmb_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(peri/pmb/omp,PairPeriPMBOMP) + +#else + +#ifndef LMP_PAIR_PERI_PMB_OMP_H +#define LMP_PAIR_PERI_PMB_OMP_H + +#include "pair_peri_pmb.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP { + + public: + PairPeriPMBOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_rebo_omp.cpp b/src/USER-OMP/pair_rebo_omp.cpp new file mode 100644 index 000000000..70b5c4e8a --- /dev/null +++ b/src/USER-OMP/pair_rebo_omp.cpp @@ -0,0 +1,33 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#include "pair_rebo_omp.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {}  // everything else is inherited from PairAIREBOOMP
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairREBOOMP::settings(int narg, char **arg)
+{
+  if (narg != 0) error->all(FLERR,"Illegal pair_style command");  // rebo/omp accepts no arguments
+
+  cutlj = 0.0;  // zero LJ cutoff
+  ljflag = torflag = 0;  // both flags cleared; presumably disables the LJ and torsion terms of the base class - confirm
+}
diff --git a/src/USER-OMP/pair_rebo_omp.h b/src/USER-OMP/pair_rebo_omp.h
new file mode 100644
index 000000000..4606e56ae
--- /dev/null
+++ b/src/USER-OMP/pair_rebo_omp.h
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(rebo/omp,PairREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_REBO_OMP_H
+#define LMP_PAIR_REBO_OMP_H
+
+#include "pair_airebo_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairREBOOMP : public PairAIREBOOMP {
+ public:
+  PairREBOOMP(class LAMMPS *);
+  virtual void settings(int, char **);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp
new file mode 100644
index 000000000..487055305
--- /dev/null
+++ b/src/USER-OMP/pair_resquared_omp.cpp
@@ -0,0 +1,210 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_resquared_omp.h"
+#include "math_extra.h"
+#include "atom.h"
+#include "comm.h"
+#include "atom_vec_ellipsoid.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) :
+  PairRESquared(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairRESquaredOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, **torque;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);  // also sets ifrom, ito and tid
+    torque = atom->torque + tid*nall;  // torque pointer offset by tid*nall rows for this thread
+
+    if (evflag) {  // pick the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation from the runtime flags
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
+        else eval<1,1,0>(f, torque, ifrom, ito, tid);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
+        else eval<1,0,0>(f, torque, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torques into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // order follows the eval<ev,e,newton> calls in compute()
+void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double evdwl=0.0,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;  // evdwl is passed to the tally even when EFLAG is 0, so give it a defined value
+  double fforce[3],ttor[3],rtor[3],r12[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  RE2Vars wi,wj;
+
+  double **x = atom->x;
+  int *ellipsoid = atom->ellipsoid;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+
+  double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itype = type[i];
+
+    // not a LJ sphere
+
+    if (lshape[itype] != 0.0) precompute_i(i,wi);
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];  // read the special-bond bits before they are masked off
+      j &= NEIGHMASK;
+
+      // r12 = center to center vector
+
+      r12[0] = x[j][0]-x[i][0];
+      r12[1] = x[j][1]-x[i][1];
+      r12[2] = x[j][2]-x[i][2];
+      rsq = MathExtra::dot3(r12,r12);
+      jtype = type[j];
+
+      // compute if less than cutoff
+
+      if (rsq < cutsq[itype][jtype]) {
+        switch (form[itype][jtype]) {
+
+        case SPHERE_SPHERE:
+          r2inv = 1.0/rsq;
+          r6inv = r2inv*r2inv*r2inv;
+          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj *= -r2inv;
+          if (EFLAG) one_eng =
+            r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
+            offset[itype][jtype];
+          fforce[0] = r12[0]*forcelj;
+          fforce[1] = r12[1]*forcelj;
+          fforce[2] = r12[2]*forcelj;
+          break;
+
+        case SPHERE_ELLIPSE:
+          precompute_i(j,wj);
+          if (NEWTON_PAIR || j < nlocal) {
+            one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,true);
+            tor[j][0] += rtor[0]*factor_lj;
+            tor[j][1] += rtor[1]*factor_lj;
+            tor[j][2] += rtor[2]*factor_lj;
+          } else
+            one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
+          break;
+
+        case ELLIPSE_SPHERE:
+          one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
+          tor[i][0] += ttor[0]*factor_lj;
+          tor[i][1] += ttor[1]*factor_lj;
+          tor[i][2] += ttor[2]*factor_lj;
+          break;
+
+        default:
+          precompute_i(j,wj);
+          one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
+          tor[i][0] += ttor[0]*factor_lj;
+          tor[i][1] += ttor[1]*factor_lj;
+          tor[i][2] += ttor[2]*factor_lj;
+          if (NEWTON_PAIR || j < nlocal) {
+            tor[j][0] += rtor[0]*factor_lj;
+            tor[j][1] += rtor[1]*factor_lj;
+            tor[j][2] += rtor[2]*factor_lj;
+          }
+          break;
+        }
+
+        fforce[0] *= factor_lj;
+        fforce[1] *= factor_lj;
+        fforce[2] *= factor_lj;
+        f[i][0] += fforce[0];
+        f[i][1] += fforce[1];
+        f[i][2] += fforce[2];
+
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j][0] -= fforce[0];
+          f[j][1] -= fforce[1];
+          f[j][2] -= fforce[2];
+        }
+
+        if (EFLAG) evdwl = factor_lj*one_eng;
+
+        if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                     evdwl,0.0,fforce[0],fforce[1],fforce[2],
+                                     -r12[0],-r12[1],-r12[2],tid);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairRESquaredOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairRESquared::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h
new file mode 100644
index 000000000..2a50bb6dd
--- /dev/null
+++ b/src/USER-OMP/pair_resquared_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(resquared/omp,PairRESquaredOMP)
+
+#else
+
+#ifndef LMP_PAIR_RESQUARED_OMP_H
+#define LMP_PAIR_RESQUARED_OMP_H
+
+#include "pair_resquared.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairRESquaredOMP : public PairRESquared, public ThrOMP {
+
+ public:
+  PairRESquaredOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairRESquared::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // order follows the eval<ev,e,newton> calls in compute()
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp
similarity index 82%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_soft_omp.cpp
index 8ed82c5e5..7667efa98 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_soft_omp.cpp
@@ -1,163 +1,160 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_soft_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define SMALL 1.0e-4  // separations r <= SMALL get fpair = 0.0 in eval()
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairSoftOMP::PairSoftOMP(LAMMPS *lmp) :
+  PairSoft(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairSoftOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
         if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
         else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
         if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
         else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,rsq,arg,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        r = sqrt(rsq);
+        arg = PI/cut[itype][jtype];
+        if (r > SMALL) fpair = factor_lj * prefactor[itype][jtype] *
+                         sin(arg*r) * arg/r;  // the extra 1/r lets fpair multiply delx/dely/delz directly
+        else fpair = 0.0;  // avoid dividing by a near-zero r
 
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
-        if (EFLAG) {
-          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
-          evdwl *= factor_lj;
-        }
+        if (EFLAG)
+          evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r));
 
         if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
                                  evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairSoftOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairSoft::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h
new file mode 100644
index 000000000..840d87460
--- /dev/null
+++ b/src/USER-OMP/pair_soft_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(soft/omp,PairSoftOMP)
+
+#else
+
+#ifndef LMP_PAIR_SOFT_OMP_H
+#define LMP_PAIR_SOFT_OMP_H
+
+#include "pair_soft.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairSoftOMP : public PairSoft, public ThrOMP {
+
+ public:
+  PairSoftOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairSoft::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // order follows the eval<ev,e,newton> calls in compute()
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp
new file mode 100644
index 000000000..5d7f1a60d
--- /dev/null
+++ b/src/USER-OMP/pair_sw_omp.cpp
@@ -0,0 +1,212 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_sw_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairSWOMP::PairSWOMP(LAMMPS *lmp) :
+  PairSW(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSWOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);  // also sets ifrom, ito and tid
+
+    if (evflag) {  // pick the eval<EVFLAG,EFLAG> instantiation from the runtime flags
+      if (eflag) {
+        eval<1,1>(f, ifrom, ito, tid);
+      } else {
+        eval<1,0>(f, ifrom, ito, tid);
+      }
+    } else eval<0,0>(f, ifrom, ito, tid);
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG>  // order follows the eval<ev,e> calls in compute()
+void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag;
+  int itype,jtype,ktype,ijparam,ikparam,ijkparam;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fj[3],fk[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  double fxtmp,fytmp,fztmp;  // force on atom i, accumulated locally and added to f[i] once per i
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    // two-body interactions, skip half of them
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {  // tag-parity rule decides which of (i,j)/(j,i) is evaluated
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {  // equal tags: break the tie on coordinates (z, then y, then x)
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      ijparam = elem2param[itype][jtype][jtype];
+      if (rsq > params[ijparam].cutsq) continue;
+
+      twobody(&params[ijparam],rsq,fpair,EFLAG,evdwl);
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+                               evdwl,0.0,fpair,delx,dely,delz,tid);
+    }
+
+    jnumm1 = jnum - 1;
+
+    for (jj = 0; jj < jnumm1; jj++) {  // three-body part: every neighbor pair j<k of atom i
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+      if (rsq1 > params[ijparam].cutsq) continue;
+
+      double fjxtmp,fjytmp,fjztmp;  // force on atom j, accumulated over the k loop
+      fjxtmp = fjytmp = fjztmp = 0.0;
+
+      for (kk = jj+1; kk < jnum; kk++) {
+        k = jlist[kk];
+        k &= NEIGHMASK;
+        ktype = map[type[k]];
+        ikparam = elem2param[itype][ktype][ktype];
+        ijkparam = elem2param[itype][jtype][ktype];
+
+        delr2[0] = x[k][0] - xtmp;
+        delr2[1] = x[k][1] - ytmp;
+        delr2[2] = x[k][2] - ztmp;
+        rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+        if (rsq2 > params[ikparam].cutsq) continue;
+
+        threebody(&params[ijparam],&params[ikparam],&params[ijkparam],
+                  rsq1,rsq2,delr1,delr2,fj,fk,EFLAG,evdwl);
+
+        fxtmp -= fj[0] + fk[0];
+        fytmp -= fj[1] + fk[1];
+        fztmp -= fj[2] + fk[2];
+        fjxtmp += fj[0];
+        fjytmp += fj[1];
+        fjztmp += fj[2];
+        f[k][0] += fk[0];
+        f[k][1] += fk[1];
+        f[k][2] += fk[2];
+
+        if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid);
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSWOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairSW::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h
new file mode 100644
index 000000000..40052d7d4
--- /dev/null
+++ b/src/USER-OMP/pair_sw_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S.
Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(sw/omp,PairSWOMP)
+
+#else
+
+#ifndef LMP_PAIR_SW_OMP_H
+#define LMP_PAIR_SW_OMP_H
+
+#include "pair_sw.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairSWOMP : public PairSW, public ThrOMP {
+
+ public:
+  PairSWOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairSW::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG>  // two flags only: compute() calls eval<1,1>, eval<1,0>, eval<0,0>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_table_omp.cpp
similarity index 61%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_table_omp.cpp
index 8ed82c5e5..6b14d4c98 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_table_omp.cpp
@@ -1,163 +1,202 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_table_omp.h"
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairTableOMP::PairTableOMP(LAMMPS *lmp) :
+  PairTable(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairTableOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
         if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
         else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
         if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
         else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
+  int i,j,ii,jj,jnum,itype,jtype,itable;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,factor_lj,fraction,value,a,b;
   int *ilist,*jlist,*numneigh,**firstneigh;
+  Table *tb;
+
+  union_int_float_t rsq_lookup;  // reinterprets rsq as an integer for the bitmap lookup
+  int tlm1 = tablength - 1;  // table indices >= tlm1 are rejected as beyond the outer cutoff
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
-
+      
       if (rsq < cutsq[itype][jtype]) {
-        r2inv = 1.0/rsq;
-        r6inv = r2inv*r2inv*r2inv;
-        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-        fpair = factor_lj*forcelj*r2inv;
+        tb = &tables[tabindex[itype][jtype]];
+        if (rsq < tb->innersq)
+          error->one(FLERR,"Pair distance < table inner cutoff");  // NOTE(review): called from inside the omp parallel region - confirm error->one() is safe there
+
+        if (tabstyle == LOOKUP) {
+          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1)
+            error->one(FLERR,"Pair distance > table outer cutoff");
+          fpair = factor_lj * tb->f[itable];
+        } else if (tabstyle == LINEAR) {
+          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1)
+            error->one(FLERR,"Pair distance > table outer cutoff");
+          fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
+          value = tb->f[itable] + fraction*tb->df[itable];
+          fpair = factor_lj * value;
+        } else if (tabstyle == SPLINE) {
+          itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+          if (itable >= tlm1)
+            error->one(FLERR,"Pair distance > table outer cutoff");
+          b = (rsq - tb->rsq[itable]) * tb->invdelta;
+          a = 1.0 - b;
+          value = a * tb->f[itable] + b * tb->f[itable+1] +
+            ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) *
+            tb->deltasq6;
+          fpair = factor_lj * value;
+        } else {  // remaining style; the energy branch below treats it together with LINEAR as BITMAP
+          rsq_lookup.f = rsq;
+          itable = rsq_lookup.i & tb->nmask;
+          itable >>= tb->nshiftbits;
+          fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
+          value = tb->f[itable] + fraction*tb->df[itable];
+          fpair = factor_lj * value;
+        }
 
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (EFLAG) {
-          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
-            offset[itype][jtype];
+          if (tabstyle == LOOKUP)
+            evdwl = tb->e[itable];
+          else if (tabstyle == LINEAR || tabstyle == BITMAP)
+            evdwl = tb->e[itable] + fraction*tb->de[itable];  // reuses itable/fraction from the force branch
+          else
+            evdwl = a * tb->e[itable] + b * tb->e[itable+1] +
+              ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) *
+              tb->deltasq6;
           evdwl *= factor_lj;
         }
 
-        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+        if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
                                  evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
+
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairTableOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairTable::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h
new file mode 100644
index 000000000..6fd1ce74a
--- /dev/null
+++ b/src/USER-OMP/pair_table_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(table/omp,PairTableOMP)
+
+#else
+
+#ifndef LMP_PAIR_TABLE_OMP_H
+#define LMP_PAIR_TABLE_OMP_H
+
+#include "pair_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTableOMP : public PairTable, public ThrOMP {
+
+ public:
+  PairTableOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairTable::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // order follows the eval<ev,e,newton> calls in compute()
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp
new file mode 100644
index 000000000..f59a8488f
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_omp.cpp
@@ -0,0 +1,252 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tersoff_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) : + PairTersoff(lmp), ThrOMP(lmp, PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + ev_setup_thr(this); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int ifrom, ito, tid; + double **f; + + f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid); + else eval<1,1,0>(f, ifrom, ito, tid); + } else { + if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid); + else eval<1,0,0>(f, ifrom, ito, tid); + } + } else eval<0,0,0>(f, ifrom, ito, tid); + + // reduce per thread forces into global force array. + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); + } // end of omp parallel region + + // reduce per thread energy and virial, if requested. 
+ if (evflag) ev_reduce_thr(this); + if (vflag_fdotr) virial_fdotr_compute(); +} + +template <int EVFLAG, int EFLAG, int VFLAG_ATOM> +void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid) +{ + int i,j,k,ii,jj,kk,jnum; + int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij,prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + double **x = atom->x; + int *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + double fxtmp,fytmp,fztmp; + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + // two-body interactions, skip half of them + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl); + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,tid); + } + + // three-body interactions + // skip immediately 
if I-J is not within cutoff + double fjxtmp,fjytmp,fjztmp; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > params[iparam_ij].cutsq) continue; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + fjxtmp = fjytmp = fjztmp = 0.0; + zeta_ij = 0.0; + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2); + } + + // pairwise force due to zeta + + force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,prefactor,EFLAG,evdwl); + + fxtmp += delr1[0]*fpair; + fytmp += delr1[1]*fpair; + fztmp += delr1[2]*fpair; + fjxtmp -= delr1[0]*fpair; + fjytmp -= delr1[1]*fpair; + fjztmp -= delr1[2]*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, + -fpair,-delr1[0],-delr1[1],-delr1[2],tid); + + // attractive term via loop over k + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + attractive(&params[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + fjxtmp += fj[0]; + fjytmp += fj[1]; + fjztmp += fj[2]; + f[k][0] += fk[0]; + 
f[k][1] += fk[1]; + f[k][2] += fk[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid); + } + f[j][0] += fjxtmp; + f[j][1] += fjytmp; + f[j][2] += fjztmp; + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairTersoffOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTersoff::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h new file mode 100644 index 000000000..5e5dc066d --- /dev/null +++ b/src/USER-OMP/pair_tersoff_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/omp,PairTersoffOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_OMP_H +#define LMP_PAIR_TERSOFF_OMP_H + +#include "pair_tersoff.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffOMP : public PairTersoff, public ThrOMP { + + public: + PairTersoffOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.cpp b/src/USER-OMP/pair_tersoff_zbl_omp.cpp new file mode 100644 index 000000000..4265d84fb --- /dev/null +++ b/src/USER-OMP/pair_tersoff_zbl_omp.cpp @@ -0,0 +1,296 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Aidan Thompson (SNL) - original Tersoff implementation + David Farrell (NWU) - ZBL addition +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_tersoff_zbl_omp.h" +#include "atom.h" +#include "update.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function +------------------------------------------------------------------------- */ + +static double F_fermi(const double r, const double expsc, const double cut) +{ + return 1.0 / (1.0 + exp(-expsc*(r-cut))); +} + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function derivative with respect to r +------------------------------------------------------------------------- */ + +static double F_fermi_d(const double r, const double expsc, const double cut) +{ + return expsc*exp(-expsc*(r-cut)) / pow(1.0 + 
exp(-expsc*(r-cut)),2.0); +} + +/* ---------------------------------------------------------------------- */ + +PairTersoffZBLOMP::PairTersoffZBLOMP(LAMMPS *lmp) : PairTersoffOMP(lmp) +{ + // hard-wired constants in metal or real units + // a0 = Bohr radius + // epsilon0 = permittivity of vacuum = q / energy-distance units + // e = unit charge + // 1 Kcal/mole = 0.043365121 eV + + if (strcmp(update->unit_style,"metal") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635; + global_e = 1.0; + } else if (strcmp(update->unit_style,"real") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635 * 0.043365121; + global_e = 1.0; + } else error->all(FLERR,"Pair tersoff/zbl requires metal or real units"); +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::read_file(char *file) +{ + int params_per_line = 21; + char **words = new char*[params_per_line+1]; + + delete [] params; + params = NULL; + nparams = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = fopen(file,"r"); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open Tersoff potential file %s",file); + error->one(FLERR,str); + } + } + + // read each line out of file, skipping blank lines or leading '#' + // store line of params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if (ptr = strchr(line,'#')) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if 
(comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if (ptr = strchr(line,'#')) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in Tersoff potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error check their values + + if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].powerm = atof(words[3]); + params[nparams].gamma = atof(words[4]); + params[nparams].lam3 = atof(words[5]); + params[nparams].c = atof(words[6]); + params[nparams].d = atof(words[7]); + params[nparams].h = atof(words[8]); + params[nparams].powern = atof(words[9]); + params[nparams].beta = atof(words[10]); + params[nparams].lam2 = atof(words[11]); + params[nparams].bigb = atof(words[12]); + params[nparams].bigr = atof(words[13]); + params[nparams].bigd = 
atof(words[14]); + params[nparams].lam1 = atof(words[15]); + params[nparams].biga = atof(words[16]); + params[nparams].Z_i = atof(words[17]); + params[nparams].Z_j = atof(words[18]); + params[nparams].ZBLcut = atof(words[19]); + params[nparams].ZBLexpscale = atof(words[20]); + + // currently only allow m exponent of 1 or 3 + + params[nparams].powermint = int(params[nparams].powerm); + + if ( + params[nparams].lam3 < 0.0 || params[nparams].c < 0.0 || + params[nparams].d < 0.0 || params[nparams].powern < 0.0 || + params[nparams].beta < 0.0 || params[nparams].lam2 < 0.0 || + params[nparams].bigb < 0.0 || params[nparams].bigr < 0.0 || + params[nparams].bigd < 0.0 || + params[nparams].bigd > params[nparams].bigr || + params[nparams].lam3 < 0.0 || params[nparams].biga < 0.0 || + params[nparams].powerm - params[nparams].powermint != 0.0 || + (params[nparams].powermint != 3 && params[nparams].powermint != 1) || + params[nparams].gamma < 0.0 || + params[nparams].Z_i < 1.0 || params[nparams].Z_j < 1.0 || + params[nparams].ZBLcut < 0.0 || params[nparams].ZBLexpscale < 0.0) + error->all(FLERR,"Illegal Tersoff parameter"); + + nparams++; + } + + delete [] words; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::force_zeta(Param *param, double rsq, double zeta_ij, + double &fforce, double &prefactor, + int eflag, double &eng) +{ + double r,fa,fa_d,bij; + + r = sqrt(rsq); + + fa = (r > param->bigr + param->bigd) ? 0.0 : + -param->bigb * exp(-param->lam2 * r) * ters_fc(r,param) * + F_fermi(r,param->ZBLexpscale,param->ZBLcut); + + fa_d = (r > param->bigr + param->bigd) ? 
0.0 : + param->bigb * exp(-param->lam2 * r) * + (param->lam2 * ters_fc(r,param) * + F_fermi(r,param->ZBLexpscale,param->ZBLcut) - + ters_fc_d(r,param) * F_fermi(r,param->ZBLexpscale,param->ZBLcut) + - ters_fc(r,param) * F_fermi_d(r,param->ZBLexpscale,param->ZBLcut)); + + bij = ters_bij(zeta_ij,param); + fforce = 0.5*bij*fa_d / r; + prefactor = -0.5*fa * ters_bij_d(zeta_ij,param); + if (eflag) eng = 0.5*bij*fa; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::repulsive(Param *param, double rsq, double &fforce, + int eflag, double &eng) +{ + double r,tmp_fc,tmp_fc_d,tmp_exp; + + // Tersoff repulsive portion + + r = sqrt(rsq); + tmp_fc = ters_fc(r,param); + tmp_fc_d = ters_fc_d(r,param); + tmp_exp = exp(-param->lam1 * r); + double fforce_ters = param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1); + double eng_ters = tmp_fc * param->biga * tmp_exp; + + // ZBL repulsive portion + + double esq = pow(global_e,2.0); + double a_ij = (0.8854*global_a_0) / + (pow(param->Z_i,0.23) + pow(param->Z_j,0.23)); + double premult = (param->Z_i * param->Z_j * esq)/(4.0*MY_PI*global_epsilon_0); + double r_ov_a = r/a_ij; + double phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + + 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); + double dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - + 0.9423*0.5099*exp(-0.9423*r_ov_a) - + 0.4029*0.2802*exp(-0.4029*r_ov_a) - + 0.2016*0.02817*exp(-0.2016*r_ov_a)); + double fforce_ZBL = premult*-pow(r,-2.0)* phi + premult*pow(r,-1.0)*dphi; + double eng_ZBL = premult*(1.0/r)*phi; + + // combine two parts with smoothing by Fermi-like function + + fforce = -(-F_fermi_d(r,param->ZBLexpscale,param->ZBLcut) * eng_ZBL + + (1.0 - F_fermi(r,param->ZBLexpscale,param->ZBLcut))*fforce_ZBL + + F_fermi_d(r,param->ZBLexpscale,param->ZBLcut)*eng_ters + + F_fermi(r,param->ZBLexpscale,param->ZBLcut)*fforce_ters) / r; + + if (eflag) + eng = (1.0 - 
F_fermi(r,param->ZBLexpscale,param->ZBLcut))*eng_ZBL + + F_fermi(r,param->ZBLexpscale,param->ZBLcut)*eng_ters; +} + diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.h b/src/USER-OMP/pair_tersoff_zbl_omp.h new file mode 100644 index 000000000..84d6ef113 --- /dev/null +++ b/src/USER-OMP/pair_tersoff_zbl_omp.h @@ -0,0 +1,45 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/zbl/omp,PairTersoffZBLOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_ZBL_OMP_H +#define LMP_PAIR_TERSOFF_ZBL_OMP_H + +#include "pair_tersoff_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffZBLOMP : public PairTersoffOMP { + public: + PairTersoffZBLOMP(class LAMMPS *); + virtual ~PairTersoffZBLOMP() {} + + protected: + double global_a_0; // Bohr radius for Coulomb repulsion + double global_epsilon_0; // permittivity of vacuum for Coulomb repulsion + double global_e; // proton charge (negative of electron charge) + + virtual void read_file(char *); + virtual void repulsive(Param *, double, double &, int, double &); + virtual void force_zeta(Param *, double, double, double &, double &, int, double &); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp similarity index 79% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_yukawa_colloid_omp.cpp index 8ed82c5e5..710ad9df1 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ 
b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -1,163 +1,164 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_yukawa_colloid_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) : + PairYukawaColloid(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairYukawaColloidOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, 
ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj; + double rsq,r,rinv,r2inv,screening,forceyukawa,factor; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; + double *radius = atom->radius; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; + radi = radius[i]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*(r-(radi+radj))); + forceyukawa = 
a[itype][jtype] * screening; + + fpair = factor*forceyukawa * rinv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype]; + evdwl *= factor; } - if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairYukawaColloidOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairYukawaColloid::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h new file mode 100644 index 000000000..9483cd15c --- /dev/null +++ b/src/USER-OMP/pair_yukawa_colloid_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/colloid/omp,PairYukawaColloidOMP) + +#else + +#ifndef LMP_PAIR_YUKAWA_COLLOID_OMP_H +#define LMP_PAIR_YUKAWA_COLLOID_OMP_H + +#include "pair_yukawa_colloid.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP { + + public: + PairYukawaColloidOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp similarity index 82% copy from src/USER-OMP/pair_lj_cut_omp.cpp copy to src/USER-OMP/pair_yukawa_omp.cpp index 8ed82c5e5..1380e2239 100644 --- a/src/USER-OMP/pair_lj_cut_omp.cpp +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -1,163 +1,162 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" -#include "pair_lj_cut_omp.h" +#include "pair_yukawa_omp.h" #include "atom.h" #include "comm.h" #include "force.h" #include "neighbor.h" #include "neigh_list.h" -#include "neigh_request.h" -#include "memory.h" -#include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : - PairLJCut(lmp), ThrOMP(lmp, PAIR) +PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) : + PairYukawa(lmp), ThrOMP(lmp, PAIR) { respa_enable = 0; } /* ---------------------------------------------------------------------- */ -void PairLJCutOMP::compute(int eflag, int vflag) +void PairYukawaOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); ev_setup_thr(this); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(shared) #endif { int ifrom, ito, tid; double **f; f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads); if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid); else eval<1,1,0>(f, ifrom, ito, tid); } else { if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid); else eval<1,0,0>(f, ifrom, ito, tid); } } else { if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid); else eval<0,0,0>(f, ifrom, ito, tid); } // reduce per thread forces into global force array. - force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid); + data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); } // end of omp parallel region // reduce per thread energy and virial, if requested. 
if (evflag) ev_reduce_thr(this); if (vflag_fdotr) virial_fdotr_compute(); } template -void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid) +void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; + double rsq,r2inv,r,rinv,screening,forceyukawa,factor; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; double **x = atom->x; int *type = atom->type; int nlocal = atom->nlocal; double *special_lj = force->special_lj; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; + factor = special_lj[sbmask(j)]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - fpair = factor_lj*forcelj*r2inv; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forceyukawa = a[itype][jtype] * screening * (kappa + rinv); + + fpair = factor*forceyukawa * r2inv; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; + evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype]; + evdwl *= factor; } if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, evdwl,0.0,fpair,delx,dely,delz,tid); } } f[i][0] += fxtmp; 
f[i][1] += fytmp; f[i][2] += fztmp; } } /* ---------------------------------------------------------------------- */ -double PairLJCutOMP::memory_usage() +double PairYukawaOMP::memory_usage() { double bytes = memory_usage_thr(); - bytes += PairLJCut::memory_usage(); + bytes += PairYukawa::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h new file mode 100644 index 000000000..e363ac6d1 --- /dev/null +++ b/src/USER-OMP/pair_yukawa_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/omp,PairYukawaOMP) + +#else + +#ifndef LMP_PAIR_YUKAWA_OMP_H +#define LMP_PAIR_YUKAWA_OMP_H + +#include "pair_yukawa.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairYukawaOMP : public PairYukawa, public ThrOMP { + + public: + PairYukawaOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(double **f, int ifrom, int ito, int tid); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index d05fae5b3..37ce1f198 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -1,392 +1,833 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- OpenMP based threading support for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "thr_omp.h" #include "memory.h" #include "atom.h" #include "comm.h" #include "force.h" #include "pair.h" #include "dihedral.h" #if defined(_OPENMP) #include <omp.h> #endif +#include "math_const.h" + using namespace LAMMPS_NS; +using namespace MathConst; /* ---------------------------------------------------------------------- */ ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr) { // initialize fixed size per thread storage eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL; virial_thr = NULL; + lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr"); lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr"); lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr"); lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr"); // variable size per thread, per atom storage // the actually allocation happens via memory->grow() in ev_steup_thr() maxeatom_thr = maxvatom_thr = 0; + evflag_global = evflag_atom = 0; eatom_thr = NULL; vatom_thr = NULL; } /* ---------------------------------------------------------------------- */ ThrOMP::~ThrOMP() { lmp->memory->destroy(eng_vdwl_thr); lmp->memory->destroy(eng_coul_thr); lmp->memory->destroy(eng_bond_thr); lmp->memory->destroy(virial_thr); lmp->memory->destroy(eatom_thr); lmp->memory->destroy(vatom_thr); } /* ---------------------------------------------------------------------- */ -void ThrOMP::ev_zero_acc_thr(int ntotal, int eflag_global, int vflag_global, +void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global, int eflag_atom, int vflag_atom, int nthreads) { int t,i; + + evflag_global = 
(eflag_global || vflag_global); + evflag_atom = (eflag_atom || vflag_atom); for (t = 0; t < nthreads; ++t) { if (eflag_global) eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0; if (vflag_global) for (i = 0; i < 6; ++i) virial_thr[t][i] = 0.0; if (eflag_atom) for (i = 0; i < ntotal; ++i) eatom_thr[t][i] = 0.0; if (vflag_atom) for (i = 0; i < ntotal; ++i) { vatom_thr[t][i][0] = 0.0; vatom_thr[t][i][1] = 0.0; vatom_thr[t][i][2] = 0.0; vatom_thr[t][i][3] = 0.0; vatom_thr[t][i][4] = 0.0; vatom_thr[t][i][5] = 0.0; } } } /* ---------------------------------------------------------------------- */ void ThrOMP::ev_setup_thr(Dihedral *dihed) { int nthreads = lmp->comm->nthreads; // reallocate per-atom arrays if necessary if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) { maxeatom_thr = lmp->atom->nmax; lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); } if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) { maxvatom_thr = lmp->atom->nmax; lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); } int ntotal = (lmp->force->newton_bond) ? 
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - // zero per thread accumulators - ev_zero_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global, - dihed->eflag_atom, dihed->vflag_atom, nthreads); + // set up per thread accumulators + ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global, + dihed->eflag_atom, dihed->vflag_atom, nthreads); } /* ---------------------------------------------------------------------- */ void ThrOMP::ev_setup_thr(Pair *pair) { int nthreads = lmp->comm->nthreads; // reallocate per-atom arrays if necessary if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) { maxeatom_thr = lmp->atom->nmax; lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr"); } if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) { maxvatom_thr = lmp->atom->nmax; lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr"); } int ntotal = (lmp->force->newton) ? (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; - // zero per thread accumulators - ev_zero_acc_thr(ntotal, pair->eflag_global, pair->vflag_global, - pair->eflag_atom, pair->vflag_atom, nthreads); + // set up per thread accumulators + ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global, + pair->eflag_atom, pair->vflag_atom, nthreads); } /* ---------------------------------------------------------------------- reduce the per thread accumulated E/V data into the canonical accumulators. ------------------------------------------------------------------------- */ void ThrOMP::ev_reduce_thr(Dihedral *dihed) { int nthreads = lmp->comm->nthreads; int ntotal = (lmp->force->newton_bond) ? 
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; for (int n = 0; n < nthreads; ++n) { dihed->energy += eng_bond_thr[n]; if (dihed->vflag_either) { dihed->virial[0] += virial_thr[n][0]; dihed->virial[1] += virial_thr[n][1]; dihed->virial[2] += virial_thr[n][2]; dihed->virial[3] += virial_thr[n][3]; dihed->virial[4] += virial_thr[n][4]; dihed->virial[5] += virial_thr[n][5]; if (dihed->vflag_atom) { for (int i = 0; i < ntotal; ++i) { dihed->vatom[i][0] += vatom_thr[n][i][0]; dihed->vatom[i][1] += vatom_thr[n][i][1]; dihed->vatom[i][2] += vatom_thr[n][i][2]; dihed->vatom[i][3] += vatom_thr[n][i][3]; dihed->vatom[i][4] += vatom_thr[n][i][4]; dihed->vatom[i][5] += vatom_thr[n][i][5]; } } } if (dihed->eflag_atom) { for (int i = 0; i < ntotal; ++i) { dihed->eatom[i] += eatom_thr[n][i]; } } } } +/* ---------------------------------------------------------------------- + reduce the per thread accumulated E/V data into the canonical accumulators. +------------------------------------------------------------------------- */ +void ThrOMP::ev_reduce_thr(Pair *pair) +{ + const int nthreads = lmp->comm->nthreads; + const int ntotal = (lmp->force->newton) ? 
+ (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + + for (int n = 0; n < nthreads; ++n) { + pair->eng_vdwl += eng_vdwl_thr[n]; + pair->eng_coul += eng_coul_thr[n]; + if (pair->vflag_either) { + pair->virial[0] += virial_thr[n][0]; + pair->virial[1] += virial_thr[n][1]; + pair->virial[2] += virial_thr[n][2]; + pair->virial[3] += virial_thr[n][3]; + pair->virial[4] += virial_thr[n][4]; + pair->virial[5] += virial_thr[n][5]; + if (pair->vflag_atom) { + for (int i = 0; i < ntotal; ++i) { + pair->vatom[i][0] += vatom_thr[n][i][0]; + pair->vatom[i][1] += vatom_thr[n][i][1]; + pair->vatom[i][2] += vatom_thr[n][i][2]; + pair->vatom[i][3] += vatom_thr[n][i][3]; + pair->vatom[i][4] += vatom_thr[n][i][4]; + pair->vatom[i][5] += vatom_thr[n][i][5]; + } + } + } + if (pair->eflag_atom) { + for (int i = 0; i < ntotal; ++i) { + pair->eatom[i] += eatom_thr[n][i]; + } + } + } +} + /* ---------------------------------------------------------------------- tally eng_vdwl and virial into per thread global and per-atom accumulators need i < nlocal test since called by bond_quartic and dihedral_charmm ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal, int newton_pair, double evdwl, double ecoul, double fpair, double delx, double dely, double delz, int tid) { double evdwlhalf,ecoulhalf,epairhalf,v[6]; if (pair->eflag_either) { if (pair->eflag_global) { if (newton_pair) { eng_vdwl_thr[tid] += evdwl; eng_coul_thr[tid] += ecoul; } else { evdwlhalf = 0.5*evdwl; ecoulhalf = 0.5*ecoul; if (i < nlocal) { eng_vdwl_thr[tid] += evdwlhalf; eng_coul_thr[tid] += ecoulhalf; } if (j < nlocal) { eng_vdwl_thr[tid] += evdwlhalf; eng_coul_thr[tid] += ecoulhalf; } } } if (pair->eflag_atom) { epairhalf = 0.5 * (evdwl + ecoul); if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; } } if (pair->vflag_either) { v[0] = delx*delx*fpair; v[1] 
= dely*dely*fpair; v[2] = delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; if (pair->vflag_global) { if (newton_pair) { virial_thr[tid][0] += v[0]; virial_thr[tid][1] += v[1]; virial_thr[tid][2] += v[2]; virial_thr[tid][3] += v[3]; virial_thr[tid][4] += v[4]; virial_thr[tid][5] += v[5]; } else { if (i < nlocal) { virial_thr[tid][0] += 0.5*v[0]; virial_thr[tid][1] += 0.5*v[1]; virial_thr[tid][2] += 0.5*v[2]; virial_thr[tid][3] += 0.5*v[3]; virial_thr[tid][4] += 0.5*v[4]; virial_thr[tid][5] += 0.5*v[5]; } if (j < nlocal) { virial_thr[tid][0] += 0.5*v[0]; virial_thr[tid][1] += 0.5*v[1]; virial_thr[tid][2] += 0.5*v[2]; virial_thr[tid][3] += 0.5*v[3]; virial_thr[tid][4] += 0.5*v[4]; virial_thr[tid][5] += 0.5*v[5]; } } } if (pair->vflag_atom) { if (newton_pair || i < nlocal) { vatom_thr[tid][i][0] += 0.5*v[0]; vatom_thr[tid][i][1] += 0.5*v[1]; vatom_thr[tid][i][2] += 0.5*v[2]; vatom_thr[tid][i][3] += 0.5*v[3]; vatom_thr[tid][i][4] += 0.5*v[4]; vatom_thr[tid][i][5] += 0.5*v[5]; } if (newton_pair || j < nlocal) { vatom_thr[tid][j][0] += 0.5*v[0]; vatom_thr[tid][j][1] += 0.5*v[1]; vatom_thr[tid][j][2] += 0.5*v[2]; vatom_thr[tid][j][3] += 0.5*v[3]; vatom_thr[tid][j][4] += 0.5*v[4]; vatom_thr[tid][j][5] += 0.5*v[5]; } } } } /* ---------------------------------------------------------------------- - reduce the per thread accumulated E/V data into the canonical accumulators. + tally eng_vdwl and virial into global and per-atom accumulators + for virial, have delx,dely,delz and fx,fy,fz ------------------------------------------------------------------------- */ -void ThrOMP::ev_reduce_thr(Pair *pair) + +void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal, + int newton_pair, double evdwl, double ecoul, + double fx, double fy, double fz, + double delx, double dely, double delz, int tid) { - const int nthreads = lmp->comm->nthreads; - const int ntotal = (lmp->force->newton) ? 
- (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal; + double evdwlhalf,ecoulhalf,epairhalf,v[6]; - for (int n = 0; n < nthreads; ++n) { - pair->eng_vdwl += eng_vdwl_thr[n]; - pair->eng_coul += eng_coul_thr[n]; - if (pair->vflag_either) { - pair->virial[0] += virial_thr[n][0]; - pair->virial[1] += virial_thr[n][1]; - pair->virial[2] += virial_thr[n][2]; - pair->virial[3] += virial_thr[n][3]; - pair->virial[4] += virial_thr[n][4]; - pair->virial[5] += virial_thr[n][5]; - if (pair->vflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->vatom[i][0] += vatom_thr[n][i][0]; - pair->vatom[i][1] += vatom_thr[n][i][1]; - pair->vatom[i][2] += vatom_thr[n][i][2]; - pair->vatom[i][3] += vatom_thr[n][i][3]; - pair->vatom[i][4] += vatom_thr[n][i][4]; - pair->vatom[i][5] += vatom_thr[n][i][5]; - } + if (pair->eflag_either) { + if (pair->eflag_global) { + if (newton_pair) { + eng_vdwl_thr[tid] += evdwl; + eng_coul_thr[tid] += ecoul; + } else { + evdwlhalf = 0.5*evdwl; + ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + eng_vdwl_thr[tid] += evdwlhalf; + eng_coul_thr[tid] += ecoulhalf; + } + if (j < nlocal) { + eng_vdwl_thr[tid] += evdwlhalf; + eng_coul_thr[tid] += ecoulhalf; + } } } if (pair->eflag_atom) { - for (int i = 0; i < ntotal; ++i) { - pair->eatom[i] += eatom_thr[n][i]; + epairhalf = 0.5 * (evdwl + ecoul); + if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf; + if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf; + } + } + + if (pair->vflag_either) { + v[0] = delx*fx; + v[1] = dely*fy; + v[2] = delz*fz; + v[3] = delx*fy; + v[4] = delx*fz; + v[5] = dely*fz; + + if (pair->vflag_global) { + if (newton_pair) { + virial_thr[tid][0] += v[0]; + virial_thr[tid][1] += v[1]; + virial_thr[tid][2] += v[2]; + virial_thr[tid][3] += v[3]; + virial_thr[tid][4] += v[4]; + virial_thr[tid][5] += v[5]; + } else { + if (i < nlocal) { + virial_thr[tid][0] += 0.5*v[0]; + virial_thr[tid][1] += 0.5*v[1]; + virial_thr[tid][2] += 0.5*v[2]; + virial_thr[tid][3] += 
0.5*v[3]; + virial_thr[tid][4] += 0.5*v[4]; + virial_thr[tid][5] += 0.5*v[5]; + } + if (j < nlocal) { + virial_thr[tid][0] += 0.5*v[0]; + virial_thr[tid][1] += 0.5*v[1]; + virial_thr[tid][2] += 0.5*v[2]; + virial_thr[tid][3] += 0.5*v[3]; + virial_thr[tid][4] += 0.5*v[4]; + virial_thr[tid][5] += 0.5*v[5]; + } + } + } + + if (pair->vflag_atom) { + if (newton_pair || i < nlocal) { + vatom_thr[tid][i][0] += 0.5*v[0]; + vatom_thr[tid][i][1] += 0.5*v[1]; + vatom_thr[tid][i][2] += 0.5*v[2]; + vatom_thr[tid][i][3] += 0.5*v[3]; + vatom_thr[tid][i][4] += 0.5*v[4]; + vatom_thr[tid][i][5] += 0.5*v[5]; + } + if (newton_pair || j < nlocal) { + vatom_thr[tid][j][0] += 0.5*v[0]; + vatom_thr[tid][j][1] += 0.5*v[1]; + vatom_thr[tid][j][2] += 0.5*v[2]; + vatom_thr[tid][j][3] += 0.5*v[3]; + vatom_thr[tid][j][4] += 0.5*v[4]; + vatom_thr[tid][j][5] += 0.5*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + called by SW and hbond potentials, newton_pair is always on + virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double ecoul, + double *fj, double *fk, double *drji, double *drki, int tid) +{ + double epairthird,v[6]; + + if (pair->eflag_either) { + if (pair->eflag_global) { + eng_vdwl_thr[tid] += evdwl; + eng_coul_thr[tid] += ecoul; + } + if (pair->eflag_atom) { + epairthird = THIRD * (evdwl + ecoul); + eatom_thr[tid][i] += epairthird; + eatom_thr[tid][j] += epairthird; + eatom_thr[tid][k] += epairthird; + } + } + + if (pair->vflag_either) { + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; + v[1] = drji[1]*fj[1] + drki[1]*fk[1]; + v[2] = drji[2]*fj[2] + drki[2]*fk[2]; + v[3] = drji[0]*fj[1] + drki[0]*fk[1]; + v[4] = drji[0]*fj[2] + drki[0]*fk[2]; + v[5] = drji[1]*fj[2] + drki[1]*fk[2]; + + if 
(pair->vflag_global) { + virial_thr[tid][0] += v[0]; + virial_thr[tid][1] += v[1]; + virial_thr[tid][2] += v[2]; + virial_thr[tid][3] += v[3]; + virial_thr[tid][4] += v[4]; + virial_thr[tid][5] += v[5]; + } + + if (pair->vflag_atom) { + for (int n=0; n < 6; ++n) { + vatom_thr[tid][i][n] += THIRD*v[n]; + vatom_thr[tid][j][n] += THIRD*v[n]; + vatom_thr[tid][k][n] += THIRD*v[n]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + called by AIREBO potential, newton_pair is always on + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl, + double *fi, double *fj, double *fk, + double *drim, double *drjm, double *drkm,int tid) +{ + double epairfourth,v[6]; + + if (pair->eflag_either) { + if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl; + if (pair->eflag_atom) { + epairfourth = 0.25 * evdwl; + eatom_thr[tid][i] += epairfourth; + eatom_thr[tid][j] += epairfourth; + eatom_thr[tid][k] += epairfourth; + eatom_thr[tid][m] += epairfourth; + } + } + + if (pair->vflag_atom) { + v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); + v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); + v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); + v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); + v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); + v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); + + vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; + vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; + vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; + vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; + vatom_thr[tid][k][0] += v[0]; 
vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; + vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; + vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; + vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5]; + } +} + +/* ---------------------------------------------------------------------- + tally ecoul and virial into each of n atoms in list + called by TIP4P potential, newton_pair is always on + changes v values by dividing by n + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid) +{ + int i,j; + + if (pair->eflag_either) { + if (pair->eflag_global) eng_coul_thr[tid] += ecoul; + if (pair->eflag_atom) { + double epairatom = ecoul/n; + for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom; + } + } + + if (pair->vflag_either) { + if (pair->vflag_global) { + virial_thr[tid][0] += v[0]; + virial_thr[tid][1] += v[1]; + virial_thr[tid][2] += v[2]; + virial_thr[tid][3] += v[3]; + virial_thr[tid][4] += v[4]; + virial_thr[tid][5] += v[5]; + } + + if (pair->vflag_atom) { + v[0] /= n; + v[1] /= n; + v[2] /= n; + v[3] /= n; + v[4] /= n; + v[5] /= n; + for (i = 0; i < n; i++) { + j = list[i]; + vatom_thr[tid][j][0] += v[0]; + vatom_thr[tid][j][1] += v[1]; + vatom_thr[tid][j][2] += v[2]; + vatom_thr[tid][j][3] += v[3]; + vatom_thr[tid][j][4] += v[4]; + vatom_thr[tid][j][5] += v[5]; } } } } +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, 
int i3, int i4, + int nlocal, int newton_bond, + double edihedral, double *f1, double *f3, double *f4, + double vb1x, double vb1y, double vb1z, + double vb2x, double vb2y, double vb2z, + double vb3x, double vb3y, double vb3z, int tid) +{ + double edihedralquarter,v[6]; + int cnt; + + if (dihed->eflag_either) { + if (dihed->eflag_global) { + if (newton_bond) { + eng_bond_thr[tid] += edihedral; + } else { + edihedralquarter = 0.25*edihedral; + cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + eng_bond_thr[tid] += static_cast(cnt) * edihedralquarter; + } + } + if (dihed->eflag_atom) { + edihedralquarter = 0.25*edihedral; + if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter; + } + } + + if (dihed->vflag_either) { + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (dihed->vflag_global) { + if (newton_bond) { + virial_thr[tid][0] += v[0]; + virial_thr[tid][1] += v[1]; + virial_thr[tid][2] += v[2]; + virial_thr[tid][3] += v[3]; + virial_thr[tid][4] += v[4]; + virial_thr[tid][5] += v[5]; + } else { + if (i1 < nlocal) { + virial_thr[tid][0] += 0.25*v[0]; + virial_thr[tid][1] += 0.25*v[1]; + virial_thr[tid][2] += 0.25*v[2]; + virial_thr[tid][3] += 0.25*v[3]; + virial_thr[tid][4] += 0.25*v[4]; + virial_thr[tid][5] += 0.25*v[5]; + } + if (i2 < nlocal) { + virial_thr[tid][0] += 0.25*v[0]; + virial_thr[tid][1] += 0.25*v[1]; + virial_thr[tid][2] += 0.25*v[2]; + virial_thr[tid][3] += 0.25*v[3]; + 
virial_thr[tid][4] += 0.25*v[4]; + virial_thr[tid][5] += 0.25*v[5]; + } + if (i3 < nlocal) { + virial_thr[tid][0] += 0.25*v[0]; + virial_thr[tid][1] += 0.25*v[1]; + virial_thr[tid][2] += 0.25*v[2]; + virial_thr[tid][3] += 0.25*v[3]; + virial_thr[tid][4] += 0.25*v[4]; + virial_thr[tid][5] += 0.25*v[5]; + } + if (i4 < nlocal) { + virial_thr[tid][0] += 0.25*v[0]; + virial_thr[tid][1] += 0.25*v[1]; + virial_thr[tid][2] += 0.25*v[2]; + virial_thr[tid][3] += 0.25*v[3]; + virial_thr[tid][4] += 0.25*v[4]; + virial_thr[tid][5] += 0.25*v[5]; + } + } + } + + if (dihed->vflag_atom) { + if (newton_bond || i1 < nlocal) { + vatom_thr[tid][i1][0] += 0.25*v[0]; + vatom_thr[tid][i1][1] += 0.25*v[1]; + vatom_thr[tid][i1][2] += 0.25*v[2]; + vatom_thr[tid][i1][3] += 0.25*v[3]; + vatom_thr[tid][i1][4] += 0.25*v[4]; + vatom_thr[tid][i1][5] += 0.25*v[5]; + } + if (newton_bond || i2 < nlocal) { + vatom_thr[tid][i2][0] += 0.25*v[0]; + vatom_thr[tid][i2][1] += 0.25*v[1]; + vatom_thr[tid][i2][2] += 0.25*v[2]; + vatom_thr[tid][i2][3] += 0.25*v[3]; + vatom_thr[tid][i2][4] += 0.25*v[4]; + vatom_thr[tid][i2][5] += 0.25*v[5]; + } + if (newton_bond || i3 < nlocal) { + vatom_thr[tid][i3][0] += 0.25*v[0]; + vatom_thr[tid][i3][1] += 0.25*v[1]; + vatom_thr[tid][i3][2] += 0.25*v[2]; + vatom_thr[tid][i3][3] += 0.25*v[3]; + vatom_thr[tid][i3][4] += 0.25*v[4]; + vatom_thr[tid][i3][5] += 0.25*v[5]; + } + if (newton_bond || i4 < nlocal) { + vatom_thr[tid][i4][0] += 0.25*v[0]; + vatom_thr[tid][i4][1] += 0.25*v[1]; + vatom_thr[tid][i4][2] += 0.25*v[2]; + vatom_thr[tid][i4][3] += 0.25*v[3]; + vatom_thr[tid][i4][4] += 0.25*v[4]; + vatom_thr[tid][i4][5] += 0.25*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally virial into per-atom accumulators + called by AIREBO potential, newton_pair is always on + fpair is magnitude of force on atom I +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally2_thr(int i, int j, 
double fpair, double *drij, int tid) +{ + double v[6]; + + v[0] = 0.5 * drij[0]*drij[0]*fpair; + v[1] = 0.5 * drij[1]*drij[1]*fpair; + v[2] = 0.5 * drij[2]*drij[2]*fpair; + v[3] = 0.5 * drij[0]*drij[1]*fpair; + v[4] = 0.5 * drij[0]*drij[2]*fpair; + v[5] = 0.5 * drij[1]*drij[2]*fpair; + + vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; + vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; + vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; + vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; +} + +/* ---------------------------------------------------------------------- + tally virial into per-atom accumulators + called by AIREBO and Tersoff potential, newton_pair is always on +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj, + double *drik, double *drjk, int tid) +{ + double v[6]; + + v[0] = THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]); + v[1] = THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]); + v[2] = THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]); + v[3] = THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]); + v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); + v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); + + vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; + vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; + vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; + vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; + vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; + vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; +} + +/* ---------------------------------------------------------------------- + tally 
virial into per-atom accumulators + called by AIREBO potential, newton_pair is always on +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally4_thr(int i, int j, int k, int m, + double *fi, double *fj, double *fk, + double *drim, double *drjm, double *drkm, int tid) +{ + double v[6]; + + v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); + v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); + v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); + v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); + v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); + v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); + + vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2]; + vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5]; + vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2]; + vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5]; + vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2]; + vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5]; + vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2]; + vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5]; +} + /* ---------------------------------------------------------------------- */ // set loop range thread id, and force array offset for threaded runs. double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid, int inum, int nall, int nthreads) { #if defined(_OPENMP) - if (nthreads > 1) { - tid = omp_get_thread_num(); + tid = omp_get_thread_num(); - // each thread works on a fixed chunk of atoms. 
- const int idelta = 1 + inum/nthreads; - ifrom = tid*idelta; - ito = ifrom + idelta; - if (ito > inum) - ito = inum; - - return f + nall*tid; - - } else { -#endif - tid = 0; - ifrom = 0; + // each thread works on a fixed chunk of atoms. + const int idelta = 1 + inum/nthreads; + ifrom = tid*idelta; + ito = ifrom + idelta; + if (ito > inum) ito = inum; - return f; -#if defined(_OPENMP) - } + + return f + nall*tid; +#else + tid = 0; + ifrom = 0; + ito = inum; + return f; #endif } /* ---------------------------------------------------------------------- */ -// reduce per thread forces into the first part of the force +// reduce per thread data into the first part of the data // array that is used for the non-threaded parts and reset -// the temporary storage to 0.0. this routine depends on the -// forces arrays stored in this order x1,y1,z1,x2,y2,z2,... +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... // we need to post a barrier to wait until all threads are done -// with computing forces. -void ThrOMP::force_reduce_thr(double *fall, int nall, - int nthreads, int tid) +// with writing to the array . +void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads, + int ndim, int tid) { #if defined(_OPENMP) // NOOP in non-threaded execution. if (nthreads == 1) return; #pragma omp barrier { - double *f; - const int idelta = 1 + nall/nthreads; - const int ifrom = 3*tid*idelta; - const int ito = 3*(((ifrom + idelta) > nall) ? nall : (ifrom + idelta)); - - for (int n = 1; n < nthreads; ++n) { - const int toffs = 3*n*nall; - f = fall + toffs; - for (int m = ifrom; m < ito; ++m) { - fall[m] += f[m]; - f[m] = 0.0; + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? 
nvals : (ifrom + idelta); + + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; } } } #else // NOOP in non-threaded execution. return; #endif } /* ---------------------------------------------------------------------- */ double ThrOMP::memory_usage_thr() { const int nthreads=lmp->comm->nthreads; double bytes = nthreads * (3 + 7) * sizeof(double); bytes += nthreads * maxeatom_thr * sizeof(double); bytes += nthreads * maxvatom_thr * 6 * sizeof(double); return bytes; } diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h index 24963e91d..9966c9de0 100644 --- a/src/USER-OMP/thr_omp.h +++ b/src/USER-OMP/thr_omp.h @@ -1,79 +1,114 @@ /* -*- c++ -*- ------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #ifndef LMP_THR_OMP_H #define LMP_THR_OMP_H #include "pointers.h" namespace LAMMPS_NS { // forward declarations class Pair; class Dihedral; class ThrOMP { + public: + struct global { + double eng_vdwl; + double eng_coul; + double eng_bond; + double virial[6]; + }; protected: const int thr_style; enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE}; LAMMPS *lmp; // reference to base lammps object. 
double *eng_vdwl_thr; // per thread accumulated vdw energy double *eng_coul_thr; // per thread accumulated coulomb energies double *eng_bond_thr; // per thread accumlated bonded energy double **virial_thr; // per thread virial double **eatom_thr; // per thread per atom energy double ***vatom_thr; // per thread per atom virial int maxeatom_thr, maxvatom_thr; + int evflag_global, evflag_atom; public: ThrOMP(LAMMPS *, int); virtual ~ThrOMP(); double memory_usage_thr(); + inline void sync_threads() { +#if defined(_OPENMP) +#pragma omp barrier +#endif + { ; } + }; + protected: // extra ev_tally work for threaded styles void ev_setup_thr(Pair *); void ev_setup_thr(Dihedral *); void ev_reduce_thr(Pair *); void ev_reduce_thr(Dihedral *); private: // internal method to be used by multiple ev_setup_thr() methods - void ev_zero_acc_thr(int, int, int, int, int, int); + void ev_setup_acc_thr(int, int, int, int, int, int); protected: // threading adapted versions of the ev_tally infrastructure + // style specific versions (need access to style class flags) void ev_tally_thr(Pair *, int, int, int, int, double, double, double, double, double, double, int); + void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double, + double, double, double, double, double, double, int); + void ev_tally3_thr(Pair *, int, int, int, double, double, + double *, double *, double *, double *, int); + void ev_tally4_thr(Pair *, int, int, int, int, double, + double *, double *, double *, + double *, double *, double *, int); + void ev_tally_list_thr(Pair *, int, int *, double , double *, int); + + void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double, + double *, double *, double *, double, double, double, + double, double, double, double, double, double, int); + + // style independent versions + void v_tally2_thr(int, int, double, double *, int); + void v_tally3_thr(int, int, int, double *, double *, double *, double *, int); + void v_tally4_thr(int, int, int, int, double *, double 
*, double *, + double *, double *, double *, int); protected: // set loop range, thread id, and force array offset for threaded runs. double **loop_setup_thr(double **, int &, int &, int &, int, int, int); - // reduce per thread forces into the first part of the force array - void force_reduce_thr(double *, int, int, int); + // reduce per thread data into the first part of the array + void data_reduce_thr(double *, int, int, int, int); + }; } #endif