diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp index eedb9bf55..da7d337b9 100644 --- a/src/MOLECULE/fix_cmap.cpp +++ b/src/MOLECULE/fix_cmap.cpp @@ -1,1451 +1,1451 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Implementation of the CHARMM CMAP; adds an extra energy term for the - peptide backbone dihedrals. The tools/ch2lmp/charmm2lammps.pl - conversion script, which generates an extra section in the LAMMPS data - file, is needed in order to generate the info used by this fix style. - Contributing authors: Xiaohu Hu, CMB/ORNL (hux2@ornl.gov) David Hyde-Volpe, Tigran Abramyan, and Robert A. Latour (Clemson University) Chris Lorenz (Kings College-London) + Implementation of the CHARMM CMAP; adds an extra energy term for the + peptide backbone dihedrals. The tools/ch2lmp/charmm2lammps.pl + conversion script, which generates an extra section in the LAMMPS data + file, is needed in order to generate the info used by this fix style. + References: - MacKerell et al., J. Am. Chem. Soc. 126(2004):698-699. - MacKerell et al., J. Comput. Chem. 25(2004):1400-1415. 
- -------------------------------------------------------------------------*/ +------------------------------------------------------------------------- */ #include #include #include #include #include #include "fix_cmap.h" #include "atom.h" #include "atom_vec.h" #include "update.h" #include "respa.h" #include "modify.h" #include "domain.h" #include "force.h" #include "group.h" #include "comm.h" #include "math_const.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; #define MAXLINE 256 #define LISTDELTA 10000 #define LB_FACTOR 1.5 #define CMAPMAX 6 // max # of CMAP terms stored by one atom #define CMAPDIM 24 // grid map dimension is 24 x 24 #define CMAPXMIN -360.0 #define CMAPXMIN2 -180.0 #define CMAPDX 15.0 // 360/CMAPDIM /* ---------------------------------------------------------------------- */ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), crosstermlist(NULL), num_crossterm(NULL), crossterm_type(NULL), crossterm_atom1(NULL), crossterm_atom2(NULL), crossterm_atom3(NULL), crossterm_atom4(NULL), crossterm_atom5(NULL), g_axis(NULL), cmapgrid(NULL), d1cmapgrid(NULL), d2cmapgrid(NULL), d12cmapgrid(NULL) { if (narg != 4) error->all(FLERR,"Illegal fix cmap command"); restart_global = 1; restart_peratom = 1; peatom_flag = 1; virial_flag = 1; peratom_freq = 1; scalar_flag = 1; global_freq = 1; extscalar = 1; extvector = 1; wd_header = 1; wd_section = 1; MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); // allocate memory for CMAP data memory->create(g_axis,CMAPDIM,"cmap:g_axis"); memory->create(cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:grid"); memory->create(d1cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d1grid"); memory->create(d2cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d2grid"); memory->create(d12cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d12grid"); // read and setup CMAP data read_grid_map(arg[3]); // perform initial allocation of atom-based arrays // register with Atom class num_crossterm = NULL; 
crossterm_type = NULL; crossterm_atom1 = NULL; crossterm_atom2 = NULL; crossterm_atom3 = NULL; crossterm_atom4 = NULL; crossterm_atom5 = NULL; nmax_previous = 0; grow_arrays(atom->nmax); atom->add_callback(0); atom->add_callback(1); // local list of crossterms ncmap = 0; maxcrossterm = 0; crosstermlist = NULL; } /* --------------------------------------------------------------------- */ FixCMAP::~FixCMAP() { // unregister callbacks to this fix from Atom class atom->delete_callback(id,0); atom->delete_callback(id,1); memory->destroy(g_axis); memory->destroy(cmapgrid); memory->destroy(d1cmapgrid); memory->destroy(d2cmapgrid); memory->destroy(d12cmapgrid); memory->destroy(crosstermlist); memory->destroy(num_crossterm); memory->destroy(crossterm_type); memory->destroy(crossterm_atom1); memory->destroy(crossterm_atom2); memory->destroy(crossterm_atom3); memory->destroy(crossterm_atom4); memory->destroy(crossterm_atom5); } /* ---------------------------------------------------------------------- */ int FixCMAP::setmask() { int mask = 0; mask |= PRE_NEIGHBOR; mask |= PRE_REVERSE; mask |= POST_FORCE; mask |= THERMO_ENERGY; mask |= POST_FORCE_RESPA; mask |= MIN_POST_FORCE; return mask; } /* ---------------------------------------------------------------------- */ void FixCMAP::init() { int i; double angle; i = 0; angle = -180.0; while (angle < 180.0) { g_axis[i] = angle; angle += CMAPDX; i++; } // pre-compute the derivatives of the maps for (i = 0; i < 6; i++) set_map_derivatives(cmapgrid[i],d1cmapgrid[i],d2cmapgrid[i],d12cmapgrid[i]); // define newton_bond here in case restart file was read (not data file) newton_bond = force->newton_bond; } /* --------------------------------------------------------------------- */ void FixCMAP::setup(int vflag) { pre_neighbor(); if (strstr(update->integrate_style,"verlet")) post_force(vflag); else { ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); post_force_respa(vflag,nlevels_respa-1,0); ((Respa *) 
update->integrate)->copy_f_flevel(nlevels_respa-1); } } /* --------------------------------------------------------------------- */ void FixCMAP::setup_pre_neighbor() { pre_neighbor(); } /* --------------------------------------------------------------------- */ void FixCMAP::min_setup(int vflag) { pre_neighbor(); post_force(vflag); } /* ---------------------------------------------------------------------- store local neighbor list as if newton_bond = OFF, even if actually ON ------------------------------------------------------------------------- */ void FixCMAP::pre_neighbor() { int i,m,itype,atom1,atom2,atom3,atom4,atom5; // guesstimate initial length of local crossterm list // if ncmap was not set (due to read_restart, no read_data), // then list will grow by LISTDELTA chunks if (maxcrossterm == 0) { if (nprocs == 1) maxcrossterm = ncmap; else maxcrossterm = static_cast (LB_FACTOR*ncmap/nprocs); memory->create(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); } int nlocal = atom->nlocal; ncrosstermlist = 0; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_crossterm[i]; m++) { atom1 = atom->map(crossterm_atom1[i][m]); atom2 = atom->map(crossterm_atom2[i][m]); atom3 = atom->map(crossterm_atom3[i][m]); atom4 = atom->map(crossterm_atom4[i][m]); atom5 = atom->map(crossterm_atom5[i][m]); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1 || atom5 == -1) { char str[128]; sprintf(str,"CMAP atoms " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " missing on proc %d at step " BIGINT_FORMAT, crossterm_atom1[i][m],crossterm_atom2[i][m], crossterm_atom3[i][m],crossterm_atom4[i][m], crossterm_atom5[i][m],me,update->ntimestep); error->one(FLERR,str); } atom1 = domain->closest_image(i,atom1); atom2 = domain->closest_image(i,atom2); atom3 = domain->closest_image(i,atom3); atom4 = domain->closest_image(i,atom4); atom5 = domain->closest_image(i,atom5); if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4 && i <= atom5) { 
if (ncrosstermlist == maxcrossterm) { maxcrossterm += LISTDELTA; memory->grow(crosstermlist,maxcrossterm,6,"cmap:crosstermlist"); } crosstermlist[ncrosstermlist][0] = atom1; crosstermlist[ncrosstermlist][1] = atom2; crosstermlist[ncrosstermlist][2] = atom3; crosstermlist[ncrosstermlist][3] = atom4; crosstermlist[ncrosstermlist][4] = atom5; crosstermlist[ncrosstermlist][5] = crossterm_type[i][m]; ncrosstermlist++; } } } } /* ---------------------------------------------------------------------- store eflag, so can use it in post_force to tally per-atom energies ------------------------------------------------------------------------- */ void FixCMAP::pre_reverse(int eflag, int vflag) { eflag_caller = eflag; } /* ---------------------------------------------------------------------- compute CMAP terms as if newton_bond = OFF, even if actually ON ------------------------------------------------------------------------- */ void FixCMAP::post_force(int vflag) { int n,i1,i2,i3,i4,i5,type,nlist; int li1, li2, mli1,mli2,mli11,mli21,t1,li3,li4,mli3,mli4,mli31,mli41; int list[5]; // vectors needed to calculate the cross-term dihedral angles double vb21x,vb21y,vb21z,vb32x,vb32y,vb32z,vb34x,vb34y,vb34z; double vb23x,vb23y,vb23z; double vb43x,vb43y,vb43z,vb45x,vb45y,vb45z,a1x,a1y,a1z,b1x,b1y,b1z; double a2x,a2y,a2z,b2x,b2y,b2z,r32,a1sq,b1sq,a2sq,b2sq,dpr21r32,dpr34r32; double dpr32r43,dpr45r43,r43,vb12x,vb12y,vb12z,vb54x,vb54y,vb54z; // cross-term dihedral angles double phi,psi,phi1,psi1; double f1[3],f2[3],f3[3],f4[3],f5[3],vcmap[6]; double gs[4],d1gs[4],d2gs[4],d12gs[4]; double engfraction; // vectors needed for the gradient/force calculation double dphidr1x,dphidr1y,dphidr1z,dphidr2x,dphidr2y,dphidr2z; double dphidr3x,dphidr3y,dphidr3z,dphidr4x,dphidr4y,dphidr4z; double dpsidr1x,dpsidr1y,dpsidr1z,dpsidr2x,dpsidr2y,dpsidr2z; double dpsidr3x,dpsidr3y,dpsidr3z,dpsidr4x,dpsidr4y,dpsidr4z; // Definition of cross-term dihedrals // phi dihedral // |--------------------| // 
a1-----a2-----a3-----a4-----a5 cross-term atoms // C N CA C N cross-term atom types // |--------------------| // psi dihedral double **x = atom->x; double **f = atom->f; int nlocal = atom->nlocal; ecmap = 0.0; int eflag = eflag_caller; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; for (n = 0; n < ncrosstermlist; n++) { i1 = crosstermlist[n][0]; i2 = crosstermlist[n][1]; i3 = crosstermlist[n][2]; i4 = crosstermlist[n][3]; i5 = crosstermlist[n][4]; type = crosstermlist[n][5]; if (type == 0) continue; // calculate bond vectors for both dihedrals // phi // vb21 = r2 - r1 vb21x = x[i2][0] - x[i1][0]; vb21y = x[i2][1] - x[i1][1]; vb21z = x[i2][2] - x[i1][2]; vb12x = -1.0*vb21x; vb12y = -1.0*vb21y; vb12z = -1.0*vb21z; vb32x = x[i3][0] - x[i2][0]; vb32y = x[i3][1] - x[i2][1]; vb32z = x[i3][2] - x[i2][2]; vb23x = -1.0*vb32x; vb23y = -1.0*vb32y; vb23z = -1.0*vb32z; vb34x = x[i3][0] - x[i4][0]; vb34y = x[i3][1] - x[i4][1]; vb34z = x[i3][2] - x[i4][2]; // psi // bond vectors same as for phi: vb32 vb43x = -1.0*vb34x; vb43y = -1.0*vb34y; vb43z = -1.0*vb34z; vb45x = x[i4][0] - x[i5][0]; vb45y = x[i4][1] - x[i5][1]; vb45z = x[i4][2] - x[i5][2]; vb54x = -1.0*vb45x; vb54y = -1.0*vb45y; vb54z = -1.0*vb45z; // calculate normal vectors for planes that define the dihedral angles a1x = vb12y*vb23z - vb12z*vb23y; a1y = vb12z*vb23x - vb12x*vb23z; a1z = vb12x*vb23y - vb12y*vb23x; b1x = vb43y*vb23z - vb43z*vb23y; b1y = vb43z*vb23x - vb43x*vb23z; b1z = vb43x*vb23y - vb43y*vb23x; a2x = vb23y*vb34z - vb23z*vb34y; a2y = vb23z*vb34x - vb23x*vb34z; a2z = vb23x*vb34y - vb23y*vb34x; b2x = vb45y*vb43z - vb45z*vb43y; b2y = vb45z*vb43x - vb45x*vb43z; b2z = vb45x*vb43y - vb45y*vb43x; // calculate terms used later in calculations r32 = sqrt(vb32x*vb32x + vb32y*vb32y + vb32z*vb32z); a1sq = a1x*a1x + a1y*a1y + a1z*a1z; b1sq = b1x*b1x + b1y*b1y + b1z*b1z; r43 = sqrt(vb43x*vb43x + vb43y*vb43y + vb43z*vb43z); a2sq = a2x*a2x + a2y*a2y + a2z*a2z; b2sq = b2x*b2x + b2y*b2y + b2z*b2z; //if 
(a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) // printf("a1sq b1sq a2sq b2sq: %f %f %f %f \n",a1sq,b1sq,a2sq,b2sq); if (a1sq<0.0001 || b1sq<0.0001 || a2sq<0.0001 || b2sq<0.0001) continue; dpr21r32 = vb21x*vb32x + vb21y*vb32y + vb21z*vb32z; dpr34r32 = vb34x*vb32x + vb34y*vb32y + vb34z*vb32z; dpr32r43 = vb32x*vb43x + vb32y*vb43y + vb32z*vb43z; dpr45r43 = vb45x*vb43x + vb45y*vb43y + vb45z*vb43z; // calculate the backbone dihedral angles as VMD and GROMACS phi = dihedral_angle_atan2(vb21x,vb21y,vb21z,a1x,a1y,a1z,b1x,b1y,b1z,r32); psi = dihedral_angle_atan2(vb32x,vb32y,vb32z,a2x,a2y,a2z,b2x,b2y,b2z,r43); if (phi == 180.0) phi= -180.0; if (psi == 180.0) psi= -180.0; phi1 = phi; if (phi1 < 0.0) phi1 += 360.0; psi1 = psi; if (psi1 < 0.0) psi1 += 360.0; // find the neighbor grid point index li1 = int(((phi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); li2 = int(((psi1+CMAPXMIN2)/CMAPDX)+((CMAPDIM*1.0)/2.0)); li3 = int((phi-CMAPXMIN2)/CMAPDX); li4 = int((psi-CMAPXMIN2)/CMAPDX); mli3 = li3 % CMAPDIM; mli4 = li4 % CMAPDIM; mli31 = (li3+1) % CMAPDIM; mli41 = (li4+1) %CMAPDIM; mli1 = li1 % CMAPDIM; mli2 = li2 % CMAPDIM; mli11 = (li1+1) % CMAPDIM; mli21 = (li2+1) %CMAPDIM; t1 = type-1; if (t1 < 0 || t1 > 5) error->all(FLERR,"Invalid CMAP crossterm_type"); // determine the values and derivatives for the grid square points gs[0] = cmapgrid[t1][mli3][mli4]; gs[1] = cmapgrid[t1][mli31][mli4]; gs[2] = cmapgrid[t1][mli31][mli41]; gs[3] = cmapgrid[t1][mli3][mli41]; d1gs[0] = d1cmapgrid[t1][mli1][mli2]; d1gs[1] = d1cmapgrid[t1][mli11][mli2]; d1gs[2] = d1cmapgrid[t1][mli11][mli21]; d1gs[3] = d1cmapgrid[t1][mli1][mli21]; d2gs[0] = d2cmapgrid[t1][mli1][mli2]; d2gs[1] = d2cmapgrid[t1][mli11][mli2]; d2gs[2] = d2cmapgrid[t1][mli11][mli21]; d2gs[3] = d2cmapgrid[t1][mli1][mli21]; d12gs[0] = d12cmapgrid[t1][mli1][mli2]; d12gs[1] = d12cmapgrid[t1][mli11][mli2]; d12gs[2] = d12cmapgrid[t1][mli11][mli21]; d12gs[3] = d12cmapgrid[t1][mli1][mli21]; // calculate the cmap energy and the gradient 
(dE/dphi,dE/dpsi) bc_interpol(phi,psi,li3,li4,gs,d1gs,d2gs,d12gs); // sum up cmap energy contributions engfraction = 0.2 * E; if (i1 < nlocal) ecmap += engfraction; if (i2 < nlocal) ecmap += engfraction; if (i3 < nlocal) ecmap += engfraction; if (i4 < nlocal) ecmap += engfraction; if (i5 < nlocal) ecmap += engfraction; // calculate the derivatives dphi/dr_i dphidr1x = 1.0*r32/a1sq*a1x; dphidr1y = 1.0*r32/a1sq*a1y; dphidr1z = 1.0*r32/a1sq*a1z; dphidr2x = -1.0*r32/a1sq*a1x - dpr21r32/a1sq/r32*a1x + dpr34r32/b1sq/r32*b1x; dphidr2y = -1.0*r32/a1sq*a1y - dpr21r32/a1sq/r32*a1y + dpr34r32/b1sq/r32*b1y; dphidr2z = -1.0*r32/a1sq*a1z - dpr21r32/a1sq/r32*a1z + dpr34r32/b1sq/r32*b1z; dphidr3x = dpr34r32/b1sq/r32*b1x - dpr21r32/a1sq/r32*a1x - r32/b1sq*b1x; dphidr3y = dpr34r32/b1sq/r32*b1y - dpr21r32/a1sq/r32*a1y - r32/b1sq*b1y; dphidr3z = dpr34r32/b1sq/r32*b1z - dpr21r32/a1sq/r32*a1z - r32/b1sq*b1z; dphidr4x = r32/b1sq*b1x; dphidr4y = r32/b1sq*b1y; dphidr4z = r32/b1sq*b1z; // calculate the derivatives dpsi/dr_i dpsidr1x = 1.0*r43/a2sq*a2x; dpsidr1y = 1.0*r43/a2sq*a2y; dpsidr1z = 1.0*r43/a2sq*a2z; dpsidr2x = r43/a2sq*a2x + dpr32r43/a2sq/r43*a2x - dpr45r43/b2sq/r43*b2x; dpsidr2y = r43/a2sq*a2y + dpr32r43/a2sq/r43*a2y - dpr45r43/b2sq/r43*b2y; dpsidr2z = r43/a2sq*a2z + dpr32r43/a2sq/r43*a2z - dpr45r43/b2sq/r43*b2z; dpsidr3x = dpr45r43/b2sq/r43*b2x - dpr32r43/a2sq/r43*a2x - r43/b2sq*b2x; dpsidr3y = dpr45r43/b2sq/r43*b2y - dpr32r43/a2sq/r43*a2y - r43/b2sq*b2y; dpsidr3z = dpr45r43/b2sq/r43*b2z - dpr32r43/a2sq/r43*a2z - r43/b2sq*b2z; dpsidr4x = r43/b2sq*b2x; dpsidr4y = r43/b2sq*b2y; dpsidr4z = r43/b2sq*b2z; // calculate forces on cross-term atoms: F = -(dE/dPhi)*(dPhi/dr) f1[0] = dEdPhi*dphidr1x; f1[1] = dEdPhi*dphidr1y; f1[2] = dEdPhi*dphidr1z; f2[0] = dEdPhi*dphidr2x + dEdPsi*dpsidr1x; f2[1] = dEdPhi*dphidr2y + dEdPsi*dpsidr1y; f2[2] = dEdPhi*dphidr2z + dEdPsi*dpsidr1z; f3[0] = -dEdPhi*dphidr3x - dEdPsi*dpsidr2x; f3[1] = -dEdPhi*dphidr3y - dEdPsi*dpsidr2y; f3[2] = -dEdPhi*dphidr3z - 
dEdPsi*dpsidr2z; f4[0] = -dEdPhi*dphidr4x - dEdPsi*dpsidr3x; f4[1] = -dEdPhi*dphidr4y - dEdPsi*dpsidr3y; f4[2] = -dEdPhi*dphidr4z - dEdPsi*dpsidr3z; f5[0] = -dEdPsi*dpsidr4x; f5[1] = -dEdPsi*dpsidr4y; f5[2] = -dEdPsi*dpsidr4z; // apply force to each of the 5 atoms if (i1 < nlocal) { f[i1][0] += f1[0]; f[i1][1] += f1[1]; f[i1][2] += f1[2]; } if (i2 < nlocal) { f[i2][0] += f2[0]; f[i2][1] += f2[1]; f[i2][2] += f2[2]; } if (i3 < nlocal) { f[i3][0] += f3[0]; f[i3][1] += f3[1]; f[i3][2] += f3[2]; } if (i4 < nlocal) { f[i4][0] += f4[0]; f[i4][1] += f4[1]; f[i4][2] += f4[2]; } if (i5 < nlocal) { f[i5][0] += f5[0]; f[i5][1] += f5[1]; f[i5][2] += f5[2]; } // tally energy and/or virial if (evflag) { nlist = 0; if (i1 < nlocal) list[nlist++] = i1; if (i2 < nlocal) list[nlist++] = i2; if (i3 < nlocal) list[nlist++] = i3; if (i4 < nlocal) list[nlist++] = i4; if (i5 < nlocal) list[nlist++] = i5; vcmap[0] = (vb12x*f1[0])+(vb32x*f3[0])+((vb43x+vb32x)*f4[0])+ ((vb54x+vb43x+vb32x)*f5[0]); vcmap[1] = (vb12y*f1[1])+(vb32y*f3[1])+((vb43y+vb32y)*f4[1])+ ((vb54y+vb43y+vb32y)*f5[1]); vcmap[2] = (vb12z*f1[2])+(vb32z*f3[2])+((vb43z+vb32z)*f4[2])+ ((vb54z+vb43z+vb32z)*f5[2]); vcmap[3] = (vb12x*f1[1])+(vb32x*f3[1])+((vb43x+vb32x)*f4[1])+ ((vb54x+vb43x+vb32x)*f5[1]); vcmap[4] = (vb12x*f1[2])+(vb32x*f3[2])+((vb43x+vb32x)*f4[2])+ ((vb54x+vb43x+vb32x)*f5[2]); vcmap[5] = (vb12y*f1[2])+(vb32y*f3[2])+((vb43y+vb32y)*f4[2])+ ((vb54y+vb43y+vb32y)*f5[2]); ev_tally(nlist,list,5.0,E,vcmap); //ev_tally(5,list,nlocal,newton_bond,E,vcmap); } } } /* ---------------------------------------------------------------------- */ void FixCMAP::post_force_respa(int vflag, int ilevel, int iloop) { if (ilevel == nlevels_respa-1) post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixCMAP::min_post_force(int vflag) { post_force(vflag); } /* ---------------------------------------------------------------------- energy of CMAP term 
------------------------------------------------------------------------- */ double FixCMAP::compute_scalar() { double all; MPI_Allreduce(&ecmap,&all,1,MPI_DOUBLE,MPI_SUM,world); return all; } // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- // methods to read CMAP potential file, perform interpolation // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- void FixCMAP::read_grid_map(char *cmapfile) { char linebuf[MAXLINE]; char *chunk,*line; int i1, i2, i3, i4, i5, i6, j1, j2, j3, j4, j5, j6, counter; FILE *fp = NULL; if (comm->me == 0) { fp = force->open_potential(cmapfile); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open fix cmap file %s",cmapfile); error->one(FLERR,str); } } for (int ix1 = 0; ix1 < 6; ix1++) for (int ix2 = 0; ix2 < CMAPDIM; ix2++) for (int ix3 = 0; ix3 < CMAPDIM; ix3++) cmapgrid[ix1][ix2][ix3] = 0.0; counter = 0; i1 = i2 = i3 = i4 = i5 = i6 = 0; j1 = j2 = j3 = j4 = j5 = j6 = 0; int done = 0; while (!done) { // only read on rank 0 and broadcast to all other ranks if (comm->me == 0) done = (fgets(linebuf,MAXLINE,fp) == NULL); MPI_Bcast(&done,1,MPI_INT,0,world); if (done) continue; MPI_Bcast(linebuf,MAXLINE,MPI_CHAR,0,world); // remove leading whitespace line = linebuf; while (line && (*line == ' ' || *line == '\t' || *line == '\r')) ++line; // skip if empty line or comment if (!line || *line =='\n' || *line == '\0' || *line == '#') continue; // read in the cmap grid point values // NOTE: The order to read the 6 grid maps is HARD-CODED, thus errors // will occur if content of the file "cmap.data" is altered // // Reading order of the maps: // 1. Alanine map // 2. Alanine before proline map // 3. Proline map // 4. Two adjacent prolines map // 5. Glycine map // 6. 
Glycine before proline map chunk = strtok(line, " \r\n"); while (chunk != NULL) { // alanine map if (counter < CMAPDIM*CMAPDIM) { cmapgrid[0][i1][j1] = atof(chunk); chunk = strtok(NULL, " \r\n"); j1++; if (j1 == CMAPDIM) { j1 = 0; i1++; } counter++; } // alanine-proline map else if (counter >= CMAPDIM*CMAPDIM && counter < 2*CMAPDIM*CMAPDIM) { cmapgrid[1][i2][j2]= atof(chunk); chunk = strtok(NULL, " \r\n"); j2++; if (j2 == CMAPDIM) { j2 = 0; i2++; } counter++; } // proline map else if (counter >= 2*CMAPDIM*CMAPDIM && counter < 3*CMAPDIM*CMAPDIM) { cmapgrid[2][i3][j3] = atof(chunk); chunk = strtok(NULL, " \r\n"); j3++; if (j3 == CMAPDIM) { j3 = 0; i3++; } counter++; } // 2 adjacent prolines map else if (counter >= 3*CMAPDIM*CMAPDIM && counter < 4*CMAPDIM*CMAPDIM) { cmapgrid[3][i4][j4] = atof(chunk); chunk = strtok(NULL, " \r\n"); j4++; if (j4 == CMAPDIM) { j4 = 0; i4++; } counter++; } // glycine map else if (counter >= 4*CMAPDIM*CMAPDIM && counter < 5*CMAPDIM*CMAPDIM) { cmapgrid[4][i5][j5] = atof(chunk); chunk = strtok(NULL, " \r\n"); j5++; if (j5 == CMAPDIM) { j5 = 0; i5++; } counter++; } // glycine-proline map else if (counter >= 5*CMAPDIM*CMAPDIM && counter < 6*CMAPDIM*CMAPDIM) { cmapgrid[5][i6][j6] = atof(chunk); chunk = strtok(NULL, " \r\n"); j6++; if (j6 == CMAPDIM) { j6 = 0; i6++; } counter++; } else break; } } if (comm->me == 0) fclose(fp); } /* ---------------------------------------------------------------------- */ void FixCMAP::spline(double *y, double *ddy, int n) { // create the 2nd dervatives of a taublated function y_i(x_i) // at the tabulated points int i, j; double p, *u; memory->create(u,n-1,"cmap:u"); ddy[0] = u[0] = 0.0; for (i = 1; i <= n-2; i++) { p = 1.0/(ddy[i-1]+4.0); ddy[i] = -p; u[i] = ((((6.0*y[i+1])-(12.0*y[i])+(6.0*y[i-1]))/(CMAPDX*CMAPDX))-u[i-1])*p; } ddy[n-1] = 0.0; for (j = n-2; j >= 0; j--) ddy[j] = ddy[j]*ddy[j+1] + u[j]; memory->destroy(u); } /* ---------------------------------------------------------------------- */ void 
FixCMAP::spl_interpolate(double x, double *y, double *ddy, double &yo, double &dyo) { // perform a 1D cubic spline interpolation int ix; double a,b,a1,b1,a2,b2; ix = int((x-CMAPXMIN)/CMAPDX-(1./2.)); a = (CMAPXMIN+(ix*1.0)*CMAPDX-x)/CMAPDX; b = (x-CMAPXMIN-(((ix-1)*1.0)*CMAPDX))/CMAPDX; a1 = a*a*a-a; b1 = b*b*b-b; a2 = 3.0*a*a-1.0; b2 = 3.0*b*b-1.0; yo = a*y[ix]+b*y[ix+1]+(a1*ddy[ix]+b1*ddy[ix+1])*(CMAPDX*CMAPDX)/6.0; dyo = (y[ix+1]-y[ix])/CMAPDX-a2/6.0*CMAPDX*ddy[ix]+b2/6.0*CMAPDX*ddy[ix+1]; } /* ---------------------------------------------------------------------- */ void FixCMAP::set_map_derivatives(double **map, double **d1yo, double **d2yo, double **d12yo) { // precompute the gradient and cross-derivatives of the map grid points. // use the bicubic spline to calculate the derivatives int i, j, k, ii, jj, xm, p; double phi, psi, y, d1y, d2y, d12y, tyyk,tdyk; double *tmp_y, *tmp_dy, *tmp_ddy, **tmap, **tddmap; int ix; double a,b,a1,b1,a2,b2; xm = CMAPDIM/2; p = CMAPDIM; y = 0.; d1y = 0.; d2y = 0.; d12y = 0.; memory->create(tmp_y,CMAPDIM*2,"cmap:tmp_y"); memory->create(tmp_dy,CMAPDIM*2,"cmap:tmp_dy"); memory->create(tmp_ddy,CMAPDIM*2,"cmap:tmp_ddy"); memory->create(tmap,CMAPDIM*2,CMAPDIM*2,"cmap:tmap"); memory->create(tddmap,CMAPDIM*2,CMAPDIM*2,"cmap:tddmap"); // periodically expand the original map // use the expanded map for bicubic spline interpolation, // which is used to obtain the derivatives // actual interpolation is done with bicubic interpolation for (i = 0; i < CMAPDIM*2; i++) { ii = ((i+CMAPDIM-xm)%CMAPDIM); for (j = 0; j < CMAPDIM*2; j++) { jj = ((j+CMAPDIM-xm)%CMAPDIM); tmap[i][j] = map[ii][jj]; } } for (i = 0; i < CMAPDIM*2; i++) spline(tmap[i], tddmap[i], CMAPDIM*2); for (i = xm; i < CMAPDIM+xm; i++) { phi = (i-xm)*CMAPDX-180.0; for (j = xm; j < CMAPDIM+xm; j++) { psi = (j-xm)*CMAPDX-180.0; ix = int((psi-CMAPXMIN)/CMAPDX); a = (CMAPXMIN+((ix+1)*1.0)*CMAPDX-psi)/CMAPDX; b = (psi-CMAPXMIN-((ix)*1.0)*CMAPDX)/CMAPDX; a1 = a*a*a-a; b1 = b*b*b-b; a2 = 
3.0*a*a-1.0; b2 = 3.0*b*b-1.0; for (k = 0; k < CMAPDIM*2; k++) { tyyk = tmp_y[k]; tdyk = tmp_dy[k]; tyyk = a*tmap[k][ix]+b*tmap[k][ix+1]+ (a1*tddmap[k][ix]+b1*tddmap[k][ix+1])*(CMAPDX*CMAPDX)/6.0; tdyk = (tmap[k][ix+1]-tmap[k][ix])/CMAPDX- (a2/6.0*CMAPDX*tddmap[k][ix])+(b2/6.0*CMAPDX*tddmap[k][ix+1]); tmp_y[k] = tyyk; tmp_dy[k] = tdyk; } spline(tmp_y,tmp_ddy,CMAPDIM+xm+xm); ix = int((phi-CMAPXMIN)/CMAPDX); a = (CMAPXMIN+((ix+1)*1.0)*CMAPDX-phi)/CMAPDX; b = (phi-CMAPXMIN-(ix*1.0)*CMAPDX)/CMAPDX; a1 = a*a*a-a; b1 = b*b*b-b; a2 = 3.0*a*a-1.0; b2 = 3.0*b*b-1.0; y = a*tmp_y[ix]+b*tmp_y[ix+1]+ (a1*tmp_ddy[ix]+b1*tmp_ddy[ix+1])*(CMAPDX*CMAPDX)/6.0; d1y = (tmp_y[ix+1]-tmp_y[ix])/CMAPDX- a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1]; spline(tmp_dy,tmp_ddy,CMAPDIM+xm+xm); ix = int((phi-CMAPXMIN)/CMAPDX); a = (CMAPXMIN+((ix+1)*1.0)*CMAPDX-phi)/CMAPDX; b = (phi-CMAPXMIN-(ix*1.0)*CMAPDX)/CMAPDX; a1 = a*a*a-a; b1 = b*b*b-b; a2 = 3.0*a*a-1.0; b2 = 3.0*b*b-1.0; d2y = a*tmp_dy[ix]+b*tmp_dy[ix+1]+ (a1*tmp_ddy[ix]+b1*tmp_ddy[ix+1])*(CMAPDX*CMAPDX)/6.0; d12y = (tmp_dy[ix+1]-tmp_dy[ix])/CMAPDX- a2/6.0*CMAPDX*tmp_ddy[ix]+b2/6.0*CMAPDX*tmp_ddy[ix+1]; d1yo[i%p][j%p] = d1y; d2yo[i%p][j%p] = d2y; d12yo[i%p][j%p] = d12y; } } memory->destroy(tmp_y); memory->destroy(tmp_dy); memory->destroy(tmp_ddy); memory->destroy(tmap); memory->destroy(tddmap); } /* ---------------------------------------------------------------------- */ double FixCMAP::dihedral_angle_atan2(double fx, double fy, double fz, double ax, double ay, double az, double bx, double by, double bz, double absg) { // calculate the dihedral angle double angle, arg1, arg2; arg1 = absg*(fx*bx+fy*by+fz*bz); arg2 = ax*bx+ay*by+az*bz; if (arg1 == 0 && arg2 == 0) error->all(FLERR,"CMAP: atan2 function cannot take 2 zero arguments"); else { angle = atan2(arg1,arg2); angle = angle*180.0/MY_PI; } return angle; } /* ---------------------------------------------------------------------- */ void FixCMAP::bc_coeff(double *gs, double *d1gs, 
double *d2gs, double *d12gs) { // calculate the bicubic interpolation coefficients c_ij static int wt[16][16] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0, 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 3, 0, 0, 0, 0,-2, 0, 0,-1, 0, 0, 0, 0, 2, 0, 0,-2, 0, 0, 0, 0, 1, 0, 0, 1, -3, 3, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0, 9,-9, 9,-9, 6, 3,-3,-6, 6,-6,-3, 3, 4, 2, 1, 2, -6, 6,-6, 6,-4,-2, 2, 4,-3, 3, 3,-3,-2,-1,-1,-2, 2,-2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0, -6, 6,-6, 6,-3,-3, 3, 3,-4, 4, 2,-2,-2,-2,-1,-1, 4,-4, 4,-4, 2, 2,-2,-2, 2,-2,-2, 2, 1, 1, 1, 1 }; int i, j, k, in; double xx, x[16]; for (i = 0; i < 4; i++) { x[i] = gs[i]; x[i+4] = d1gs[i]*CMAPDX; x[i+8] = d2gs[i]*CMAPDX; x[i+12] = d12gs[i]*CMAPDX*CMAPDX; } in = 0; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { xx = 0.0; for (k = 0; k < 16; k++) xx += wt[in][k]*x[k]; in++; cij[i][j] = xx; } } } /* ---------------------------------------------------------------------- */ void FixCMAP::bc_interpol(double x1, double x2, int low1, int low2, double *gs, double *d1gs, double *d2gs, double *d12gs) { // for a given point of interest and its corresponding grid square values, // gradients and cross-derivatives // calculate the interpolated value of the point of interest (POI) int i, p=12; double t, u, fac, gs1l, gs2l, gs1u, gs2u; // set the interpolation coefficients bc_coeff(gs,d1gs,d2gs,d12gs); gs1l = g_axis[low1]; gs2l = g_axis[low2]; t = (x1-gs1l)/CMAPDX; u = (x2-gs2l)/CMAPDX; E = dEdPhi = dEdPsi = 0.0; for (i = 3; i >= 0; i--) { E = t*E + ((cij[i][3]*u+cij[i][2])*u+cij[i][1])*u+cij[i][0]; dEdPhi = u*dEdPhi + (3.0*cij[3][i]*t+2.0*cij[2][i])*t+cij[1][i]; dEdPsi = t*dEdPsi + 
(3.0*cij[i][3]*u+2.0*cij[i][2])*u+cij[i][1]; } dEdPhi *= (180.0/MY_PI/CMAPDX); dEdPsi *= (180.0/MY_PI/CMAPDX); } // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- // methods to read and write data file // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- void FixCMAP::read_data_header(char *line) { if (strstr(line,"crossterms")) { sscanf(line,BIGINT_FORMAT,&ncmap); } else error->all(FLERR,"Invalid read data header line for fix cmap"); // didn't set in constructor b/c this fix could be defined // before newton command newton_bond = force->newton_bond; } /* ---------------------------------------------------------------------- unpack N lines in buf from section of data file labeled by keyword id_offset is applied to atomID fields if multiple data files are read store CMAP interactions as if newton_bond = OFF, even if actually ON ------------------------------------------------------------------------- */ void FixCMAP::read_data_section(char *keyword, int n, char *buf, tagint id_offset) { int m,tmp,itype; tagint atom1,atom2,atom3,atom4,atom5; char *next; next = strchr(buf,'\n'); *next = '\0'; int nwords = atom->count_words(buf); *next = '\n'; if (nwords != 7) { char str[128]; sprintf(str,"Incorrect %s format in data file",keyword); error->all(FLERR,str); } // loop over lines of CMAP crossterms // tokenize the line into values // add crossterm to one of my atoms, depending on newton_bond for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); *next = '\0'; sscanf(buf,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, &tmp,&itype,&atom1,&atom2,&atom3,&atom4,&atom5); atom1 += id_offset; atom2 += id_offset; atom3 += id_offset; atom4 += id_offset; atom5 += id_offset; if ((m = atom->map(atom1)) >= 0) { if (num_crossterm[m] == CMAPMAX) 
error->one(FLERR,"Too many CMAP crossterms for one atom"); crossterm_type[m][num_crossterm[m]] = itype; crossterm_atom1[m][num_crossterm[m]] = atom1; crossterm_atom2[m][num_crossterm[m]] = atom2; crossterm_atom3[m][num_crossterm[m]] = atom3; crossterm_atom4[m][num_crossterm[m]] = atom4; crossterm_atom5[m][num_crossterm[m]] = atom5; num_crossterm[m]++; } if ((m = atom->map(atom2)) >= 0) { if (num_crossterm[m] == CMAPMAX) error->one(FLERR,"Too many CMAP crossterms for one atom"); crossterm_type[m][num_crossterm[m]] = itype; crossterm_atom1[m][num_crossterm[m]] = atom1; crossterm_atom2[m][num_crossterm[m]] = atom2; crossterm_atom3[m][num_crossterm[m]] = atom3; crossterm_atom4[m][num_crossterm[m]] = atom4; crossterm_atom5[m][num_crossterm[m]] = atom5; num_crossterm[m]++; } if ((m = atom->map(atom3)) >= 0) { if (num_crossterm[m] == CMAPMAX) error->one(FLERR,"Too many CMAP crossterms for one atom"); crossterm_type[m][num_crossterm[m]] = itype; crossterm_atom1[m][num_crossterm[m]] = atom1; crossterm_atom2[m][num_crossterm[m]] = atom2; crossterm_atom3[m][num_crossterm[m]] = atom3; crossterm_atom4[m][num_crossterm[m]] = atom4; crossterm_atom5[m][num_crossterm[m]] = atom5; num_crossterm[m]++; } if ((m = atom->map(atom4)) >= 0) { if (num_crossterm[m] == CMAPMAX) error->one(FLERR,"Too many CMAP crossterms for one atom"); crossterm_type[m][num_crossterm[m]] = itype; crossterm_atom1[m][num_crossterm[m]] = atom1; crossterm_atom2[m][num_crossterm[m]] = atom2; crossterm_atom3[m][num_crossterm[m]] = atom3; crossterm_atom4[m][num_crossterm[m]] = atom4; crossterm_atom5[m][num_crossterm[m]] = atom5; num_crossterm[m]++; } if ((m = atom->map(atom5)) >= 0) { if (num_crossterm[m] == CMAPMAX) error->one(FLERR,"Too many CMAP crossterms for one atom"); crossterm_type[m][num_crossterm[m]] = itype; crossterm_atom1[m][num_crossterm[m]] = atom1; crossterm_atom2[m][num_crossterm[m]] = atom2; crossterm_atom3[m][num_crossterm[m]] = atom3; crossterm_atom4[m][num_crossterm[m]] = atom4; 
crossterm_atom5[m][num_crossterm[m]] = atom5; num_crossterm[m]++; } buf = next + 1; } } /* ---------------------------------------------------------------------- */ bigint FixCMAP::read_data_skip_lines(char *keyword) { return ncmap; } /* ---------------------------------------------------------------------- write Mth header line to file only called by proc 0 ------------------------------------------------------------------------- */ void FixCMAP::write_data_header(FILE *fp, int mth) { fprintf(fp,BIGINT_FORMAT " cmap crossterms\n",ncmap); } /* ---------------------------------------------------------------------- return size I own for Mth data section # of data sections = 1 for this fix nx = # of crossterms owned by my local atoms if newton_bond off, atom only owns crossterm if it is atom3 ny = columns = type + 5 atom IDs ------------------------------------------------------------------------- */ void FixCMAP::write_data_section_size(int mth, int &nx, int &ny) { int i,m; tagint *tag = atom->tag; int nlocal = atom->nlocal; nx = 0; for (i = 0; i < nlocal; i++) for (m = 0; m < num_crossterm[i]; m++) if (crossterm_atom3[i][m] == tag[i]) nx++; ny = 6; } /* ---------------------------------------------------------------------- pack values for Mth data section into 2d buf buf allocated by caller as owned crossterms by 6 ------------------------------------------------------------------------- */ void FixCMAP::write_data_section_pack(int mth, double **buf) { int i,m; // 1st column = CMAP type // 2nd-6th columns = 5 atom IDs tagint *tag = atom->tag; int nlocal = atom->nlocal; int n = 0; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_crossterm[i]; m++) { if (crossterm_atom3[i][m] != tag[i]) continue; buf[n][0] = ubuf(crossterm_type[i][m]).d; buf[n][1] = ubuf(crossterm_atom1[i][m]).d; buf[n][2] = ubuf(crossterm_atom2[i][m]).d; buf[n][3] = ubuf(crossterm_atom3[i][m]).d; buf[n][4] = ubuf(crossterm_atom4[i][m]).d; buf[n][5] = ubuf(crossterm_atom5[i][m]).d; n++; } } } /* 
---------------------------------------------------------------------- write section keyword for Mth data section to file use Molecules or Charges if that is only field, else use fix ID only called by proc 0 ------------------------------------------------------------------------- */ void FixCMAP::write_data_section_keyword(int mth, FILE *fp) { fprintf(fp,"\nCMAP\n\n"); } /* ---------------------------------------------------------------------- write N lines from buf to file convert buf fields to int or double depending on styles index can be used to prepend global numbering only called by proc 0 ------------------------------------------------------------------------- */ void FixCMAP::write_data_section(int mth, FILE *fp, int n, double **buf, int index) { for (int i = 0; i < n; i++) fprintf(fp,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT "\n", index+i,(int) ubuf(buf[i][0]).i,(tagint) ubuf(buf[i][1]).i, (tagint) ubuf(buf[i][2]).i,(tagint) ubuf(buf[i][3]).i, (tagint) ubuf(buf[i][4]).i,(tagint) ubuf(buf[i][5]).i); } // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- // methods for restart and communication // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- /* ---------------------------------------------------------------------- pack entire state of Fix into one write ------------------------------------------------------------------------- */ void FixCMAP::write_restart(FILE *fp) { if (comm->me == 0) { int size = sizeof(bigint); fwrite(&size,sizeof(int),1,fp); fwrite(&ncmap,sizeof(bigint),1,fp); } } /* ---------------------------------------------------------------------- use state info from restart file to restart the Fix ------------------------------------------------------------------------- */ void FixCMAP::restart(char 
*buf) { ncmap = *((bigint *) buf); } /* ---------------------------------------------------------------------- pack values in local atom-based arrays for restart file ------------------------------------------------------------------------- */ int FixCMAP::pack_restart(int i, double *buf) { int n = 1; for (int m = 0; m < num_crossterm[i]; m++) { buf[n++] = ubuf(MAX(crossterm_type[i][m],-crossterm_type[i][m])).d; buf[n++] = ubuf(crossterm_atom1[i][m]).d; buf[n++] = ubuf(crossterm_atom2[i][m]).d; buf[n++] = ubuf(crossterm_atom3[i][m]).d; buf[n++] = ubuf(crossterm_atom4[i][m]).d; buf[n++] = ubuf(crossterm_atom5[i][m]).d; } buf[0] = n; return n; } /* ---------------------------------------------------------------------- unpack values from atom->extra array to restart the fix ------------------------------------------------------------------------- */ void FixCMAP::unpack_restart(int nlocal, int nth) { double **extra = atom->extra; // skip to Nth set of extra values int n = 0; for (int i = 0; i < nth; i++) n += static_cast (extra[nlocal][n]); int count = static_cast (extra[nlocal][n++]); num_crossterm[nlocal] = (count-1)/6; for (int m = 0; m < num_crossterm[nlocal]; m++) { crossterm_type[nlocal][m] = (int) ubuf(extra[nlocal][n++]).i; crossterm_atom1[nlocal][m] = (tagint) ubuf(extra[nlocal][n++]).i; crossterm_atom2[nlocal][m] = (tagint) ubuf(extra[nlocal][n++]).i; crossterm_atom3[nlocal][m] = (tagint) ubuf(extra[nlocal][n++]).i; crossterm_atom4[nlocal][m] = (tagint) ubuf(extra[nlocal][n++]).i; crossterm_atom5[nlocal][m] = (tagint) ubuf(extra[nlocal][n++]).i; } } /* ---------------------------------------------------------------------- maxsize of any atom's restart data ------------------------------------------------------------------------- */ int FixCMAP::maxsize_restart() { return 1 + CMAPMAX*6; } /* ---------------------------------------------------------------------- size of atom nlocal's restart data 
------------------------------------------------------------------------- */ int FixCMAP::size_restart(int nlocal) { return 1 + num_crossterm[nlocal]*6; } /* ---------------------------------------------------------------------- allocate atom-based array ------------------------------------------------------------------------- */ void FixCMAP::grow_arrays(int nmax) { num_crossterm = memory->grow(num_crossterm,nmax,"cmap:num_crossterm"); crossterm_type = memory->grow(crossterm_type,nmax,CMAPMAX, "cmap:crossterm_type"); crossterm_atom1 = memory->grow(crossterm_atom1,nmax,CMAPMAX, "cmap:crossterm_atom1"); crossterm_atom2 = memory->grow(crossterm_atom2,nmax,CMAPMAX, "cmap:crossterm_atom2"); crossterm_atom3 = memory->grow(crossterm_atom3,nmax,CMAPMAX, "cmap:crossterm_atom3"); crossterm_atom4 = memory->grow(crossterm_atom4,nmax,CMAPMAX, "cmap:crossterm_atom4"); crossterm_atom5 = memory->grow(crossterm_atom5,nmax,CMAPMAX, "cmap:crossterm_atom5"); // must initialize num_crossterm to 0 for added atoms // may never be set for some atoms when data file is read for (int i = nmax_previous; i < nmax; i++) num_crossterm[i] = 0; nmax_previous = nmax; } /* ---------------------------------------------------------------------- copy values within local atom-based array ------------------------------------------------------------------------- */ void FixCMAP::copy_arrays(int i, int j, int delflag) { num_crossterm[j] = num_crossterm[i]; for (int k = 0; k < num_crossterm[j]; k++){ crossterm_type[j][k] = crossterm_type[i][k]; crossterm_atom1[j][k] = crossterm_atom1[i][k]; crossterm_atom2[j][k] = crossterm_atom2[i][k]; crossterm_atom3[j][k] = crossterm_atom3[i][k]; crossterm_atom4[j][k] = crossterm_atom4[i][k]; crossterm_atom5[j][k] = crossterm_atom5[i][k]; } } /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ void 
FixCMAP::set_arrays(int i) { num_crossterm[i] = 0; } /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc ------------------------------------------------------------------------- */ int FixCMAP::pack_exchange(int i, double *buf) { int n = 0; buf[n++] = ubuf(num_crossterm[i]).d; for (int m = 0; m < num_crossterm[i]; m++) { buf[n++] = ubuf(crossterm_type[i][m]).d; buf[n++] = ubuf(crossterm_atom1[i][m]).d; buf[n++] = ubuf(crossterm_atom2[i][m]).d; buf[n++] = ubuf(crossterm_atom3[i][m]).d; buf[n++] = ubuf(crossterm_atom4[i][m]).d; buf[n++] = ubuf(crossterm_atom5[i][m]).d; } return n; } /* ---------------------------------------------------------------------- unpack values in local atom-based array from exchange with another proc ------------------------------------------------------------------------- */ int FixCMAP::unpack_exchange(int nlocal, double *buf) { int n = 0; num_crossterm[nlocal] = (int) ubuf(buf[n++]).i; for (int m = 0; m < num_crossterm[nlocal]; m++) { crossterm_type[nlocal][m] = (int) ubuf(buf[n++]).i; crossterm_atom1[nlocal][m] = (tagint) ubuf(buf[n++]).i; crossterm_atom2[nlocal][m] = (tagint) ubuf(buf[n++]).i; crossterm_atom3[nlocal][m] = (tagint) ubuf(buf[n++]).i; crossterm_atom4[nlocal][m] = (tagint) ubuf(buf[n++]).i; crossterm_atom5[nlocal][m] = (tagint) ubuf(buf[n++]).i; } return n; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double FixCMAP::memory_usage() { int nmax = atom->nmax; double bytes = nmax * sizeof(int); // num_crossterm bytes += nmax*CMAPMAX * sizeof(int); // crossterm_type bytes += 5*nmax*CMAPMAX * sizeof(int); // crossterm_atom 12345 bytes += maxcrossterm*6 * sizeof(int); // crosstermlist return bytes; } diff --git a/src/USER-CG-CMM/angle_sdk.cpp b/src/USER-CG-CMM/angle_sdk.cpp index 
cc5498599..a4f979961 100644 --- a/src/USER-CG-CMM/angle_sdk.cpp +++ b/src/USER-CG-CMM/angle_sdk.cpp @@ -1,504 +1,505 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+
   Variant of the harmonic angle potential for use with the
   lj/sdk potential for coarse grained MD simulations.
-   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#include
#include
#include "angle_sdk.h"
#include "atom.h"
#include "neighbor.h"
#include "pair.h"
#include "domain.h"
#include "comm.h"
#include "force.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"

#include "lj_sdk_common.h"

using namespace LAMMPS_NS;
using namespace MathConst;
using namespace LJSDKParms;

// lower bound applied to sin(theta) in compute() before it is inverted
#define SMALL 0.001

/* ----------------------------------------------------------------------
   constructor: the 1-3 repulsion starts switched off; init_style()
   decides whether it is needed
------------------------------------------------------------------------- */

AngleSDK::AngleSDK(LAMMPS *lmp) : Angle(lmp) { repflag = 0;}

/* ----------------------------------------------------------------------
   destructor: releases the per-type coefficient arrays, but only if
   allocate() has been called
------------------------------------------------------------------------- */

AngleSDK::~AngleSDK()
{
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(k);
    memory->destroy(theta0);
    memory->destroy(repscale);
    allocated = 0;
  }
}

/* ----------------------------------------------------------------------
   compute forces (and optionally energy/virial) for all angles in
   neighbor->anglelist
   angle term:  E = k (theta - theta0)^2
   if repflag is set, an additional 1-3 LJ term between the two outer
   atoms is evaluated for distances below rminsq, shifted by emin
   note: repscale[] is not referenced in this function; only the global
   repflag switches the 1-3 term on or off
------------------------------------------------------------------------- */

void AngleSDK::compute(int eflag, int vflag)
{
  int i1,i2,i3,n,type;
  double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3;
  double eangle,f1[3],f3[3],e13,f13;
  double dtheta,tk;
  double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22;

  eangle = 0.0;
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = 0;

  double **x = atom->x;
  double **f = atom->f;
  int **anglelist = neighbor->anglelist;
  int nanglelist = neighbor->nanglelist;
  int nlocal = atom->nlocal;
  int newton_bond = force->newton_bond;

  for (n = 0; n < nanglelist; n++) {
    // each list entry: 3 atom indices (i2 is the central atom) + angle type
    i1 = anglelist[n][0];
    i2 = anglelist[n][1];
    i3 = anglelist[n][2];
    type = anglelist[n][3];

    // 1st bond

    delx1 = x[i1][0] - x[i2][0];
    dely1 = x[i1][1] - x[i2][1];
    delz1 = x[i1][2] - x[i2][2];

    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
    r1 = sqrt(rsq1);

    // 2nd bond

    delx2 = x[i3][0] - x[i2][0];
    dely2 = x[i3][1] - x[i2][1];
    delz2 = x[i3][2] - x[i2][2];

    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
    r2 = sqrt(rsq2);

    // angle (cos and sin)

    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
    c /= r1*r2;

    // clamp the cosine to [-1,1] before it is passed to sqrt()/acos()
    if (c > 1.0) c = 1.0;
    if (c < -1.0) c = -1.0;

    // from here on s holds 1/sin(theta), with sin(theta) bounded below by SMALL
    s = sqrt(1.0 - c*c);
    if (s < SMALL) s = SMALL;
    s = 1.0/s;

    // 1-3 LJ interaction.
    // we only want to use the repulsive part,
    // and it can be scaled (or off).
    // so this has to be done here and not in the
    // general non-bonded code.

    f13 = e13 = delx3 = dely3 = delz3 = 0.0;

    if (repflag) {

      delx3 = x[i1][0] - x[i3][0];
      dely3 = x[i1][1] - x[i3][1];
      delz3 = x[i1][2] - x[i3][2];
      rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;

      const int type1 = atom->type[i1];
      const int type3 = atom->type[i3];

      f13=0.0;
      e13=0.0;

      // the 1-3 term only acts below the per-type-pair threshold rminsq
      if (rsq3 < rminsq[type1][type3]) {
        const int ljt = lj_type[type1][type3];
        const double r2inv = 1.0/rsq3;

        // select the functional form by LJ flavor; the energy is only
        // evaluated when eflag is set
        if (ljt == LJ12_4) {
          const double r4inv=r2inv*r2inv;

          f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]);
          if (eflag) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]);

        } else if (ljt == LJ9_6) {
          const double r3inv = r2inv*sqrt(r2inv);
          const double r6inv = r3inv*r3inv;

          f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]);
          if (eflag) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]);

        } else if (ljt == LJ12_6) {
          const double r6inv = r2inv*r2inv*r2inv;

          f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]);
          if (eflag) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]);
        }

        // make sure energy is 0.0 at the cutoff.
        if (eflag) e13 -= emin[type1][type3];

        // f13 becomes the factor that multiplies the 1-3 distance vector
        f13 *= r2inv;
      }
    }

    // force & energy

    dtheta = acos(c) - theta0[type];
    tk = k[type] * dtheta;

    if (eflag) eangle = tk*dtheta;

    // s already holds 1/sin(theta) at this point
    a = -2.0 * tk * s;
    a11 = a*c / rsq1;
    a12 = -a / (r1*r2);
    a22 = a*c / rsq2;

    f1[0] = a11*delx1 + a12*delx2;
    f1[1] = a11*dely1 + a12*dely2;
    f1[2] = a11*delz1 + a12*delz2;
    f3[0] = a22*delx2 + a12*delx1;
    f3[1] = a22*dely2 + a12*dely1;
    f3[2] = a22*delz2 + a12*delz1;

    // apply force to each of the 3 atoms
    // with newton_bond off only atoms with index < nlocal are updated

    if (newton_bond || i1 < nlocal) {
      f[i1][0] += f1[0] + f13*delx3;
      f[i1][1] += f1[1] + f13*dely3;
      f[i1][2] += f1[2] + f13*delz3;
    }

    if (newton_bond || i2 < nlocal) {
      f[i2][0] -= f1[0] + f3[0];
      f[i2][1] -= f1[1] + f3[1];
      f[i2][2] -= f1[2] + f3[2];
    }

    if (newton_bond || i3 < nlocal) {
      f[i3][0] += f3[0] - f13*delx3;
      f[i3][1] += f3[1] - f13*dely3;
      f[i3][2] += f3[2] - f13*delz3;
    }

    // the angle term and the 1-3 term are tallied separately
    if (evflag) {
      ev_tally(i1,i2,i3,nlocal,newton_bond,eangle,f1,f3,
               delx1,dely1,delz1,delx2,dely2,delz2);
      if (repflag)
        ev_tally13(i1,i3,nlocal,newton_bond,e13,f13,delx3,dely3,delz3);
    }
  }
}

/* ----------------------------------------------------------------------
   allocate the per-type arrays with nangletypes+1 entries (types are
   indexed from 1) and mark every type as not yet set
------------------------------------------------------------------------- */

void AngleSDK::allocate()
{
  allocated = 1;
  int n = atom->nangletypes;

  memory->create(k,n+1,"angle:k");
  memory->create(theta0,n+1,"angle:theta0");
  memory->create(repscale,n+1,"angle:repscale");

  memory->create(setflag,n+1,"angle:setflag");
  for (int i = 1; i <= n; i++) setflag[i] = 0;
}

/* ----------------------------------------------------------------------
   set coeffs for one or more types
   arg[0] = angle type range, arg[1] = k, arg[2] = theta0 in degrees
   narg == 3: repscale defaults to 1.0
   narg == 4: arg[3] = repscale
   narg == 6: legacy form, see the comment in the body
   narg == 5 passes the first range check but is rejected further down
------------------------------------------------------------------------- */

void AngleSDK::coeff(int narg, char **arg)
{
  if ((narg < 3) || (narg > 6))
    error->all(FLERR,"Incorrect args for angle coefficients");

  if (!allocated) allocate();

  int ilo,ihi;
  force->bounds(FLERR,arg[0],atom->nangletypes,ilo,ihi);

  double k_one = force->numeric(FLERR,arg[1]);
  double theta0_one = force->numeric(FLERR,arg[2]);
  double repscale_one=1.0;

  // backward compatibility with old cg/cmm style input:
  // this had
  // NOTE(review): the argument list that followed "this had" is missing
  // from this copy of the source; judging from the code below the old
  // form has 6 arguments and arg[4] is the epsilon value -- confirm
  // against the upstream file
  // if epsilon is set to 0.0 we accept it as repscale 0.0
  // otherwise assume repscale 1.0, since we were using
  // epsilon to turn repulsion on or off.
  if (narg == 6) {
    repscale_one = force->numeric(FLERR,arg[4]);
    if (repscale_one > 0.0) repscale_one = 1.0;
  } else if (narg == 4) repscale_one = force->numeric(FLERR,arg[3]);
  else if (narg == 3) repscale_one = 1.0;
  else error->all(FLERR,"Incorrect args for angle coefficients");

  // convert theta0 from degrees to radians and store coefficients

  int count = 0;
  for (int i = ilo; i <= ihi; i++) {
    k[i] = k_one;
    theta0[i] = theta0_one/180.0 * MY_PI;
    repscale[i] = repscale_one;
    setflag[i] = 1;
    count++;
  }

  // an empty type range is reported as an error
  if (count == 0) error->all(FLERR,"Incorrect args for angle coefficients");
}

/* ----------------------------------------------------------------------
   error check and initialize all values needed for force computation
   repflag is set when any angle type has repscale > 0; in that case the
   LJ tables are fetched from the active pair style via extract()
------------------------------------------------------------------------- */

void AngleSDK::init_style()
{

  // make sure we use an SDK pair_style and that we need the 1-3 repulsion

  repflag = 0;
  for (int i = 1; i <= atom->nangletypes; i++)
    if (repscale[i] > 0.0) repflag = 1;

  // set up pointers to access SDK LJ parameters for 1-3 interactions

  if (repflag) {
    int itmp;
    if (force->pair == NULL)
      error->all(FLERR,"Angle style SDK requires use of a compatible with Pair style");

    lj1 = (double **) force->pair->extract("lj1",itmp);
    lj2 = (double **) force->pair->extract("lj2",itmp);
    lj3 = (double **) force->pair->extract("lj3",itmp);
    lj4 = (double **) force->pair->extract("lj4",itmp);
    lj_type = (int **) force->pair->extract("lj_type",itmp);
    rminsq = (double **) force->pair->extract("rminsq",itmp);
    emin = (double **) force->pair->extract("emin",itmp);

    // a pair style that does not provide every one of these tables is rejected
    if (!lj1 || !lj2 || !lj3 || !lj4 || !lj_type || !rminsq || !emin)
      error->all(FLERR,"Angle style SDK is incompatible with Pair style");
  }
}

/* ----------------------------------------------------------------------
   return the stored equilibrium angle of type i
   (coeff() stores theta0 in radians)
------------------------------------------------------------------------- */

double AngleSDK::equilibrium_angle(int i)
{
  return theta0[i];
}

/*
---------------------------------------------------------------------- proc 0 writes out coeffs to restart file ------------------------------------------------------------------------- */ void AngleSDK::write_restart(FILE *fp) { fwrite(&k[1],sizeof(double),atom->nangletypes,fp); fwrite(&theta0[1],sizeof(double),atom->nangletypes,fp); fwrite(&repscale[1],sizeof(double),atom->nangletypes,fp); } /* ---------------------------------------------------------------------- proc 0 reads coeffs from restart file, bcasts them ------------------------------------------------------------------------- */ void AngleSDK::read_restart(FILE *fp) { allocate(); if (comm->me == 0) { fread(&k[1],sizeof(double),atom->nangletypes,fp); fread(&theta0[1],sizeof(double),atom->nangletypes,fp); fread(&repscale[1],sizeof(double),atom->nangletypes,fp); } MPI_Bcast(&k[1],atom->nangletypes,MPI_DOUBLE,0,world); MPI_Bcast(&theta0[1],atom->nangletypes,MPI_DOUBLE,0,world); MPI_Bcast(&repscale[1],atom->nangletypes,MPI_DOUBLE,0,world); for (int i = 1; i <= atom->nangletypes; i++) setflag[i] = 1; } /* ---------------------------------------------------------------------- proc 0 writes to data file ------------------------------------------------------------------------- */ void AngleSDK::write_data(FILE *fp) { for (int i = 1; i <= atom->nangletypes; i++) fprintf(fp,"%d %g %g\n",i,k[i],theta0[i]/MY_PI*180.0); } /* ---------------------------------------------------------------------- */ void AngleSDK::ev_tally13(int i, int j, int nlocal, int newton_bond, double evdwl, double fpair, double delx, double dely, double delz) { double v[6]; if (eflag_either) { if (eflag_global) { if (newton_bond) { energy += evdwl; } else { if (i < nlocal) energy += 0.5*evdwl; if (j < nlocal) energy += 0.5*evdwl; } } if (eflag_atom) { if (newton_bond || i < nlocal) eatom[i] += 0.5*evdwl; if (newton_bond || j < nlocal) eatom[j] += 0.5*evdwl; } } if (vflag_either) { v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = 
delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; if (vflag_global) { if (newton_bond) { virial[0] += v[0]; virial[1] += v[1]; virial[2] += v[2]; virial[3] += v[3]; virial[4] += v[4]; virial[5] += v[5]; } else { if (i < nlocal) { virial[0] += 0.5*v[0]; virial[1] += 0.5*v[1]; virial[2] += 0.5*v[2]; virial[3] += 0.5*v[3]; virial[4] += 0.5*v[4]; virial[5] += 0.5*v[5]; } if (j < nlocal) { virial[0] += 0.5*v[0]; virial[1] += 0.5*v[1]; virial[2] += 0.5*v[2]; virial[3] += 0.5*v[3]; virial[4] += 0.5*v[4]; virial[5] += 0.5*v[5]; } } } if (vflag_atom) { if (newton_bond || i < nlocal) { vatom[i][0] += 0.5*v[0]; vatom[i][1] += 0.5*v[1]; vatom[i][2] += 0.5*v[2]; vatom[i][3] += 0.5*v[3]; vatom[i][4] += 0.5*v[4]; vatom[i][5] += 0.5*v[5]; } if (newton_bond || j < nlocal) { vatom[j][0] += 0.5*v[0]; vatom[j][1] += 0.5*v[1]; vatom[j][2] += 0.5*v[2]; vatom[j][3] += 0.5*v[3]; vatom[j][4] += 0.5*v[4]; vatom[j][5] += 0.5*v[5]; } } } } /* ---------------------------------------------------------------------- */ double AngleSDK::single(int type, int i1, int i2, int i3) { double **x = atom->x; double delx1 = x[i1][0] - x[i2][0]; double dely1 = x[i1][1] - x[i2][1]; double delz1 = x[i1][2] - x[i2][2]; domain->minimum_image(delx1,dely1,delz1); double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1); double delx2 = x[i3][0] - x[i2][0]; double dely2 = x[i3][1] - x[i2][1]; double delz2 = x[i3][2] - x[i2][2]; domain->minimum_image(delx2,dely2,delz2); double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2); double c = delx1*delx2 + dely1*dely2 + delz1*delz2; c /= r1*r2; if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; double e13=0.0; if (repflag) { // 1-3 LJ interaction. 
double delx3 = x[i1][0] - x[i3][0]; double dely3 = x[i1][1] - x[i3][1]; double delz3 = x[i1][2] - x[i3][2]; domain->minimum_image(delx3,dely3,delz3); const int type1 = atom->type[i1]; const int type3 = atom->type[i3]; const double rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3; if (rsq3 < rminsq[type1][type3]) { const int ljt = lj_type[type1][type3]; const double r2inv = 1.0/rsq3; if (ljt == LJ12_4) { const double r4inv=r2inv*r2inv; e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]); } else if (ljt == LJ9_6) { const double r3inv = r2inv*sqrt(r2inv); const double r6inv = r3inv*r3inv; e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]); } else if (ljt == LJ12_6) { const double r6inv = r2inv*r2inv*r2inv; e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]); } // make sure energy is 0.0 at the cutoff. e13 -= emin[type1][type3]; } } double dtheta = acos(c) - theta0[type]; double tk = k[type] * dtheta; return tk*dtheta + e13; } diff --git a/src/USER-COLVARS/ndx_group.cpp b/src/USER-COLVARS/ndx_group.cpp index 10ccf000b..31d8332c9 100644 --- a/src/USER-COLVARS/ndx_group.cpp +++ b/src/USER-COLVARS/ndx_group.cpp @@ -1,248 +1,249 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ + /* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) + Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "ndx_group.h" #include "atom.h" #include "comm.h" #include "group.h" #include "memory.h" #include "force.h" #include "error.h" #include #include #include using namespace LAMMPS_NS; #define BUFLEN 4096 #define DELTA 16384 static char *find_section(FILE *fp, const char *name) { char linebuf[BUFLEN]; char *n,*p,*t,*r; while ((p = fgets(linebuf,BUFLEN,fp))) { t = strtok(p," \t\n\r\f"); if ((t != NULL) && *t == '[') { t = strtok(NULL," \t\n\r\f"); if (t != NULL) { n = t; t = strtok(NULL," \t\n\r\f"); if ((t != NULL) && *t == ']') { if ((name == NULL) || strcmp(name,n) == 0) { int l = strlen(n); r = new char[l+1]; strncpy(r,n,l+1); return r; } } } } } return NULL; } static tagint *read_section(FILE *fp, bigint &num) { char linebuf[BUFLEN]; char *p,*t; tagint *tagbuf; bigint nmax; num = 0; nmax = DELTA; tagbuf = (tagint *)malloc(sizeof(tagint)*nmax); while ((p = fgets(linebuf,BUFLEN,fp))) { t = strtok(p," \t\n\r\f"); while (t != NULL) { // start of a new section. we are done here. 
if (*t == '[') return tagbuf; tagbuf[num++] = ATOTAGINT(t); if (num == nmax) { nmax += DELTA; tagbuf = (tagint *)realloc(tagbuf,sizeof(tagint)*nmax); } t = strtok(NULL," \t\n\r\f"); } } return tagbuf; } /* ---------------------------------------------------------------------- */ void Ndx2Group::command(int narg, char **arg) { int len; bigint num; FILE *fp; char *name = NULL; tagint *tags; if (narg < 1) error->all(FLERR,"Illegal ndx2group command"); if (atom->tag_enable == 0) error->all(FLERR,"Must have atom IDs for ndx2group command"); if (comm->me == 0) { fp = fopen(arg[0], "r"); if (fp == NULL) error->one(FLERR,"Cannot open index file for reading"); if (screen) fprintf(screen, "Reading groups from index file %s:\n",arg[0]); if (logfile) fprintf(logfile,"Reading groups from index file %s:\n",arg[0]); } if (narg == 1) { // restore all groups do { if (comm->me == 0) { len = 0; // find the next section. // if we had processed a section, before we need to step back if (name != NULL) { rewind(fp); char *tmp = find_section(fp,name); delete[] tmp; delete[] name; name = NULL; } name = find_section(fp,NULL); if (name != NULL) { len=strlen(name); // skip over group "all", which is called "System" in gromacs if (strcmp(name,"System") == 0) continue; if (screen) fprintf(screen," Processing group '%s'\n",name); if (logfile) fprintf(logfile," Processing group '%s'\n",name); } MPI_Bcast(&len,1,MPI_INT,0,world); if (len > 0) { MPI_Bcast(name,len,MPI_CHAR,0,world); // read tags for atoms in group and broadcast num = 0; tags = read_section(fp,num); MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world); MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world); create(name,num,tags); free(tags); } } else { MPI_Bcast(&len,1,MPI_INT,0,world); if (len > 0) { delete[] name; name = new char[len+1]; MPI_Bcast(name,len+1,MPI_CHAR,0,world); MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world); tags = (tagint *)malloc(sizeof(tagint)*(num ? 
num : 1)); MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world); create(name,num,tags); free(tags); } } } while (len); } else { // restore selected groups for (int idx=1; idx < narg; ++idx) { if (comm->me == 0) { len = 0; // find named section, search from beginning of file if (name != NULL) delete[] name; rewind(fp); name = find_section(fp,arg[idx]); if (name != NULL) len=strlen(name); if (screen) fprintf(screen," %s group '%s'\n", len ? "Processing" : "Skipping",arg[idx]); if (logfile) fprintf(logfile,"%s group '%s'\n", len ? "Processing" : "Skipping",arg[idx]); MPI_Bcast(&len,1,MPI_INT,0,world); if (len > 0) { MPI_Bcast(name,len+1,MPI_CHAR,0,world); // read tags for atoms in group and broadcast num = 0; tags = read_section(fp,num); MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world); MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world); create(name,num,tags); free(tags); } } else { MPI_Bcast(&len,1,MPI_INT,0,world); if (len > 0) { delete[] name; name = new char[len+1]; MPI_Bcast(name,len+1,MPI_CHAR,0,world); MPI_Bcast(&num,1,MPI_LMP_BIGINT,0,world); tags = (tagint *)malloc(sizeof(tagint)*(num ? num : 1)); MPI_Bcast(tags,num,MPI_LMP_TAGINT,0,world); create(name,num,tags); free(tags); } } } } delete[] name; if (comm->me == 0) { if (screen) fputs("\n",screen); if (logfile) fputs("\n",logfile); fclose(fp); } } /* ---------------------------------------------------------------------- */ void Ndx2Group::create(char *name, bigint num, tagint *tags) { // wipe out all members if the group exists. 
gid==0 is group "all" int gid = group->find(name); if (gid > 0) { char *cmd[2]; cmd[0] = name; cmd[1] = (char *)"clear"; group->assign(2,cmd); } // map from global to local const int nlocal = atom->nlocal; int *flags = (int *)calloc(nlocal,sizeof(int)); for (bigint i=0; i < num; ++i) { const int id = atom->map(tags[i]); if (id < nlocal && id >= 0) flags[id] = 1; } group->create(name,flags); free(flags); } diff --git a/src/USER-EFF/fix_nh_eff.cpp b/src/USER-EFF/fix_nh_eff.cpp index bbf5ac31f..464498969 100644 --- a/src/USER-EFF/fix_nh_eff.cpp +++ b/src/USER-EFF/fix_nh_eff.cpp @@ -1,113 +1,113 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Andres Jaramillo-Botero (Caltech) + Contributing author: Andres Jaramillo-Botero (Caltech) ------------------------------------------------------------------------- */ #include #include #include "fix_nh_eff.h" #include "atom.h" #include "atom_vec.h" #include "group.h" #include "error.h" #include "domain.h" using namespace LAMMPS_NS; using namespace FixConst; enum{NOBIAS,BIAS}; /* ---------------------------------------------------------------------- */ FixNHEff::FixNHEff(LAMMPS *lmp, int narg, char **arg) : FixNH(lmp, narg, arg) { if (!atom->electron_flag) error->all(FLERR,"Fix nvt/nph/npt/eff requires atom style electron"); } /* ---------------------------------------------------------------------- perform half-step update of electron radial velocities -----------------------------------------------------------------------*/ void FixNHEff::nve_v() { // standard nve_v velocity update FixNH::nve_v(); double *erforce = atom->erforce; double *ervel = atom->ervel; double *mass = atom->mass; int *spin = atom->spin; double mefactor = domain->dimension/4.0; int *type = atom->type; int *mask = atom->mask; int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; double dtfm; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { if (abs(spin[i])==1) { dtfm = dtf / mass[type[i]]; ervel[i] = dtfm * erforce[i] / mefactor; } } } } /* ---------------------------------------------------------------------- perform full-step update of electron radii -----------------------------------------------------------------------*/ void FixNHEff::nve_x() { // standard nve_x position update FixNH::nve_x(); double *eradius = atom->eradius; double *ervel = atom->ervel; int *spin = atom->spin; int *mask = atom->mask; int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = 
atom->nfirst; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) if (abs(spin[i])==1) eradius[i] += dtv * ervel[i]; } /* ---------------------------------------------------------------------- perform half-step scaling of electron radial velocities -----------------------------------------------------------------------*/ void FixNHEff::nh_v_temp() { // standard nh_v_temp velocity scaling FixNH::nh_v_temp(); double *ervel = atom->ervel; int *spin = atom->spin; int *mask = atom->mask; int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) if (abs(spin[i])==1) ervel[i] *= factor_eta; } diff --git a/src/USER-H5MD/dump_h5md.cpp b/src/USER-H5MD/dump_h5md.cpp index ba31c22d2..7456d6fa4 100644 --- a/src/USER-H5MD/dump_h5md.cpp +++ b/src/USER-H5MD/dump_h5md.cpp @@ -1,555 +1,558 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- Contributing author: Pierre de Buyl (KU Leuven) ------------------------------------------------------------------------- */ #include #include #include #include #include #include "ch5md.h" #include "dump_h5md.h" #include "domain.h" #include "atom.h" #include "update.h" #include "group.h" #include "output.h" #include "error.h" #include "force.h" #include "memory.h" #include "version.h" using namespace LAMMPS_NS; #define MYMIN(a,b) ((a) < (b) ? 
(a) : (b)) #define MYMAX(a,b) ((a) > (b) ? (a) : (b)) /** Scan common options for the dump elements */ static int element_args(int narg, char **arg, int *every) { int iarg=0; while (iargall(FLERR,"Illegal dump h5md command"); if (binary || compressed || multifile || multiproc) error->all(FLERR,"Invalid dump h5md filename"); if (domain->triclinic!=0) error->all(FLERR,"Invalid domain for dump h5md. Only triclinic domains supported."); size_one = 6; sort_flag = 1; sortcol = 0; format_default = NULL; flush_flag = 0; unwrap_flag = 0; datafile_from_dump = -1; author_name=NULL; every_dump = force->inumeric(FLERR,arg[3]); every_position = every_image = -1; every_velocity = every_force = every_species = -1; every_charge = -1; do_box=true; create_group=true; bool box_is_set, create_group_is_set; box_is_set = create_group_is_set = false; int iarg=5; int n_parsed, default_every; size_one=0; if (every_dump==0) default_every=0; else default_every=1; while (iargall(FLERR, "Illegal dump h5md command"); iarg += n_parsed; size_one+=domain->dimension; } else if (strcmp(arg[iarg], "image")==0) { if (every_position<0) error->all(FLERR, "Illegal dump h5md command"); iarg+=1; size_one+=domain->dimension; every_image = every_position; } else if (strcmp(arg[iarg], "velocity")==0) { every_velocity = default_every; iarg+=1; n_parsed = element_args(narg-iarg, &arg[iarg], &every_velocity); if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command"); iarg += n_parsed; size_one+=domain->dimension; } else if (strcmp(arg[iarg], "force")==0) { every_force = default_every; iarg+=1; n_parsed = element_args(narg-iarg, &arg[iarg], &every_force); if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command"); iarg += n_parsed; size_one+=domain->dimension; } else if (strcmp(arg[iarg], "species")==0) { every_species=default_every; iarg+=1; n_parsed = element_args(narg-iarg, &arg[iarg], &every_species); if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command"); iarg += n_parsed; size_one+=1; } else 
if (strcmp(arg[iarg], "charge")==0) { if (!atom->q_flag) error->all(FLERR, "Requesting non-allocated quantity q in dump_h5md"); every_charge = default_every; iarg+=1; n_parsed = element_args(narg-iarg, &arg[iarg], &every_charge); if (n_parsed<0) error->all(FLERR, "Illegal dump h5md command"); iarg += n_parsed; size_one+=1; } else if (strcmp(arg[iarg], "file_from")==0) { if (iarg+1>=narg) { error->all(FLERR, "Invalid number of arguments in dump h5md"); } if (box_is_set||create_group_is_set) error->all(FLERR, "Cannot set file_from in dump h5md after box or create_group"); int idump; for (idump = 0; idump < output->ndump; idump++) if (strcmp(arg[iarg+1],output->dump[idump]->id) == 0) break; if (idump == output->ndump) error->all(FLERR,"Cound not find dump_modify ID"); datafile_from_dump = idump; do_box=false; create_group=false; iarg+=2; } else if (strcmp(arg[iarg], "box")==0) { if (iarg+1>=narg) { error->all(FLERR, "Invalid number of arguments in dump h5md"); } box_is_set = true; if (strcmp(arg[iarg+1], "yes")==0) do_box=true; else if (strcmp(arg[iarg+1], "no")==0) do_box=false; else error->all(FLERR, "Illegal dump h5md command"); iarg+=2; } else if (strcmp(arg[iarg], "create_group")==0) { if (iarg+1>=narg) { error->all(FLERR, "Invalid number of arguments in dump h5md"); } create_group_is_set = true; if (strcmp(arg[iarg+1], "yes")==0) create_group=true; else if (strcmp(arg[iarg+1], "no")==0) { create_group=false; } else error->all(FLERR, "Illegal dump h5md command"); iarg+=2; } else if (strcmp(arg[iarg], "author")==0) { if (iarg+1>=narg) { error->all(FLERR, "Invalid number of arguments in dump h5md"); } if (author_name==NULL) { author_name = new char[strlen(arg[iarg])+1]; strcpy(author_name, arg[iarg+1]); } else { error->all(FLERR, "Illegal dump h5md command: author argument repeated"); } iarg+=2; } else { error->all(FLERR, "Invalid argument to dump h5md"); } } // allocate global array for atom coords bigint n = group->count(igroup); natoms = static_cast (n); if 
(every_position>=0) memory->create(dump_position,domain->dimension*natoms,"dump:position"); if (every_image>=0) memory->create(dump_image,domain->dimension*natoms,"dump:image"); if (every_velocity>=0) memory->create(dump_velocity,domain->dimension*natoms,"dump:velocity"); if (every_force>=0) memory->create(dump_force,domain->dimension*natoms,"dump:force"); if (every_species>=0) memory->create(dump_species,natoms,"dump:species"); if (every_charge>=0) memory->create(dump_charge,natoms,"dump:charge"); openfile(); ntotal = 0; } /* ---------------------------------------------------------------------- */ DumpH5MD::~DumpH5MD() { if (every_position>=0) { memory->destroy(dump_position); if (me==0) { h5md_close_element(particles_data.position); if (do_box) h5md_close_element(particles_data.box_edges); } } if (every_image>=0) { memory->destroy(dump_image); if (me==0) h5md_close_element(particles_data.image); } if (every_velocity>=0) { memory->destroy(dump_velocity); if (me==0) h5md_close_element(particles_data.velocity); } if (every_force>=0) { memory->destroy(dump_force); if (me==0) h5md_close_element(particles_data.force); } if (every_species>=0) { memory->destroy(dump_species); if (me==0) h5md_close_element(particles_data.species); } if (every_charge>=0) { memory->destroy(dump_charge); if (me==0) h5md_close_element(particles_data.charge); } } /* ---------------------------------------------------------------------- */ void DumpH5MD::init_style() { if (sort_flag == 0 || sortcol != 0) error->all(FLERR,"Dump h5md requires sorting by atom ID"); } /* ---------------------------------------------------------------------- */ void DumpH5MD::openfile() { char *group_name; int group_name_length; int dims[2]; char *boundary[3]; for (int i=0; i<3; i++) { boundary[i] = new char[9]; if (domain->periodicity[i]==1) { strcpy(boundary[i], "periodic"); } else { strcpy(boundary[i], "none"); } } if (me == 0) { if (datafile_from_dump<0) { if (author_name==NULL) { datafile = 
h5md_create_file(filename, "N/A", NULL, "lammps", LAMMPS_VERSION); } else { datafile = h5md_create_file(filename, author_name, NULL, "lammps", LAMMPS_VERSION); } group_name_length = strlen(group->names[igroup])+1; group_name = new char[group_name_length]; strcpy(group_name, group->names[igroup]); if (create_group) { particles_data = h5md_create_particles_group(datafile, group_name); } else { particles_data.group = h5md_open_particles_group(datafile.particles, group_name); } delete [] group_name; dims[0] = natoms; dims[1] = domain->dimension; if (every_position>0) { particles_data.position = h5md_create_time_data(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, NULL); h5md_create_box(&particles_data, dims[1], boundary, true, NULL, &particles_data.position); } if (every_image>0) particles_data.image = h5md_create_time_data(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, &particles_data.position); if (every_velocity>0) particles_data.velocity = h5md_create_time_data(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, NULL); if (every_force>0) particles_data.force = h5md_create_time_data(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, NULL); if (every_species>0) particles_data.species = h5md_create_time_data(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, NULL); if (every_charge>0) { particles_data.charge = h5md_create_time_data(particles_data.group, "charge", 1, dims, H5T_NATIVE_DOUBLE, NULL); h5md_write_string_attribute(particles_data.group, "charge", "type", "effective"); } } else { DumpH5MD* other_dump; other_dump=(DumpH5MD*)output->dump[datafile_from_dump]; datafile = other_dump->datafile; group_name_length = strlen(group->names[igroup]); group_name = new char[group_name_length]; strcpy(group_name, group->names[igroup]); if (create_group) { particles_data = h5md_create_particles_group(datafile, group_name); } else { particles_data = other_dump->particles_data; } dims[0] = natoms; dims[1] = domain->dimension; if 
(every_position>0) { particles_data.position = h5md_create_time_data(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, NULL); h5md_create_box(&particles_data, dims[1], boundary, true, NULL, &particles_data.position); } if (every_image>0) particles_data.image = h5md_create_time_data(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, &particles_data.position); if (every_velocity>0) particles_data.velocity = h5md_create_time_data(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, NULL); if (every_force>0) particles_data.force = h5md_create_time_data(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, NULL); if (every_species>0) particles_data.species = h5md_create_time_data(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, NULL); if (every_charge>0) { particles_data.charge = h5md_create_time_data(particles_data.group, "charge", 1, dims, H5T_NATIVE_DOUBLE, NULL); h5md_write_string_attribute(particles_data.group, "charge", "type", "effective"); } } } if (author_name!=NULL) delete [] author_name; for (int i=0; i<3; i++) { delete [] boundary[i]; } } /* ---------------------------------------------------------------------- */ void DumpH5MD::write_header(bigint nbig) { return; } /* ---------------------------------------------------------------------- */ void DumpH5MD::pack(tagint *ids) { int m,n; tagint *tag = atom->tag; double **x = atom->x; double **v = atom->v; double **f = atom->f; int *species = atom->type; double *q = atom->q; imageint *image = atom->image; int *mask = atom->mask; int nlocal = atom->nlocal; int dim=domain->dimension; double xprd = domain->xprd; double yprd = domain->yprd; double zprd = domain->zprd; m = n = 0; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { if (every_position>=0) { int ix = (image[i] & IMGMASK) - IMGMAX; int iy = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; int iz = (image[i] >> IMG2BITS) - IMGMAX; if (unwrap_flag == 1) { buf[m++] = (x[i][0] + ix * xprd); buf[m++] = (x[i][1] + iy * 
yprd); if (dim>2) buf[m++] = (x[i][2] + iz * zprd); } else { buf[m++] = x[i][0]; buf[m++] = x[i][1]; if (dim>2) buf[m++] = x[i][2]; } if (every_image>=0) { buf[m++] = ix; buf[m++] = iy; if (dim>2) buf[m++] = iz; } } if (every_velocity>=0) { buf[m++] = v[i][0]; buf[m++] = v[i][1]; if (dim>2) buf[m++] = v[i][2]; } if (every_force>=0) { buf[m++] = f[i][0]; buf[m++] = f[i][1]; if (dim>2) buf[m++] = f[i][2]; } if (every_species>=0) buf[m++] = species[i]; if (every_charge>=0) buf[m++] = q[i]; ids[n++] = tag[i]; } } /* ---------------------------------------------------------------------- */ void DumpH5MD::write_data(int n, double *mybuf) { // copy buf atom coords into global array int m = 0; int dim = domain->dimension; int k = dim*ntotal; int k_image = dim*ntotal; int k_velocity = dim*ntotal; int k_force = dim*ntotal; int k_species = ntotal; int k_charge = ntotal; for (int i = 0; i < n; i++) { if (every_position>=0) { for (int j=0; j=0) for (int j=0; j=0) for (int j=0; j=0) for (int j=0; j=0) dump_species[k_species++] = mybuf[m++]; if (every_charge>=0) dump_charge[k_charge++] = mybuf[m++]; ntotal++; } // if last chunk of atoms in this snapshot, write global arrays to file if (ntotal == natoms) { if (every_dump>0) { write_frame(); ntotal = 0; } else { write_fixed_frame(); } } } /* ---------------------------------------------------------------------- */ int DumpH5MD::modify_param(int narg, char **arg) { if (strcmp(arg[0],"unwrap") == 0) { if (narg < 2) error->all(FLERR,"Illegal dump_modify command"); if (strcmp(arg[1],"yes") == 0) unwrap_flag = 1; else if (strcmp(arg[1],"no") == 0) unwrap_flag = 0; else error->all(FLERR,"Illegal dump_modify command"); return 2; } return 0; } /* ---------------------------------------------------------------------- */ void DumpH5MD::write_frame() { int local_step; double local_time; double edges[3]; local_step = update->ntimestep; local_time = local_step * update->dt; edges[0] = boxxhi - boxxlo; edges[1] = boxyhi - boxylo; edges[2] = 
boxzhi - boxzlo; if (every_position>0) { if (local_step % (every_position*every_dump) == 0) { h5md_append(particles_data.position, dump_position, local_step, local_time); h5md_append(particles_data.box_edges, edges, local_step, local_time); if (every_image>0) h5md_append(particles_data.image, dump_image, local_step, local_time); } } else { if (do_box) h5md_append(particles_data.box_edges, edges, local_step, local_time); } if (every_velocity>0 && local_step % (every_velocity*every_dump) == 0) { h5md_append(particles_data.velocity, dump_velocity, local_step, local_time); } if (every_force>0 && local_step % (every_force*every_dump) == 0) { h5md_append(particles_data.force, dump_force, local_step, local_time); } if (every_species>0 && local_step % (every_species*every_dump) == 0) { h5md_append(particles_data.species, dump_species, local_step, local_time); } if (every_charge>0 && local_step % (every_charge*every_dump) == 0) { h5md_append(particles_data.charge, dump_charge, local_step, local_time); } } void DumpH5MD::write_fixed_frame() { double edges[3]; int dims[2]; char *boundary[3]; for (int i=0; i<3; i++) { boundary[i] = new char[9]; if (domain->periodicity[i]==1) { strcpy(boundary[i], "periodic"); } else { strcpy(boundary[i], "none"); } } dims[0] = natoms; dims[1] = domain->dimension; edges[0] = boxxhi - boxxlo; edges[1] = boxyhi - boxylo; edges[2] = boxzhi - boxzlo; if (every_position==0) { particles_data.position = h5md_create_fixed_data_simple(particles_data.group, "position", 2, dims, H5T_NATIVE_DOUBLE, dump_position); h5md_create_box(&particles_data, dims[1], boundary, false, edges, NULL); if (every_image==0) particles_data.image = h5md_create_fixed_data_simple(particles_data.group, "image", 2, dims, H5T_NATIVE_INT, dump_image); } if (every_velocity==0) particles_data.velocity = h5md_create_fixed_data_simple(particles_data.group, "velocity", 2, dims, H5T_NATIVE_DOUBLE, dump_velocity); if (every_force==0) particles_data.force = 
h5md_create_fixed_data_simple(particles_data.group, "force", 2, dims, H5T_NATIVE_DOUBLE, dump_force); if (every_species==0) particles_data.species = h5md_create_fixed_data_simple(particles_data.group, "species", 1, dims, H5T_NATIVE_INT, dump_species); if (every_charge==0) { particles_data.charge = h5md_create_fixed_data_simple(particles_data.group, "charge", 1, dims, H5T_NATIVE_INT, dump_charge); h5md_write_string_attribute(particles_data.group, "charge", "type", "effective"); } for (int i=0; i<3; i++) { delete [] boundary[i]; } } diff --git a/src/USER-MISC/fix_ti_spring.cpp b/src/USER-MISC/fix_ti_spring.cpp index 6b9e3d63b..fbbc747c3 100644 --- a/src/USER-MISC/fix_ti_spring.cpp +++ b/src/USER-MISC/fix_ti_spring.cpp @@ -1,386 +1,386 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- - Contributing authors: + Contributing authors: Rodrigo Freitas (UC Berkeley) - rodrigof@berkeley.edu Mark Asta (UC Berkeley) - mdasta@berkeley.edu Maurice de Koning (Unicamp/Brazil) - dekoning@ifi.unicamp.br ------------------------------------------------------------------------- */ #include #include #include "fix_ti_spring.h" #include "atom.h" #include "update.h" #include "domain.h" #include "respa.h" #include "memory.h" #include "error.h" #include "citeme.h" #include "force.h" using namespace LAMMPS_NS; using namespace FixConst; static const char cite_fix_ti_spring[] = "ti/spring command:\n\n" "@article{freitas2016,\n" " author={Freitas, Rodrigo and Asta, Mark and de Koning, Maurice},\n" " title={Nonequilibrium free-energy calculation of solids using LAMMPS},\n" " journal={Computational Materials Science},\n" " volume={112},\n" " pages={333--341},\n" " year={2016},\n" " publisher={Elsevier}\n" "}\n\n"; /* ---------------------------------------------------------------------- */ FixTISpring::FixTISpring(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (lmp->citeme) lmp->citeme->add(cite_fix_ti_spring); if (narg < 6 || narg > 8) error->all(FLERR,"Illegal fix ti/spring command"); // Flags. restart_peratom = 1; scalar_flag = 1; global_freq = 1; vector_flag = 1; size_vector = 2; global_freq = 1; extscalar = 1; extvector = 1; // disallow resetting the time step, while this fix is defined time_depend = 1; // Spring constant. 
k = force->numeric(FLERR,arg[3]); if (k <= 0.0) error->all(FLERR,"Illegal fix ti/spring command"); // Perform initial allocation of atom-based array // Register with Atom class xoriginal = NULL; grow_arrays(atom->nmax); atom->add_callback(0); atom->add_callback(1); // xoriginal = initial unwrapped positions of atoms double **x = atom->x; int *mask = atom->mask; imageint *image = atom->image; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) domain->unmap(x[i],image[i],xoriginal[i]); else xoriginal[i][0] = xoriginal[i][1] = xoriginal[i][2] = 0.0; } // Time variables. t0 = update->ntimestep; // timestep of original/starting coordinates t_switch = force->bnumeric(FLERR,arg[4]); // Number of steps for switching t_equil = force->bnumeric(FLERR,arg[5]); // Number of steps for equilibration if ((t_switch <= 0) || (t_equil < 0)) error->all(FLERR,"Illegal fix ti/spring command"); // Coupling parameter initialization sf = 1; if (narg > 6) { if (strcmp(arg[6], "function") == 0) sf = force->inumeric(FLERR,arg[7]); else error->all(FLERR,"Illegal fix ti/spring switching function"); if ((sf!=1) && (sf!=2)) error->all(FLERR,"Illegal fix ti/spring switching function"); } lambda = switch_func(0); dlambda = dswitch_func(0); espring = 0.0; } /* ---------------------------------------------------------------------- */ FixTISpring::~FixTISpring() { // unregister callbacks to this fix from Atom class atom->delete_callback(id,0); atom->delete_callback(id,1); // delete locally stored array memory->destroy(xoriginal); } /* ---------------------------------------------------------------------- */ int FixTISpring::setmask() { int mask = 0; mask |= INITIAL_INTEGRATE; mask |= POST_FORCE; mask |= POST_FORCE_RESPA; mask |= MIN_POST_FORCE; mask |= THERMO_ENERGY; return mask; } /* ---------------------------------------------------------------------- */ void FixTISpring::init() { if (strstr(update->integrate_style,"respa")) nlevels_respa = ((Respa *) 
update->integrate)->nlevels; } /* ---------------------------------------------------------------------- */ void FixTISpring::setup(int vflag) { if (strstr(update->integrate_style,"verlet")) post_force(vflag); else { ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1); post_force_respa(vflag,nlevels_respa-1,0); ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1); } } /* ---------------------------------------------------------------------- */ void FixTISpring::min_setup(int vflag) { post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixTISpring::post_force(int vflag) { // do not calculate forces during equilibration if ((update->ntimestep - t0) < t_equil) return; double **x = atom->x; double **f = atom->f; int *mask = atom->mask; imageint *image = atom->image; int nlocal = atom->nlocal; double dx, dy, dz; double unwrap[3]; espring = 0.0; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { domain->unmap(x[i],image[i],unwrap); dx = unwrap[0] - xoriginal[i][0]; dy = unwrap[1] - xoriginal[i][1]; dz = unwrap[2] - xoriginal[i][2]; f[i][0] = (1-lambda) * f[i][0] + lambda * (-k*dx); f[i][1] = (1-lambda) * f[i][1] + lambda * (-k*dy); f[i][2] = (1-lambda) * f[i][2] + lambda * (-k*dz); espring += k * (dx*dx + dy*dy + dz*dz); } espring *= 0.5; } /* ---------------------------------------------------------------------- */ void FixTISpring::post_force_respa(int vflag, int ilevel, int iloop) { if (ilevel == nlevels_respa-1) post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixTISpring::min_post_force(int vflag) { post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixTISpring::initial_integrate(int vflag) { // Update the coupling parameter value if needed if ((update->ntimestep - t0) < t_equil) return; const bigint t = update->ntimestep - (t0+t_equil); const double r_switch = 1.0/t_switch; if ( (t >= 
0) && (t <= t_switch) ) { lambda = switch_func(t*r_switch); dlambda = dswitch_func(t*r_switch); } if ( (t >= t_equil+t_switch) && (t <= (t_equil+2*t_switch)) ) { lambda = switch_func(1.0 - (t - t_switch - t_equil)*r_switch); dlambda = - dswitch_func(1.0 - (t - t_switch - t_equil)*r_switch); } } /* ---------------------------------------------------------------------- energy of stretched springs ------------------------------------------------------------------------- */ double FixTISpring::compute_scalar() { double all; MPI_Allreduce(&espring,&all,1,MPI_DOUBLE,MPI_SUM,world); return all; } /* ---------------------------------------------------------------------- information about coupling parameter ------------------------------------------------------------------------- */ double FixTISpring::compute_vector(int n) { linfo[0] = lambda; linfo[1] = dlambda; return linfo[n]; } /* ---------------------------------------------------------------------- memory usage of local atom-based array ------------------------------------------------------------------------- */ double FixTISpring::memory_usage() { double bytes = atom->nmax*3 * sizeof(double); return bytes; } /* ---------------------------------------------------------------------- allocate atom-based array ------------------------------------------------------------------------- */ void FixTISpring::grow_arrays(int nmax) { memory->grow(xoriginal,nmax,3,"fix_ti/spring:xoriginal"); } /* ---------------------------------------------------------------------- copy values within local atom-based array ------------------------------------------------------------------------- */ void FixTISpring::copy_arrays(int i, int j, int delflag) { xoriginal[j][0] = xoriginal[i][0]; xoriginal[j][1] = xoriginal[i][1]; xoriginal[j][2] = xoriginal[i][2]; } /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc 
------------------------------------------------------------------------- */ int FixTISpring::pack_exchange(int i, double *buf) { buf[0] = xoriginal[i][0]; buf[1] = xoriginal[i][1]; buf[2] = xoriginal[i][2]; return 3; } /* ---------------------------------------------------------------------- unpack values in local atom-based array from exchange with another proc ------------------------------------------------------------------------- */ int FixTISpring::unpack_exchange(int nlocal, double *buf) { xoriginal[nlocal][0] = buf[0]; xoriginal[nlocal][1] = buf[1]; xoriginal[nlocal][2] = buf[2]; return 3; } /* ---------------------------------------------------------------------- pack values in local atom-based arrays for restart file ------------------------------------------------------------------------- */ int FixTISpring::pack_restart(int i, double *buf) { buf[0] = 4; buf[1] = xoriginal[i][0]; buf[2] = xoriginal[i][1]; buf[3] = xoriginal[i][2]; return 4; } /* ---------------------------------------------------------------------- unpack values from atom->extra array to restart the fix ------------------------------------------------------------------------- */ void FixTISpring::unpack_restart(int nlocal, int nth) { double **extra = atom->extra; // skip to Nth set of extra values int m = 0; for (int i = 0; i < nth; i++) m += static_cast (extra[nlocal][m]); m++; xoriginal[nlocal][0] = extra[nlocal][m++]; xoriginal[nlocal][1] = extra[nlocal][m++]; xoriginal[nlocal][2] = extra[nlocal][m++]; } /* ---------------------------------------------------------------------- maxsize of any atom's restart data ------------------------------------------------------------------------- */ int FixTISpring::maxsize_restart() { return 4; } /* ---------------------------------------------------------------------- size of atom nlocal's restart data ------------------------------------------------------------------------- */ int FixTISpring::size_restart(int nlocal) { return 4; } /* 
---------------------------------------------------------------------- Switching function ------------------------------------------------------------------------- */ double FixTISpring::switch_func(double t) { if (sf == 1) return t; double t2 = t*t; double t5 = t2*t2*t; return ((70.0*t2*t2 - 315.0*t2*t + 540.0*t2 - 420.0*t + 126.0)*t5); } /* ---------------------------------------------------------------------- Switching function derivative ------------------------------------------------------------------------- */ double FixTISpring::dswitch_func(double t) { if(sf == 1) return 1.0/t_switch; double t2 = t*t; double t4 = t2*t2; return ((630*t2*t2 - 2520*t2*t + 3780*t2 - 2520*t + 630)*t4) / t_switch; } diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index 20e60bab2..b3fe2c29e 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -1,367 +1,367 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - OpenMP based threading support for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) + OpenMP based threading support for LAMMPS ------------------------------------------------------------------------- */ #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "neighbor.h" #include "neigh_request.h" #include "universe.h" #include "update.h" #include "integrate.h" #include "min.h" #include "timer.h" #include "fix_omp.h" #include "thr_data.h" #include "thr_omp.h" #include "pair_hybrid.h" #include "bond_hybrid.h" #include "angle_hybrid.h" #include "dihedral_hybrid.h" #include "improper_hybrid.h" #include "kspace.h" #include #include #include #include "suffix.h" using namespace LAMMPS_NS; using namespace FixConst; static int get_tid() { int tid = 0; #if defined(_OPENMP) tid = omp_get_thread_num(); #endif return tid; } /* ---------------------------------------------------------------------- */ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL), _nthr(-1), _neighbor(true), _mixed(false), _reduced(true) { if (narg < 4) error->all(FLERR,"Illegal package omp command"); int nthreads = 1; if (narg > 3) { #if defined(_OPENMP) if (strcmp(arg[3],"0") == 0) #pragma omp parallel default(none) shared(nthreads) nthreads = omp_get_num_threads(); else nthreads = force->inumeric(FLERR,arg[3]); #endif } if (nthreads < 1) error->all(FLERR,"Illegal number of OpenMP threads requested"); int reset_thr = 0; if (nthreads != comm->nthreads) { #if defined(_OPENMP) reset_thr = 1; omp_set_num_threads(nthreads); #endif comm->nthreads = nthreads; } // optional keywords int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package omp command"); if (strcmp(arg[iarg+1],"yes") 
== 0) _neighbor = true; else if (strcmp(arg[iarg+1],"no") == 0) _neighbor = false; else error->all(FLERR,"Illegal package omp command"); iarg += 2; } else error->all(FLERR,"Illegal package omp command"); } // print summary of settings if (comm->me == 0) { #if defined(_OPENMP) const char * const nmode = _neighbor ? "multi-threaded" : "serial"; if (screen) { if (reset_thr) fprintf(screen,"set %d OpenMP thread(s) per MPI task\n", nthreads); fprintf(screen,"using %s neighbor list subroutines\n", nmode); } if (logfile) { if (reset_thr) fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads); fprintf(logfile,"using %s neighbor list subroutines\n", nmode); } #else error->warning(FLERR,"OpenMP support not enabled during compilation; " "using 1 thread only."); #endif } // allocate list for per thread accumulator manager class instances // and then have each thread create an instance of this class to // encourage the OS to use storage that is "close" to each thread's CPU. thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) shared(lmp) #endif { const int tid = get_tid(); Timer *t = new Timer(lmp); thr[tid] = new ThrData(tid,t); } } /* ---------------------------------------------------------------------- */ FixOMP::~FixOMP() { for (int i=0; i < _nthr; ++i) delete thr[i]; delete[] thr; } /* ---------------------------------------------------------------------- */ int FixOMP::setmask() { int mask = 0; mask |= PRE_FORCE; mask |= PRE_FORCE_RESPA; mask |= MIN_PRE_FORCE; return mask; } /* ---------------------------------------------------------------------- */ void FixOMP::init() { // USER-OMP package cannot be used with atom_style template if (atom->molecular == 2) error->all(FLERR,"USER-OMP package does not (yet) work with " "atom_style template"); // adjust number of data objects when the number of OpenMP // threads has been changed somehow const int nthreads = comm->nthreads; if (_nthr != nthreads) { if (screen) 
fprintf(screen,"Re-init USER-OMP for %d OpenMP thread(s)\n", nthreads); if (logfile) fprintf(logfile,"Re-init USER-OMP for %d OpenMP thread(s)\n", nthreads); for (int i=0; i < _nthr; ++i) delete thr[i]; thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const int tid = get_tid(); Timer *t = new Timer(lmp); thr[tid] = new ThrData(tid,t); } } // reset per thread timer for (int i=0; i < nthreads; ++i) { thr[i]->_timer_active=1; thr[i]->timer(Timer::RESET); thr[i]->_timer_active=-1; } if ((strstr(update->integrate_style,"respa") != NULL) && (strstr(update->integrate_style,"respa/omp") == NULL)) error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles"); int check_hybrid, kspace_split; last_pair_hybrid = NULL; last_omp_style = NULL; const char *last_omp_name = NULL; const char *last_hybrid_name = NULL; const char *last_force_name = NULL; // support for verlet/split operation. // kspace_split == 0 : regular processing // kspace_split < 0 : master partition, does not do kspace // kspace_split > 0 : slave partition, only does kspace if (strstr(update->integrate_style,"verlet/split") != NULL) { if (universe->iworld == 0) kspace_split = -1; else kspace_split = 1; } else { kspace_split = 0; } // determine which is the last force style with OpenMP // support as this is the one that has to reduce the forces #define CheckStyleForOMP(name) \ check_hybrid = 0; \ if (force->name) { \ if ( (strcmp(force->name ## _style,"hybrid") == 0) || \ (strcmp(force->name ## _style,"hybrid/overlay") == 0) ) \ check_hybrid=1; \ if (force->name->suffix_flag & Suffix::OMP) { \ last_force_name = (const char *) #name; \ last_omp_name = force->name ## _style; \ last_omp_style = (void *) force->name; \ } \ } #define CheckHybridForOMP(name,Class) \ if (check_hybrid) { \ Class ## Hybrid *style = (Class ## Hybrid *) force->name; \ for (int i=0; i < style->nstyles; i++) { \ if (style->styles[i]->suffix_flag & Suffix::OMP) { \ 
last_force_name = (const char *) #name; \ last_omp_name = style->keywords[i]; \ last_omp_style = style->styles[i]; \ } \ } \ } if (kspace_split <= 0) { CheckStyleForOMP(pair); CheckHybridForOMP(pair,Pair); if (check_hybrid) { last_pair_hybrid = last_omp_style; last_hybrid_name = last_omp_name; } CheckStyleForOMP(bond); CheckHybridForOMP(bond,Bond); CheckStyleForOMP(angle); CheckHybridForOMP(angle,Angle); CheckStyleForOMP(dihedral); CheckHybridForOMP(dihedral,Dihedral); CheckStyleForOMP(improper); CheckHybridForOMP(improper,Improper); } if (kspace_split >= 0) { CheckStyleForOMP(kspace); } #undef CheckStyleForOMP #undef CheckHybridForOMP set_neighbor_omp(); // diagnostic output if (comm->me == 0) { if (last_omp_style) { if (last_pair_hybrid) { if (screen) fprintf(screen,"Hybrid pair style last /omp style %s\n", last_hybrid_name); if (logfile) fprintf(logfile,"Hybrid pair style last /omp style %s\n", last_hybrid_name); } if (screen) fprintf(screen,"Last active /omp style is %s_style %s\n", last_force_name, last_omp_name); if (logfile) fprintf(logfile,"Last active /omp style is %s_style %s\n", last_force_name, last_omp_name); } else { if (screen) fprintf(screen,"No /omp style for force computation currently active\n"); if (logfile) fprintf(logfile,"No /omp style for force computation currently active\n"); } } } /* ---------------------------------------------------------------------- */ void FixOMP::set_neighbor_omp() { // select or deselect multi-threaded neighbor // list build depending on setting in package omp. // NOTE: since we are at the top of the list of // fixes, we cannot adjust neighbor lists from // other fixes. those have to be re-implemented // as /omp fix styles. :-( const int neigh_omp = _neighbor ? 
1 : 0; const int nrequest = neighbor->nrequest; for (int i = 0; i < nrequest; ++i) neighbor->requests[i]->omp = neigh_omp; } /* ---------------------------------------------------------------------- */ void FixOMP::setup(int) { // we are post the force compute in setup. turn on timers for (int i=0; i < _nthr; ++i) thr[i]->_timer_active=0; } /* ---------------------------------------------------------------------- */ // adjust size and clear out per thread accumulator arrays void FixOMP::pre_force(int) { const int nall = atom->nlocal + atom->nghost; double **f = atom->f; double **torque = atom->torque; double *erforce = atom->erforce; double *de = atom->de; double *drho = atom->drho; #if defined(_OPENMP) #pragma omp parallel default(none) shared(f,torque,erforce,de,drho) #endif { const int tid = get_tid(); thr[tid]->check_tid(tid); thr[tid]->init_force(nall,f,torque,erforce,de,drho); } // end of omp parallel region _reduced = false; } /* ---------------------------------------------------------------------- */ double FixOMP::memory_usage() { double bytes = _nthr * (sizeof(ThrData *) + sizeof(ThrData)); bytes += _nthr * thr[0]->memory_usage(); return bytes; } diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp index 277da9d4b..16d3001dd 100644 --- a/src/USER-OMP/pppm_disp_omp.cpp +++ b/src/USER-OMP/pppm_disp_omp.cpp @@ -1,1872 +1,1873 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U), Rolf Isele-Holder (RWTH Aachen University) + Contributing authors: Axel Kohlmeyer (Temple U) + Rolf Isele-Holder (RWTH Aachen University) ------------------------------------------------------------------------- */ #include "pppm_disp_omp.h" #include "atom.h" #include "comm.h" #include "domain.h" #include "force.h" #include "memory.h" #include "math_const.h" #include #include #include "suffix.h" using namespace LAMMPS_NS; using namespace MathConst; #ifdef FFT_SINGLE #define ZEROF 0.0f #define ONEF 1.0f #else #define ZEROF 0.0 #define ONEF 1.0 #endif #define OFFSET 16384 /* ---------------------------------------------------------------------- */ PPPMDispOMP::PPPMDispOMP(LAMMPS *lmp, int narg, char **arg) : PPPMDisp(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) { triclinic_support = 0; suffix_flag |= Suffix::OMP; } /* ---------------------------------------------------------------------- */ PPPMDispOMP::~PPPMDispOMP() { deallocate(); } /* ---------------------------------------------------------------------- allocate memory that depends on # of K-vectors and order ------------------------------------------------------------------------- */ void PPPMDispOMP::allocate() { PPPMDisp::allocate(); #if defined(_OPENMP) #pragma omp parallel default(none) #endif { #if defined(_OPENMP) const int tid = omp_get_thread_num(); #else const int tid = 0; #endif if (function[0]) { ThrData *thr = fix->get_thr(tid); thr->init_pppm(order,memory); } if (function[1] + function[2]) { ThrData * thr = fix->get_thr(tid); thr->init_pppm_disp(order_6,memory); } } } /* ---------------------------------------------------------------------- free memory that depends on # of K-vectors and order ------------------------------------------------------------------------- */ void PPPMDispOMP::deallocate() { 
PPPMDisp::deallocate(); #if defined(_OPENMP) #pragma omp parallel default(none) #endif { #if defined(_OPENMP) const int tid = omp_get_thread_num(); #else const int tid = 0; #endif if (function[0]) { ThrData * thr = fix->get_thr(tid); thr->init_pppm(-order,memory); } if (function[1] + function[2]) { ThrData * thr = fix->get_thr(tid); thr->init_pppm_disp(-order_6,memory); } } } /* ---------------------------------------------------------------------- Compute the modified (hockney-eastwood) coulomb green function ------------------------------------------------------------------------- */ void PPPMDispOMP::compute_gf() { #if defined(_OPENMP) #pragma omp parallel default(none) #endif { double *prd; if (triclinic == 0) prd = domain->prd; else prd = domain->prd_lamda; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; double unitkx = (2.0*MY_PI/xprd); double unitky = (2.0*MY_PI/yprd); double unitkz = (2.0*MY_PI/zprd_slab); int tid,nn,nnfrom,nnto,k,l,m; int kper,lper,mper; double snx,sny,snz,snx2,sny2,snz2; double sqk; double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; double numerator,denominator; const int nnx = nxhi_fft-nxlo_fft+1; const int nny = nyhi_fft-nylo_fft+1; loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); for (m = nzlo_fft; m <= nzhi_fft; m++) { mper = m - nz_pppm*(2*m/nz_pppm); qz = unitkz*mper; snz = sin(0.5*qz*zprd_slab/nz_pppm); snz2 = snz*snz; sz = exp(-0.25*pow(qz/g_ewald,2.0)); wz = 1.0; argz = 0.5*qz*zprd_slab/nz_pppm; if (argz != 0.0) wz = pow(sin(argz)/argz,order); wz *= wz; for (l = nylo_fft; l <= nyhi_fft; l++) { lper = l - ny_pppm*(2*l/ny_pppm); qy = unitky*lper; sny = sin(0.5*qy*yprd/ny_pppm); sny2 = sny*sny; sy = exp(-0.25*pow(qy/g_ewald,2.0)); wy = 1.0; argy = 0.5*qy*yprd/ny_pppm; if (argy != 0.0) wy = pow(sin(argy)/argy,order); wy *= wy; for (k = nxlo_fft; k <= nxhi_fft; k++) { /* only compute the part designated to this 
thread */ nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft)); if ((nn < nnfrom) || (nn >=nnto)) continue; kper = k - nx_pppm*(2*k/nx_pppm); qx = unitkx*kper; snx = sin(0.5*qx*xprd/nx_pppm); snx2 = snx*snx; sx = exp(-0.25*pow(qx/g_ewald,2.0)); wx = 1.0; argx = 0.5*qx*xprd/nx_pppm; if (argx != 0.0) wx = pow(sin(argx)/argx,order); wx *= wx; sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); if (sqk != 0.0) { numerator = 4.0*MY_PI/sqk; denominator = gf_denom(snx2,sny2,snz2, gf_b, order); greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator; } else greensfn[nn] = 0.0; } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- Compyute the modified (hockney-eastwood) dispersion green function ------------------------------------------------------------------------- */ void PPPMDispOMP::compute_gf_6() { #if defined(_OPENMP) #pragma omp parallel default(none) #endif { double *prd; int k,l,m,nn; // volume-dependent factors // adjust z dimension for 2d slab PPPM // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 if (triclinic == 0) prd = domain->prd; else prd = domain->prd_lamda; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; double unitkx = (2.0*MY_PI/xprd); double unitky = (2.0*MY_PI/yprd); double unitkz = (2.0*MY_PI/zprd_slab); int kper,lper,mper; double sqk; double snx,sny,snz,snx2,sny2,snz2; double argx,argy,argz,wx,wy,wz,sx,sy,sz; double qx,qy,qz; double rtsqk, term; double numerator,denominator; double inv2ew = 2*g_ewald_6; inv2ew = 1/inv2ew; double rtpi = sqrt(MY_PI); int nnfrom, nnto, tid; numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); const int nnx = nxhi_fft_6-nxlo_fft_6+1; const int nny = nyhi_fft_6-nylo_fft_6+1; loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads); ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); qz = unitkz*mper; snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); snz2 = snz*snz; sz = exp(-qz*qz*inv2ew*inv2ew); wz = 1.0; argz = 0.5*qz*zprd_slab/nz_pppm_6; if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); wz *= wz; for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { lper = l - ny_pppm_6*(2*l/ny_pppm_6); qy = unitky*lper; sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); sny2 = sny*sny; sy = exp(-qy*qy*inv2ew*inv2ew); wy = 1.0; argy = 0.5*qy*yprd/ny_pppm_6; if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); wy *= wy; for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { /* only compute the part designated to this thread */ nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6)); if ((nn < nnfrom) || (nn >=nnto)) continue; kper = k - nx_pppm_6*(2*k/nx_pppm_6); qx = unitkx*kper; snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); snx2 = snx*snx; sx = exp(-qx*qx*inv2ew*inv2ew); wx = 1.0; argx = 0.5*qx*xprd/nx_pppm_6; if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); wx *= wx; sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); if (sqk != 0.0) { denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); rtsqk = sqrt(sqk); term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); greensfn_6[nn] = numerator*term*wx*wy*wz/denominator; } else greensfn_6[nn] = 0.0; } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- run the regular toplevel compute method from plain PPPPM which will have individual methods replaced by our threaded versions and then call the obligatory force reduction. 
------------------------------------------------------------------------- */ void PPPMDispOMP::compute(int eflag, int vflag) { PPPMDisp::compute(eflag,vflag); #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) #endif { #if defined(_OPENMP) const int tid = omp_get_thread_num(); #else const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } /* ---------------------------------------------------------------------- find center grid pt for each of my particles check that full stencil for the particle will fit in my 3d brick store central grid pt indices in part2grid array ------------------------------------------------------------------------- */ void PPPMDispOMP::particle_map(double dxinv, double dyinv, double dzinv, double sft, int ** part2grid, int nup, int nlw, int nxlo_o, int nylo_o, int nzlo_o, int nxhi_o, int nyhi_o, int nzhi_o) { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; const int nlocal = atom->nlocal; const double delxinv = dxinv; const double delyinv = dyinv; const double delzinv = dzinv; const double shift = sft; const int nupper = nup; const int nlower = nlw; const int nxlo_out = nxlo_o; const int nylo_out = nylo_o; const int nzlo_out = nzlo_o; const int nxhi_out = nxhi_o; const int nyhi_out = nyhi_o; const int nzhi_out = nzhi_o; if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2])) error->one(FLERR,"Non-numeric box dimensions. 
Simulation unstable."); int i, flag = 0; #if defined(_OPENMP) #pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) #endif for (i = 0; i < nlocal; i++) { // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // current particle coord can be outside global and local box // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 const int nx = static_cast ((x[i].x-boxlox)*delxinv+shift) - OFFSET; const int ny = static_cast ((x[i].y-boxloy)*delyinv+shift) - OFFSET; const int nz = static_cast ((x[i].z-boxloz)*delzinv+shift) - OFFSET; p2g[i].a = nx; p2g[i].b = ny; p2g[i].t = nz; // check that entire stencil around nx,ny,nz will fit in my 3d brick if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || ny+nlower < nylo_out || ny+nupper > nyhi_out || nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++; } int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); } /* ---------------------------------------------------------------------- create discretized "density" on section of global grid due to my particles density(x,y,z) = charge "density" at grid points of my 3d brick (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) in global grid ------------------------------------------------------------------------- */ void PPPMDispOMP::make_rho_c() { // clear 3d density array FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); // no local atoms => nothing else to do const int nlocal = atom->nlocal; if (nlocal == 0) return; const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const double * _noalias const q = atom->q; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const double boxlox = boxlo[0]; 
const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; // determine range of grid points handled by this thread int i,jfrom,jto,tid; loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv; compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff); const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { const int jn = (nz+n-nzlo_out)*ix*iy; const FFT_SCALAR y0 = z0*r1d[2][n]; for (int m = nlower; m <= nupper; ++m) { const int jm = jn+(ny+m-nylo_out)*ix; const FFT_SCALAR x0 = y0*r1d[1][m]; for (int l = nlower; l <= nupper; ++l) { const int jl = jm+nx+l-nxlo_out; // make sure each thread only updates // "his" elements of the density grid if (jl >= jto) break; if (jl < jfrom) continue; d[jl] += x0*r1d[0][l]; } } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- same as above for dispersion interaction with geometric mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::make_rho_g() { // clear 3d density array FFT_SCALAR * _noalias const d = 
&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); memset(d,0,ngrid_6*sizeof(FFT_SCALAR)); // no local atoms => nothing else to do const int nlocal = atom->nlocal; if (nlocal == 0) return; const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; // determine range of grid points handled by this thread int i,jfrom,jto,tid; loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. 
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); const int type = atom->type[i]; const double lj = B[type]; const FFT_SCALAR z0 = delvolinv_6 * lj; for (int n = nlower_6; n <= nupper_6; ++n) { const int jn = (nz+n-nzlo_out_6)*ix*iy; const FFT_SCALAR y0 = z0*r1d[2][n]; for (int m = nlower_6; m <= nupper_6; ++m) { const int jm = jn+(ny+m-nylo_out_6)*ix; const FFT_SCALAR x0 = y0*r1d[1][m]; for (int l = nlower_6; l <= nupper_6; ++l) { const int jl = jm+nx+l-nxlo_out_6; // make sure each thread only updates // "his" elements of the density grid if (jl >= jto) break; if (jl < jfrom) continue; d[jl] += x0*r1d[0][l]; } } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- same as above for dispersion interaction with arithmetic mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::make_rho_a() { // clear 3d density array FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]); FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]); memset(d0,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d1,0,ngrid_6*sizeof(FFT_SCALAR)); 
memset(d2,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d3,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d4,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d5,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d6,0,ngrid_6*sizeof(FFT_SCALAR)); // no local atoms => nothing else to do const int nlocal = atom->nlocal; if (nlocal == 0) return; const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; // determine range of grid points handled by this thread int i,jfrom,jto,tid; loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. 
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); const int type = atom->type[i]; const double lj0 = B[7*type]; const double lj1 = B[7*type+1]; const double lj2 = B[7*type+2]; const double lj3 = B[7*type+3]; const double lj4 = B[7*type+4]; const double lj5 = B[7*type+5]; const double lj6 = B[7*type+6]; const FFT_SCALAR z0 = delvolinv_6; for (int n = nlower_6; n <= nupper_6; ++n) { const int jn = (nz+n-nzlo_out_6)*ix*iy; const FFT_SCALAR y0 = z0*r1d[2][n]; for (int m = nlower_6; m <= nupper_6; ++m) { const int jm = jn+(ny+m-nylo_out_6)*ix; const FFT_SCALAR x0 = y0*r1d[1][m]; for (int l = nlower_6; l <= nupper_6; ++l) { const int jl = jm+nx+l-nxlo_out_6; // make sure each thread only updates // "his" elements of the density grid if (jl >= jto) break; if (jl < jfrom) continue; const double w = x0*r1d[0][l]; d0[jl] += w*lj0; d1[jl] += w*lj1; d2[jl] += w*lj2; d3[jl] += w*lj3; d4[jl] += w*lj4; d5[jl] += w*lj5; d6[jl] += w*lj6; } } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles for ik scheme ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_c_ik() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const q = atom->q; const double 
* const * const x = atom->x; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR ekx,eky,ekz; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*r1d[0][l]; ekx -= x0*vdx_brick[mz][my][mx]; eky -= x0*vdy_brick[mz][my][mx]; ekz -= x0*vdz_brick[mz][my][mx]; } } } // convert E-field to force const double qfactor = qqrd2e*scale*q[i]; f[i][0] += qfactor*ekx; f[i][1] += qfactor*eky; f[i][2] += qfactor*ekz; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles for ad scheme ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_c_ad() { const int nlocal = atom->nlocal; // no local atoms => 
nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const q = atom->q; const double * const * const x = atom->x; const double qqrd2e = force->qqrd2e; //const double * const sf_c = sf_coeff; double *prd; if (triclinic == 0) prd = domain->prd; else prd = domain->prd_lamda; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; const double hx_inv = nx_pppm/xprd; const double hy_inv = ny_pppm/yprd; const double hz_inv = nz_pppm/zprd_slab; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz; FFT_SCALAR ekx,eky,ekz; double sf = 0.0; double s1,s2,s3; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); compute_drho1d_thr(dr1d,dx,dy,dz, order, drho_coeff); ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; for (m = nlower; m <= nupper; m++) { my = m+ny; for (l = nlower; l <= nupper; l++) { mx = l+nx; ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick[mz][my][mx]; } } } ekx *= hx_inv; eky *= hy_inv; ekz *= hz_inv; // convert E-field to force const double qfactor = qqrd2e*scale; s1 = x[i][0]*hx_inv; s2 = x[i][1]*hy_inv; s3 = x[i][2]*hz_inv; sf = sf_coeff[0]*sin(2*MY_PI*s1); sf += sf_coeff[1]*sin(4*MY_PI*s1); sf *= 2*q[i]*q[i]; f[i][0] += qfactor*(ekx*q[i] - sf); sf = sf_coeff[2]*sin(2*MY_PI*s2); sf += sf_coeff[3]*sin(4*MY_PI*s2); sf *= 2*q[i]*q[i]; f[i][1] += qfactor*(eky*q[i] - sf); sf = sf_coeff[4]*sin(2*MY_PI*s3); sf += sf_coeff[5]*sin(4*MY_PI*s3); sf *= 2*q[i]*q[i]; if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get per-atom energy/virial 
------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_c_peratom() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const q = atom->q; const double * const * const x = atom->x; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR u,v0,v1,v2,v3,v4,v5; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid[i][0]; ny = part2grid[i][1]; nz = part2grid[i][2]; dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*r1d[0][l]; if (eflag_atom) u += x0*u_brick[mz][my][mx]; if (vflag_atom) { v0 += x0*v0_brick[mz][my][mx]; v1 += x0*v1_brick[mz][my][mx]; v2 += x0*v2_brick[mz][my][mx]; v3 += x0*v3_brick[mz][my][mx]; v4 += 
x0*v4_brick[mz][my][mx]; v5 += x0*v5_brick[mz][my][mx]; } } } } const double qfactor = 0.5*force->qqrd2e * scale * q[i]; if (eflag_atom) eatom[i] += u*qfactor; if (vflag_atom) { vatom[i][0] += v0*qfactor; vatom[i][1] += v1*qfactor; vatom[i][2] += v2*qfactor; vatom[i][3] += v3*qfactor; vatom[i][4] += v4*qfactor; vatom[i][5] += v5*qfactor; } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get dispersion field & force on my particles for ik scheme and geometric mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_g_ik() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const * const x = atom->x; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR ekx,eky,ekz; int type; double lj; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); ekx = eky = ekz = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower_6; m <= nupper_6; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; x0 = y0*r1d[0][l]; ekx -= x0*vdx_brick_g[mz][my][mx]; eky -= x0*vdy_brick_g[mz][my][mx]; ekz -= x0*vdz_brick_g[mz][my][mx]; } } } // convert E-field to force type = atom->type[i]; lj = B[type]; f[i][0] += lj*ekx; f[i][1] += lj*eky; f[i][2] += lj*ekz; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get dispersion field & force on my particles for ad scheme and geometric mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_g_ad() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * 
const * const x = atom->x; double *prd; if (triclinic == 0) prd = domain->prd; else prd = domain->prd_lamda; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; const double hx_inv = nx_pppm_6/xprd; const double hy_inv = ny_pppm_6/yprd; const double hz_inv = nz_pppm_6/zprd_slab; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz; FFT_SCALAR ekx,eky,ekz; int type; double lj; double sf = 0.0; double s1,s2,s3; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); ekx = eky = ekz = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; for (m = nlower_6; m <= nupper_6; m++) { my = m+ny; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx]; } } } ekx *= hx_inv; eky *= 
hy_inv; ekz *= hz_inv; // convert E-field to force type = atom->type[i]; lj = B[type]; s1 = x[i][0]*hx_inv; s2 = x[i][1]*hy_inv; s3 = x[i][2]*hz_inv; sf = sf_coeff_6[0]*sin(2*MY_PI*s1); sf += sf_coeff_6[1]*sin(4*MY_PI*s1); sf *= 2*lj*lj; f[i][0] += ekx*lj - sf; sf = sf_coeff_6[2]*sin(2*MY_PI*s2); sf += sf_coeff_6[3]*sin(4*MY_PI*s2); sf *= 2*lj*lj; f[i][1] += eky*lj - sf; sf = sf_coeff_6[4]*sin(2*MY_PI*s3); sf += sf_coeff_6[5]*sin(4*MY_PI*s3); sf *= 2*lj*lj; if (slabflag != 2) f[i][2] += ekz*lj - sf; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get per-atom energy/virial for dispersion interaction and geometric mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_g_peratom() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR u,v0,v1,v2,v3,v4,v5; int type; double lj; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower_6; m <= nupper_6; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; x0 = y0*r1d[0][l]; if (eflag_atom) u += x0*u_brick_g[mz][my][mx]; if (vflag_atom) { v0 += x0*v0_brick_g[mz][my][mx]; v1 += x0*v1_brick_g[mz][my][mx]; v2 += x0*v2_brick_g[mz][my][mx]; v3 += x0*v3_brick_g[mz][my][mx]; v4 += x0*v4_brick_g[mz][my][mx]; v5 += x0*v5_brick_g[mz][my][mx]; } } } } type = atom->type[i]; lj = B[type]*0.5; if (eflag_atom) eatom[i] += u*lj; if (vflag_atom) { vatom[i][0] += v0*lj; vatom[i][1] += v1*lj; vatom[i][2] += v2*lj; vatom[i][3] += v3*lj; vatom[i][4] += v4*lj; vatom[i][5] += v5*lj; } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get dispersion field & force on my particles for ik scheme and arithmetic mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_a_ik() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from 
nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const * const x = atom->x; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; FFT_SCALAR ekx6, eky6, ekz6; int type; double lj0,lj1,lj2,lj3,lj4,lj5,lj6; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); ekx0 = eky0 = ekz0 = ZEROF; ekx1 = eky1 = ekz1 = ZEROF; ekx2 = eky2 = ekz2 = ZEROF; ekx3 = eky3 = ekz3 = ZEROF; ekx4 = eky4 = ekz4 = ZEROF; ekx5 = eky5 = ekz5 = ZEROF; ekx6 = eky6 = ekz6 = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower_6; m <= nupper_6; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; x0 = y0*r1d[0][l]; ekx0 -= x0*vdx_brick_a0[mz][my][mx]; eky0 
-= x0*vdy_brick_a0[mz][my][mx]; ekz0 -= x0*vdz_brick_a0[mz][my][mx]; ekx1 -= x0*vdx_brick_a1[mz][my][mx]; eky1 -= x0*vdy_brick_a1[mz][my][mx]; ekz1 -= x0*vdz_brick_a1[mz][my][mx]; ekx2 -= x0*vdx_brick_a2[mz][my][mx]; eky2 -= x0*vdy_brick_a2[mz][my][mx]; ekz2 -= x0*vdz_brick_a2[mz][my][mx]; ekx3 -= x0*vdx_brick_a3[mz][my][mx]; eky3 -= x0*vdy_brick_a3[mz][my][mx]; ekz3 -= x0*vdz_brick_a3[mz][my][mx]; ekx4 -= x0*vdx_brick_a4[mz][my][mx]; eky4 -= x0*vdy_brick_a4[mz][my][mx]; ekz4 -= x0*vdz_brick_a4[mz][my][mx]; ekx5 -= x0*vdx_brick_a5[mz][my][mx]; eky5 -= x0*vdy_brick_a5[mz][my][mx]; ekz5 -= x0*vdz_brick_a5[mz][my][mx]; ekx6 -= x0*vdx_brick_a6[mz][my][mx]; eky6 -= x0*vdy_brick_a6[mz][my][mx]; ekz6 -= x0*vdz_brick_a6[mz][my][mx]; } } } // convert D-field to force type = atom->type[i]; lj0 = B[7*type+6]; lj1 = B[7*type+5]; lj2 = B[7*type+4]; lj3 = B[7*type+3]; lj4 = B[7*type+2]; lj5 = B[7*type+1]; lj6 = B[7*type]; f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get dispersion field & force on my particles for ad scheme and arithmetic mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_a_ad() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const double * const * const x = atom->x; double *prd; if (triclinic == 0) 
prd = domain->prd; else prd = domain->prd_lamda; double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; const double hx_inv = nx_pppm_6/xprd; const double hy_inv = ny_pppm_6/yprd; const double hz_inv = nz_pppm_6/zprd_slab; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); double * const * const f = thr->get_f(); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; FFT_SCALAR ekx6, eky6, ekz6; int type; double lj0,lj1,lj2,lj3,lj4,lj5,lj6; double sf = 0.0; double s1,s2,s3; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); ekx0 = eky0 = ekz0 = ZEROF; ekx1 = eky1 = ekz1 = ZEROF; ekx2 = eky2 = ekz2 = ZEROF; ekx3 = eky3 = ekz3 = ZEROF; ekx4 = eky4 = ekz4 = ZEROF; ekx5 = eky5 = ekz5 = ZEROF; ekx6 = eky6 = ekz6 = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; for (m = nlower_6; m <= nupper_6; 
m++) { my = m+ny; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n]; y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n]; z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n]; ekx0 += x0*u_brick_a0[mz][my][mx]; eky0 += y0*u_brick_a0[mz][my][mx]; ekz0 += z0*u_brick_a0[mz][my][mx]; ekx1 += x0*u_brick_a1[mz][my][mx]; eky1 += y0*u_brick_a1[mz][my][mx]; ekz1 += z0*u_brick_a1[mz][my][mx]; ekx2 += x0*u_brick_a2[mz][my][mx]; eky2 += y0*u_brick_a2[mz][my][mx]; ekz2 += z0*u_brick_a2[mz][my][mx]; ekx3 += x0*u_brick_a3[mz][my][mx]; eky3 += y0*u_brick_a3[mz][my][mx]; ekz3 += z0*u_brick_a3[mz][my][mx]; ekx4 += x0*u_brick_a4[mz][my][mx]; eky4 += y0*u_brick_a4[mz][my][mx]; ekz4 += z0*u_brick_a4[mz][my][mx]; ekx5 += x0*u_brick_a5[mz][my][mx]; eky5 += y0*u_brick_a5[mz][my][mx]; ekz5 += z0*u_brick_a5[mz][my][mx]; ekx6 += x0*u_brick_a6[mz][my][mx]; eky6 += y0*u_brick_a6[mz][my][mx]; ekz6 += z0*u_brick_a6[mz][my][mx]; } } } ekx0 *= hx_inv; eky0 *= hy_inv; ekz0 *= hz_inv; ekx1 *= hx_inv; eky1 *= hy_inv; ekz1 *= hz_inv; ekx2 *= hx_inv; eky2 *= hy_inv; ekz2 *= hz_inv; ekx3 *= hx_inv; eky3 *= hy_inv; ekz3 *= hz_inv; ekx4 *= hx_inv; eky4 *= hy_inv; ekz4 *= hz_inv; ekx5 *= hx_inv; eky5 *= hy_inv; ekz5 *= hz_inv; ekx6 *= hx_inv; eky6 *= hy_inv; ekz6 *= hz_inv; // convert D-field to force type = atom->type[i]; lj0 = B[7*type+6]; lj1 = B[7*type+5]; lj2 = B[7*type+4]; lj3 = B[7*type+3]; lj4 = B[7*type+2]; lj5 = B[7*type+1]; lj6 = B[7*type]; s1 = x[i][0]*hx_inv; s2 = x[i][1]*hy_inv; s3 = x[i][2]*hz_inv; sf = sf_coeff_6[0]*sin(2*MY_PI*s1); sf += sf_coeff_6[1]*sin(4*MY_PI*s1); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; sf = sf_coeff_6[2]*sin(2*MY_PI*s2); sf += sf_coeff_6[3]*sin(4*MY_PI*s2); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; sf = sf_coeff_6[4]*sin(2*MY_PI*s3); sf += 
sf_coeff_6[5]*sin(4*MY_PI*s3); sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get per-atom energy/virial for dispersion interaction and arithmetic mixing rule ------------------------------------------------------------------------- */ void PPPMDispOMP::fieldforce_a_peratom() { const int nlocal = atom->nlocal; // no local atoms => nothing to do if (nlocal == 0) return; // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; #if defined(_OPENMP) const int nthreads = comm->nthreads; #pragma omp parallel default(none) #endif { #if defined(_OPENMP) // each thread works on a fixed chunk of atoms. const int tid = omp_get_thread_num(); const int inum = nlocal; const int idelta = 1 + inum/nthreads; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; #else const int ifrom = 0; const int ito = nlocal; const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); int l,m,n,nx,ny,nz,mx,my,mz; FFT_SCALAR dx,dy,dz,x0,y0,z0; FFT_SCALAR u0,v00,v10,v20,v30,v40,v50; FFT_SCALAR u1,v01,v11,v21,v31,v41,v51; FFT_SCALAR u2,v02,v12,v22,v32,v42,v52; FFT_SCALAR u3,v03,v13,v23,v33,v43,v53; FFT_SCALAR u4,v04,v14,v24,v34,v44,v54; FFT_SCALAR u5,v05,v15,v25,v35,v45,v55; FFT_SCALAR u6,v06,v16,v26,v36,v46,v56; int type; double lj0,lj1,lj2,lj3,lj4,lj5,lj6; // this if protects against having more threads than local atoms if (ifrom < nlocal) { for (int i = ifrom; i < ito; i++) { nx = part2grid_6[i][0]; ny = part2grid_6[i][1]; nz = part2grid_6[i][2]; dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; for (n = nlower_6; n <= nupper_6; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower_6; m <= nupper_6; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower_6; l <= nupper_6; l++) { mx = l+nx; x0 = y0*r1d[0][l]; if (eflag_atom) { u0 += x0*u_brick_a0[mz][my][mx]; u1 += x0*u_brick_a1[mz][my][mx]; u2 += x0*u_brick_a2[mz][my][mx]; u3 += x0*u_brick_a3[mz][my][mx]; u4 += x0*u_brick_a4[mz][my][mx]; u5 += x0*u_brick_a5[mz][my][mx]; u6 += x0*u_brick_a6[mz][my][mx]; } if (vflag_atom) { v00 += x0*v0_brick_a0[mz][my][mx]; v10 += x0*v1_brick_a0[mz][my][mx]; v20 += x0*v2_brick_a0[mz][my][mx]; v30 += x0*v3_brick_a0[mz][my][mx]; v40 += x0*v4_brick_a0[mz][my][mx]; 
v50 += x0*v5_brick_a0[mz][my][mx]; v01 += x0*v0_brick_a1[mz][my][mx]; v11 += x0*v1_brick_a1[mz][my][mx]; v21 += x0*v2_brick_a1[mz][my][mx]; v31 += x0*v3_brick_a1[mz][my][mx]; v41 += x0*v4_brick_a1[mz][my][mx]; v51 += x0*v5_brick_a1[mz][my][mx]; v02 += x0*v0_brick_a2[mz][my][mx]; v12 += x0*v1_brick_a2[mz][my][mx]; v22 += x0*v2_brick_a2[mz][my][mx]; v32 += x0*v3_brick_a2[mz][my][mx]; v42 += x0*v4_brick_a2[mz][my][mx]; v52 += x0*v5_brick_a2[mz][my][mx]; v03 += x0*v0_brick_a3[mz][my][mx]; v13 += x0*v1_brick_a3[mz][my][mx]; v23 += x0*v2_brick_a3[mz][my][mx]; v33 += x0*v3_brick_a3[mz][my][mx]; v43 += x0*v4_brick_a3[mz][my][mx]; v53 += x0*v5_brick_a3[mz][my][mx]; v04 += x0*v0_brick_a4[mz][my][mx]; v14 += x0*v1_brick_a4[mz][my][mx]; v24 += x0*v2_brick_a4[mz][my][mx]; v34 += x0*v3_brick_a4[mz][my][mx]; v44 += x0*v4_brick_a4[mz][my][mx]; v54 += x0*v5_brick_a4[mz][my][mx]; v05 += x0*v0_brick_a5[mz][my][mx]; v15 += x0*v1_brick_a5[mz][my][mx]; v25 += x0*v2_brick_a5[mz][my][mx]; v35 += x0*v3_brick_a5[mz][my][mx]; v45 += x0*v4_brick_a5[mz][my][mx]; v55 += x0*v5_brick_a5[mz][my][mx]; v06 += x0*v0_brick_a6[mz][my][mx]; v16 += x0*v1_brick_a6[mz][my][mx]; v26 += x0*v2_brick_a6[mz][my][mx]; v36 += x0*v3_brick_a6[mz][my][mx]; v46 += x0*v4_brick_a6[mz][my][mx]; v56 += x0*v5_brick_a6[mz][my][mx]; } } } } // convert D-field to force type = atom->type[i]; lj0 = B[7*type+6]*0.5; lj1 = B[7*type+5]*0.5; lj2 = B[7*type+4]*0.5; lj3 = B[7*type+3]*0.5; lj4 = B[7*type+2]*0.5; lj5 = B[7*type+1]*0.5; lj6 = B[7*type]*0.5; if (eflag_atom) eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6; if (vflag_atom) { vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + v04*lj4 + v05*lj5 + v06*lj6; vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + v14*lj4 + v15*lj5 + v16*lj6; vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + v24*lj4 + v25*lj5 + v26*lj6; vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + v34*lj4 + v35*lj5 + v36*lj6; vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 
+ v43*lj3 + v44*lj4 + v45*lj5 + v46*lj6; vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + v54*lj4 + v55*lj5 + v56*lj6; } } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- charge assignment into rho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ void PPPMDispOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz, const int ord, FFT_SCALAR * const * const rho_c) { int k,l; FFT_SCALAR r1,r2,r3; for (k = (1-ord)/2; k <= ord/2; k++) { r1 = r2 = r3 = ZEROF; for (l = ord-1; l >= 0; l--) { r1 = rho_c[l][k] + r1*dx; r2 = rho_c[l][k] + r2*dy; r3 = rho_c[l][k] + r3*dz; } r1d[0][k] = r1; r1d[1][k] = r2; r1d[2][k] = r3; } } /* ---------------------------------------------------------------------- charge assignment into drho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ void PPPMDispOMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz, const int ord, FFT_SCALAR * const * const drho_c) { int k,l; FFT_SCALAR r1,r2,r3; for (k = (1-ord)/2; k <= ord/2; k++) { r1 = r2 = r3 = ZEROF; for (l = ord-2; l >= 0; l--) { r1 = drho_c[l][k] + r1*dx; r2 = drho_c[l][k] + r2*dy; r3 = drho_c[l][k] + r3*dz; } dr1d[0][k] = r1; dr1d[1][k] = r2; dr1d[2][k] = r3; } } diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp index cc184d9d6..0e9eafb2f 100644 --- a/src/USER-OMP/thr_data.cpp +++ b/src/USER-OMP/thr_data.cpp @@ -1,369 +1,369 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) 
Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - per-thread data management for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) + per-thread data management for LAMMPS ------------------------------------------------------------------------- */ #include "thr_data.h" #include #include #include "memory.h" #include "timer.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ ThrData::ThrData(int tid, Timer *t) : _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0), _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid), _timer(t) { _timer_active = 0; } /* ---------------------------------------------------------------------- */ void ThrData::check_tid(int tid) { if (tid != _tid) fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid); } /* ---------------------------------------------------------------------- */ void ThrData::_stamp(enum Timer::ttype flag) { // do nothing until it gets set to 0 in ::setup() if (_timer_active < 0) return; if (flag == Timer::START) { _timer_active = 1; } if (_timer_active) _timer->stamp(flag); } /* ---------------------------------------------------------------------- */ double ThrData::get_time(enum Timer::ttype flag) { if (_timer) return _timer->get_wall(flag); else return 0.0; } /* ---------------------------------------------------------------------- */ void ThrData::init_force(int nall, double **f, double **torque, double *erforce, double *de, double *drho) { eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0; 
memset(virial_pair,0,6*sizeof(double)); memset(virial_bond,0,6*sizeof(double)); memset(virial_angle,0,6*sizeof(double)); memset(virial_dihed,0,6*sizeof(double)); memset(virial_imprp,0,6*sizeof(double)); memset(virial_kspce,0,6*sizeof(double)); eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL; vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL; if (nall >= 0 && f) { _f = f + _tid*nall; memset(&(_f[0][0]),0,nall*3*sizeof(double)); } else _f = NULL; if (nall >= 0 && torque) { _torque = torque + _tid*nall; memset(&(_torque[0][0]),0,nall*3*sizeof(double)); } else _torque = NULL; if (nall >= 0 && erforce) { _erforce = erforce + _tid*nall; memset(&(_erforce[0]),0,nall*sizeof(double)); } else _erforce = NULL; if (nall >= 0 && de) { _de = de + _tid*nall; memset(&(_de[0]),0,nall*sizeof(double)); } else _de = NULL; if (nall >= 0 && drho) { _drho = drho + _tid*nall; memset(&(_drho[0]),0,nall*sizeof(double)); } else _drho = NULL; } /* ---------------------------------------------------------------------- set up and clear out locally managed per atom arrays ------------------------------------------------------------------------- */ void ThrData::init_eam(int nall, double *rho) { if (nall >= 0 && rho) { _rho = rho + _tid*nall; memset(_rho, 0, nall*sizeof(double)); } } /* ---------------------------------------------------------------------- */ void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda) { init_eam(nall, rho); if (nall >= 0 && mu && lambda) { _mu = mu + _tid*nall; _lambda = lambda + _tid*nall; memset(&(_mu[0][0]), 0, nall*3*sizeof(double)); memset(&(_lambda[0][0]), 0, nall*6*sizeof(double)); } } /* ---------------------------------------------------------------------- */ void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values) { init_eam(nall, rho); if (nall >= 0 && rhoB && D_values) { _rhoB = rhoB + _tid*nall; _D_values = D_values + _tid*nall; memset(_rhoB, 0, 
nall*sizeof(double)); memset(_D_values, 0, nall*sizeof(double)); } } /* ---------------------------------------------------------------------- */ void ThrData::init_eim(int nall, double *rho, double *fp) { init_eam(nall, rho); if (nall >= 0 && fp) { _fp = fp + _tid*nall; memset(_fp,0,nall*sizeof(double)); } } /* ---------------------------------------------------------------------- if order > 0 : set up per thread storage for PPPM if order < 0 : free per thread storage for PPPM ------------------------------------------------------------------------- */ #if defined(FFT_SINGLE) typedef float FFT_SCALAR; #else typedef double FFT_SCALAR; #endif void ThrData::init_pppm(int order, Memory *memory) { FFT_SCALAR **rho1d, **drho1d; if (order > 0) { memory->create2d_offset(rho1d,3,-order/2,order/2,"thr_data:rho1d"); memory->create2d_offset(drho1d,3,-order/2,order/2,"thr_data:drho1d"); _rho1d = static_cast(rho1d); _drho1d = static_cast(drho1d); } else { order = -order; rho1d = static_cast(_rho1d); drho1d = static_cast(_drho1d); memory->destroy2d_offset(rho1d,-order/2); memory->destroy2d_offset(drho1d,-order/2); } } /* ---------------------------------------------------------------------- if order > 0 : set up per thread storage for PPPM if order < 0 : free per thread storage for PPPM ------------------------------------------------------------------------- */ #if defined(FFT_SINGLE) typedef float FFT_SCALAR; #else typedef double FFT_SCALAR; #endif void ThrData::init_pppm_disp(int order_6, Memory *memory) { FFT_SCALAR **rho1d_6, **drho1d_6; if (order_6 > 0) { memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"thr_data:rho1d_6"); memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"thr_data:drho1d_6"); _rho1d_6 = static_cast(rho1d_6); _drho1d_6 = static_cast(drho1d_6); } else { order_6 = -order_6; rho1d_6 = static_cast(_rho1d_6); drho1d_6 = static_cast(_drho1d_6); memory->destroy2d_offset(rho1d_6,-order_6/2); memory->destroy2d_offset(drho1d_6,-order_6/2); } } /* 
---------------------------------------------------------------------- compute global pair virial via summing F dot r over own & ghost atoms at this point, only pairwise forces have been accumulated in atom->f ------------------------------------------------------------------------- */ void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst) { // sum over force on all particles including ghosts if (nfirst < 0) { int nall = nlocal + nghost; for (int i = 0; i < nall; i++) { virial_pair[0] += _f[i][0]*x[i][0]; virial_pair[1] += _f[i][1]*x[i][1]; virial_pair[2] += _f[i][2]*x[i][2]; virial_pair[3] += _f[i][1]*x[i][0]; virial_pair[4] += _f[i][2]*x[i][0]; virial_pair[5] += _f[i][2]*x[i][1]; } // neighbor includegroup flag is set // sum over force on initial nfirst particles and ghosts } else { int nall = nfirst; for (int i = 0; i < nall; i++) { virial_pair[0] += _f[i][0]*x[i][0]; virial_pair[1] += _f[i][1]*x[i][1]; virial_pair[2] += _f[i][2]*x[i][2]; virial_pair[3] += _f[i][1]*x[i][0]; virial_pair[4] += _f[i][2]*x[i][0]; virial_pair[5] += _f[i][2]*x[i][1]; } nall = nlocal + nghost; for (int i = nlocal; i < nall; i++) { virial_pair[0] += _f[i][0]*x[i][0]; virial_pair[1] += _f[i][1]*x[i][1]; virial_pair[2] += _f[i][2]*x[i][2]; virial_pair[3] += _f[i][1]*x[i][0]; virial_pair[4] += _f[i][2]*x[i][0]; virial_pair[5] += _f[i][2]*x[i][1]; } } } /* ---------------------------------------------------------------------- */ double ThrData::memory_usage() { double bytes = (7 + 6*6) * sizeof(double); bytes += 2 * sizeof(double*); bytes += 4 * sizeof(int); return bytes; } /* additional helper functions */ // reduce per thread data into the first part of the data // array that is used for the non-threaded parts and reset // the temporary storage to 0.0. this routine depends on // multi-dimensional arrays like force stored in this order // x1,y1,z1,x2,y2,z2,... // we need to post a barrier to wait until all threads are done // with writing to the array . 
void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid) { #if defined(_OPENMP) // NOOP in single-threaded execution. if (nthreads == 1) return; #pragma omp barrier { const int nvals = ndim*nall; const int idelta = nvals/nthreads + 1; const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); #if defined(USER_OMP_NO_UNROLL) if (ifrom < nvals) { int m = 0; for (m = ifrom; m < ito; ++m) { for (int n = 1; n < nthreads; ++n) { dall[m] += dall[n*nvals + m]; dall[n*nvals + m] = 0.0; } } } #else // this if protects against having more threads than atoms if (ifrom < nvals) { int m = 0; // for architectures that have L1 D-cache line sizes of 64 bytes // (8 doubles) wide, explictly unroll this loop to compute 8 // contiguous values in the array at a time // -- modify this code based on the size of the cache line double t0, t1, t2, t3, t4, t5, t6, t7; for (m = ifrom; m < (ito-7); m+=8) { t0 = dall[m ]; t1 = dall[m+1]; t2 = dall[m+2]; t3 = dall[m+3]; t4 = dall[m+4]; t5 = dall[m+5]; t6 = dall[m+6]; t7 = dall[m+7]; for (int n = 1; n < nthreads; ++n) { t0 += dall[n*nvals + m ]; t1 += dall[n*nvals + m+1]; t2 += dall[n*nvals + m+2]; t3 += dall[n*nvals + m+3]; t4 += dall[n*nvals + m+4]; t5 += dall[n*nvals + m+5]; t6 += dall[n*nvals + m+6]; t7 += dall[n*nvals + m+7]; dall[n*nvals + m ] = 0.0; dall[n*nvals + m+1] = 0.0; dall[n*nvals + m+2] = 0.0; dall[n*nvals + m+3] = 0.0; dall[n*nvals + m+4] = 0.0; dall[n*nvals + m+5] = 0.0; dall[n*nvals + m+6] = 0.0; dall[n*nvals + m+7] = 0.0; } dall[m ] = t0; dall[m+1] = t1; dall[m+2] = t2; dall[m+3] = t3; dall[m+4] = t4; dall[m+5] = t5; dall[m+6] = t6; dall[m+7] = t7; } // do the last < 8 values for (; m < ito; m++) { for (int n = 1; n < nthreads; ++n) { dall[m] += dall[n*nvals + m]; dall[n*nvals + m] = 0.0; } } } #endif } #else // NOOP in non-threaded execution. 
return; #endif } diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp index 1aba7290a..1744a7738 100644 --- a/src/USER-OMP/thr_omp.cpp +++ b/src/USER-OMP/thr_omp.cpp @@ -1,1223 +1,1223 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - OpenMP based threading support for LAMMPS Contributing author: Axel Kohlmeyer (Temple U) + OpenMP based threading support for LAMMPS ------------------------------------------------------------------------- */ #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "timer.h" #include "thr_omp.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "compute.h" #include "math_const.h" #include using namespace LAMMPS_NS; using namespace MathConst; /* ---------------------------------------------------------------------- */ ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style), thr_error(0) { // register fix omp with this class int ifix = lmp->modify->find_fix("package_omp"); if (ifix < 0) lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles"); fix = static_cast(lmp->modify->fix[ifix]); } /* ---------------------------------------------------------------------- */ ThrOMP::~ThrOMP() { // nothing to 
do? } /* ---------------------------------------------------------------------- Hook up per thread per atom arrays into the tally infrastructure ---------------------------------------------------------------------- */ void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom, double **vatom, ThrData *thr) { const int tid = thr->get_tid(); if (tid == 0) thr_error = 0; if (thr_style & THR_PAIR) { if (eflag & 2) { thr->eatom_pair = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_pair[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_pair = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_BOND) { if (eflag & 2) { thr->eatom_bond = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_bond[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_bond = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_ANGLE) { if (eflag & 2) { thr->eatom_angle = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_angle[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_angle = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_DIHEDRAL) { if (eflag & 2) { thr->eatom_dihed = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_dihed = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double)); } } if (thr_style & THR_IMPROPER) { if (eflag & 2) { thr->eatom_imprp = eatom + tid*nall; if (nall > 0) memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double)); } if (vflag & 4) { thr->vatom_imprp = vatom + tid*nall; if (nall > 0) memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double)); } } // nothing to do for THR_KSPACE } /* ---------------------------------------------------------------------- Reduce per thread data into the regular structures Reduction of global properties is 
serialized with a "critical" directive, so that only one thread at a time will access the global variables. Since we are not synchronized, this should come with little overhead. The reduction of per-atom properties in contrast is parallelized over threads in the same way as forces. ---------------------------------------------------------------------- */ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, ThrData *const thr) { const int nlocal = lmp->atom->nlocal; const int nghost = lmp->atom->nghost; const int nall = nlocal + nghost; const int nfirst = lmp->atom->nfirst; const int nthreads = lmp->comm->nthreads; const int evflag = eflag | vflag; const int tid = thr->get_tid(); double **f = lmp->atom->f; double **x = lmp->atom->x; int need_force_reduce = 1; if (evflag) sync_threads(); switch (thr_style) { case THR_PAIR: { Pair * const pair = lmp->force->pair; if (pair->vflag_fdotr) { // this is a non-hybrid pair style. compute per thread fdotr if (fix->last_pair_hybrid == NULL) { if (lmp->neighbor->includegroup == 0) thr->virial_fdotr_compute(x, nlocal, nghost, -1); else thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); } else { if (style == fix->last_pair_hybrid) { // pair_style hybrid will compute fdotr for us // but we first need to reduce the forces data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); fix->did_reduce(); need_force_reduce = 0; } } } if (evflag) { #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { pair->eng_vdwl += thr->eng_vdwl; pair->eng_coul += thr->eng_coul; thr->eng_vdwl = 0.0; thr->eng_coul = 0.0; } if (vflag & 3) for (int i=0; i < 6; ++i) { pair->virial[i] += thr->virial_pair[i]; thr->virial_pair[i] = 0.0; } } if (eflag & 2) { data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); } } } break; case THR_BOND: if (evflag) { Bond * const bond = lmp->force->bond; #if defined(_OPENMP) #pragma omp critical #endif { if 
(eflag & 1) { bond->energy += thr->eng_bond; thr->eng_bond = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { bond->virial[i] += thr->virial_bond[i]; thr->virial_bond[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_ANGLE: if (evflag) { Angle * const angle = lmp->force->angle; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { angle->energy += thr->eng_angle; thr->eng_angle = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { angle->virial[i] += thr->virial_angle[i]; thr->virial_angle[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_DIHEDRAL: if (evflag) { Dihedral * const dihedral = lmp->force->dihedral; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { dihedral->energy += thr->eng_dihed; thr->eng_dihed = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { dihedral->virial[i] += thr->virial_dihed[i]; thr->virial_dihed[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals if (evflag) { Dihedral * const dihedral = lmp->force->dihedral; Pair * const pair = lmp->force->pair; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { dihedral->energy += thr->eng_dihed; pair->eng_vdwl += thr->eng_vdwl; pair->eng_coul += thr->eng_coul; thr->eng_dihed = 0.0; thr->eng_vdwl = 0.0; thr->eng_coul = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { dihedral->virial[i] += thr->virial_dihed[i]; pair->virial[i] += thr->virial_pair[i]; thr->virial_dihed[i] = 0.0; thr->virial_pair[i] = 0.0; } } } if (eflag & 2) { 
data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_IMPROPER: if (evflag) { Improper *improper = lmp->force->improper; #if defined(_OPENMP) #pragma omp critical #endif { if (eflag & 1) { improper->energy += thr->eng_imprp; thr->eng_imprp = 0.0; } if (vflag & 3) { for (int i=0; i < 6; ++i) { improper->virial[i] += thr->virial_imprp[i]; thr->virial_imprp[i] = 0.0; } } } if (eflag & 2) { data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid); } if (vflag & 4) { data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid); } } break; case THR_KSPACE: // nothing to do. XXX may need to add support for per-atom info break; case THR_INTGR: // nothing to do break; default: printf("tid:%d unhandled thr_style case %d\n", tid, thr_style); break; } if (style == fix->last_omp_style) { if (need_force_reduce) { data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); fix->did_reduce(); } if (lmp->atom->torque) data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); } thr->timer(Timer::COMM); } /* ---------------------------------------------------------------------- tally eng_vdwl and eng_coul into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, ThrData * const thr) { if (pair->eflag_global) { if (newton_pair) { thr->eng_vdwl += evdwl; thr->eng_coul += ecoul; } else { const double evdwlhalf = 0.5*evdwl; const double ecoulhalf = 0.5*ecoul; if (i < nlocal) { thr->eng_vdwl += evdwlhalf; thr->eng_coul += ecoulhalf; } if (j < nlocal) { thr->eng_vdwl += evdwlhalf; thr->eng_coul += ecoulhalf; } } } if 
(pair->eflag_atom) { const double epairhalf = 0.5 * (evdwl + ecoul); if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf; if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf; } } /* helper functions */ static void v_tally(double * const vout, const double * const vin) { vout[0] += vin[0]; vout[1] += vin[1]; vout[2] += vin[2]; vout[3] += vin[3]; vout[4] += vin[4]; vout[5] += vin[5]; } static void v_tally(double * const vout, const double scale, const double * const vin) { vout[0] += scale*vin[0]; vout[1] += scale*vin[1]; vout[2] += scale*vin[2]; vout[3] += scale*vin[3]; vout[4] += scale*vin[4]; vout[5] += scale*vin[5]; } /* ---------------------------------------------------------------------- tally virial into per thread global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double * const v, ThrData * const thr) { if (pair->vflag_global) { double * const va = thr->virial_pair; if (newton_pair) { v_tally(va,v); } else { if (i < nlocal) v_tally(va,0.5,v); if (j < nlocal) v_tally(va,0.5,v); } } if (pair->vflag_atom) { if (newton_pair || i < nlocal) { double * const va = thr->vatom_pair[i]; v_tally(va,0.5,v); } if (newton_pair || j < nlocal) { double * const va = thr->vatom_pair[j]; v_tally(va,0.5,v); } } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into per thread global and per-atom accumulators need i < nlocal test since called by bond_quartic and dihedral_charmm ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, const double fpair, const double delx, const double dely, const double delz, ThrData * const thr) { if (pair->eflag_either) 
e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } if (pair->num_tally_compute > 0) { // ev_tally callbacks are not thread safe and thus have to be protected #if defined(_OPENMP) #pragma omp critical #endif for (int k=0; k < pair->num_tally_compute; ++k) { Compute *c = pair->list_tally_compute[k]; c->pair_tally_callback(i, j, nlocal, newton_pair, evdwl, ecoul, fpair, delx, dely, delz); } } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators for virial, have delx,dely,delz and fx,fy,fz ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j, const int nlocal, const int newton_pair, const double evdwl, const double ecoul, const double fx, const double fy, const double fz, const double delx, const double dely, const double delz, ThrData * const thr) { if (pair->eflag_either) e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); if (pair->vflag_either) { double v[6]; v[0] = delx*fx; v[1] = dely*fy; v[2] = delz*fz; v[3] = delx*fy; v[4] = delx*fz; v[5] = dely*fz; v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators for virial, have delx,dely,delz and fx,fy,fz called when using full neighbor lists ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_xyz_full_thr(Pair * const pair, const int i, const double evdwl, const double ecoul, const double fx, const double fy, const double fz, const double delx, const double dely, const double delz, ThrData * 
const thr) { if (pair->eflag_either) e_tally_thr(pair,i,i,i+1,0,0.5*evdwl,ecoul,thr); if (pair->vflag_either) { double v[6]; v[0] = 0.5*delx*fx; v[1] = 0.5*dely*fy; v[2] = 0.5*delz*fz; v[3] = 0.5*delx*fy; v[4] = 0.5*delx*fz; v[5] = 0.5*dely*fz; v_tally_thr(pair,i,i,i+1,0,v,thr); } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators called by SW and hbond potentials, newton_pair is always on virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk ------------------------------------------------------------------------- */ void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k, const double evdwl, const double ecoul, const double * const fj, const double * const fk, const double * const drji, const double * const drki, ThrData * const thr) { if (pair->eflag_either) { if (pair->eflag_global) { thr->eng_vdwl += evdwl; thr->eng_coul += ecoul; } if (pair->eflag_atom) { const double epairthird = THIRD * (evdwl + ecoul); thr->eatom_pair[i] += epairthird; thr->eatom_pair[j] += epairthird; thr->eatom_pair[k] += epairthird; } } if (pair->vflag_either) { double v[6]; v[0] = drji[0]*fj[0] + drki[0]*fk[0]; v[1] = drji[1]*fj[1] + drki[1]*fk[1]; v[2] = drji[2]*fj[2] + drki[2]*fk[2]; v[3] = drji[0]*fj[1] + drki[0]*fk[1]; v[4] = drji[0]*fj[2] + drki[0]*fk[2]; v[5] = drji[1]*fj[2] + drki[1]*fk[2]; if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { v_tally(thr->vatom_pair[i],THIRD,v); v_tally(thr->vatom_pair[j],THIRD,v); v_tally(thr->vatom_pair[k],THIRD,v); } } } /* ---------------------------------------------------------------------- tally eng_vdwl and virial into global and per-atom accumulators called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j, const int k, const int m, 
const double evdwl, const double * const fi, const double * const fj, const double * const fk, const double * const drim, const double * const drjm, const double * const drkm, ThrData * const thr) { double v[6]; if (pair->eflag_either) { if (pair->eflag_global) thr->eng_vdwl += evdwl; if (pair->eflag_atom) { const double epairfourth = 0.25 * evdwl; thr->eatom_pair[i] += epairfourth; thr->eatom_pair[j] += epairfourth; thr->eatom_pair[k] += epairfourth; thr->eatom_pair[m] += epairfourth; } } if (pair->vflag_atom) { v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); v_tally(thr->vatom_pair[m],v); } } /* ---------------------------------------------------------------------- tally ecoul and virial into each of n atoms in list called by TIP4P potential, newton_pair is always on changes v values by dividing by n ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_list_thr(Pair * const pair, const int key, const int * const list, const double * const v, const double ecoul, const double alpha, ThrData * const thr) { int i; if (pair->eflag_either) { if (pair->eflag_global) thr->eng_coul += ecoul; if (pair->eflag_atom) { if (key == 0) { thr->eatom_pair[list[0]] += 0.5*ecoul; thr->eatom_pair[list[1]] += 0.5*ecoul; } else if (key == 1) { thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[1]] += 0.25*ecoul*alpha; thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.5*ecoul; } else if (key == 2) { thr->eatom_pair[list[0]] += 0.5*ecoul; thr->eatom_pair[list[1]] += 
0.5*ecoul*(1-alpha); thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.25*ecoul*alpha; } else { thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[1]] += 0.25*ecoul*alpha; thr->eatom_pair[list[2]] += 0.25*ecoul*alpha; thr->eatom_pair[list[3]] += 0.5*ecoul*(1-alpha); thr->eatom_pair[list[4]] += 0.25*ecoul*alpha; thr->eatom_pair[list[5]] += 0.25*ecoul*alpha; } } } if (pair->vflag_either) { if (pair->vflag_global) v_tally(thr->virial_pair,v); if (pair->vflag_atom) { if (key == 0) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]; thr->vatom_pair[list[1]][i] += 0.5*v[i]; } } else if (key == 1) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.5*v[i]; } } else if (key == 2) { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]; thr->vatom_pair[list[1]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.25*v[i]*alpha; } } else { for (i = 0; i <= 5; i++) { thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[3]][i] += 0.5*v[i]*(1-alpha); thr->vatom_pair[list[4]][i] += 0.25*v[i]*alpha; thr->vatom_pair[list[5]][i] += 0.25*v[i]*alpha; } } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal, const int newton_bond, const double ebond, const double fbond, const double delx, const double dely, const double delz, ThrData * const thr) { if (bond->eflag_either) { const double ebondhalf = 0.5*ebond; if (newton_bond) { if 
(bond->eflag_global) thr->eng_bond += ebond; if (bond->eflag_atom) { thr->eatom_bond[i] += ebondhalf; thr->eatom_bond[j] += ebondhalf; } } else { if (bond->eflag_global) { if (i < nlocal) thr->eng_bond += ebondhalf; if (j < nlocal) thr->eng_bond += ebondhalf; } if (bond->eflag_atom) { if (i < nlocal) thr->eatom_bond[i] += ebondhalf; if (j < nlocal) thr->eatom_bond[j] += ebondhalf; } } } if (bond->vflag_either) { double v[6]; v[0] = delx*delx*fbond; v[1] = dely*dely*fbond; v[2] = delz*delz*fbond; v[3] = delx*dely*fbond; v[4] = delx*delz*fbond; v[5] = dely*delz*fbond; if (bond->vflag_global) { if (newton_bond) v_tally(thr->virial_bond,v); else { if (i < nlocal) v_tally(thr->virial_bond,0.5,v); if (j < nlocal) v_tally(thr->virial_bond,0.5,v); } } if (bond->vflag_atom) { v[0] *= 0.5; v[1] *= 0.5; v[2] *= 0.5; v[3] *= 0.5; v[4] *= 0.5; v[5] *= 0.5; if (newton_bond) { v_tally(thr->vatom_bond[i],v); v_tally(thr->vatom_bond[j],v); } else { if (i < nlocal) v_tally(thr->vatom_bond[i],v); if (j < nlocal) v_tally(thr->vatom_bond[j],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k, const int nlocal, const int newton_bond, const double eangle, const double * const f1, const double * const f3, const double delx1, const double dely1, const double delz1, const double delx2, const double dely2, const double delz2, ThrData * const thr) { if (angle->eflag_either) { const double eanglethird = THIRD*eangle; if (newton_bond) { if (angle->eflag_global) thr->eng_angle += eangle; if (angle->eflag_atom) { thr->eatom_angle[i] += eanglethird; thr->eatom_angle[j] += eanglethird; thr->eatom_angle[k] += eanglethird; } } else { if (angle->eflag_global) { if (i < 
nlocal) thr->eng_angle += eanglethird; if (j < nlocal) thr->eng_angle += eanglethird; if (k < nlocal) thr->eng_angle += eanglethird; } if (angle->eflag_atom) { if (i < nlocal) thr->eatom_angle[i] += eanglethird; if (j < nlocal) thr->eatom_angle[j] += eanglethird; if (k < nlocal) thr->eatom_angle[k] += eanglethird; } } } if (angle->vflag_either) { double v[6]; v[0] = delx1*f1[0] + delx2*f3[0]; v[1] = dely1*f1[1] + dely2*f3[1]; v[2] = delz1*f1[2] + delz2*f3[2]; v[3] = delx1*f1[1] + delx2*f3[1]; v[4] = delx1*f1[2] + delx2*f3[2]; v[5] = dely1*f1[2] + dely2*f3[2]; if (angle->vflag_global) { if (newton_bond) { v_tally(thr->virial_angle,v); } else { int cnt = 0; if (i < nlocal) ++cnt; if (j < nlocal) ++cnt; if (k < nlocal) ++cnt; v_tally(thr->virial_angle,cnt*THIRD,v); } } if (angle->vflag_atom) { v[0] *= THIRD; v[1] *= THIRD; v[2] *= THIRD; v[3] *= THIRD; v[4] *= THIRD; v[5] *= THIRD; if (newton_bond) { v_tally(thr->vatom_angle[i],v); v_tally(thr->vatom_angle[j],v); v_tally(thr->vatom_angle[k],v); } else { if (i < nlocal) v_tally(thr->vatom_angle[i],v); if (j < nlocal) v_tally(thr->vatom_angle[j],v); if (k < nlocal) v_tally(thr->vatom_angle[k],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial from 1-3 repulsion of SDK angle into accumulators ------------------------------------------------------------------------- */ void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3, const int nlocal, const int newton_bond, const double epair, const double fpair, const double delx, const double dely, const double delz, ThrData * const thr) { if (angle->eflag_either) { const double epairhalf = 0.5 * epair; if (angle->eflag_global) { if (newton_bond || i1 < nlocal) thr->eng_angle += epairhalf; if (newton_bond || i3 < nlocal) thr->eng_angle += epairhalf; } if (angle->eflag_atom) { if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf; if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += 
epairhalf; } } if (angle->vflag_either) { double v[6]; v[0] = delx*delx*fpair; v[1] = dely*dely*fpair; v[2] = delz*delz*fpair; v[3] = delx*dely*fpair; v[4] = delx*delz*fpair; v[5] = dely*delz*fpair; if (angle->vflag_global) { double * const va = thr->virial_angle; if (newton_bond || i1 < nlocal) v_tally(va,0.5,v); if (newton_bond || i3 < nlocal) v_tally(va,0.5,v); } if (angle->vflag_atom) { if (newton_bond || i1 < nlocal) { double * const va = thr->vatom_angle[i1]; v_tally(va,0.5,v); } if (newton_bond || i3 < nlocal) { double * const va = thr->vatom_angle[i3]; v_tally(va,0.5,v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2, const int i3, const int i4, const int nlocal, const int newton_bond, const double edihedral, const double * const f1, const double * const f3, const double * const f4, const double vb1x, const double vb1y, const double vb1z, const double vb2x, const double vb2y, const double vb2z, const double vb3x, const double vb3y, const double vb3z, ThrData * const thr) { if (dihed->eflag_either) { if (dihed->eflag_global) { if (newton_bond) { thr->eng_dihed += edihedral; } else { const double edihedralquarter = 0.25*edihedral; int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; thr->eng_dihed += static_cast(cnt)*edihedralquarter; } } if (dihed->eflag_atom) { const double edihedralquarter = 0.25*edihedral; if (newton_bond) { thr->eatom_dihed[i1] += edihedralquarter; thr->eatom_dihed[i2] += edihedralquarter; thr->eatom_dihed[i3] += edihedralquarter; thr->eatom_dihed[i4] += edihedralquarter; } else { if 
(i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter; if (i2 < nlocal) thr->eatom_dihed[i2] += edihedralquarter; if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter; if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter; } } } if (dihed->vflag_either) { double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; if (dihed->vflag_global) { if (newton_bond) { v_tally(thr->virial_dihed,v); } else { int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; v_tally(thr->virial_dihed,0.25*static_cast(cnt),v); } } v[0] *= 0.25; v[1] *= 0.25; v[2] *= 0.25; v[3] *= 0.25; v[4] *= 0.25; v[5] *= 0.25; if (dihed->vflag_atom) { if (newton_bond) { v_tally(thr->vatom_dihed[i1],v); v_tally(thr->vatom_dihed[i2],v); v_tally(thr->vatom_dihed[i3],v); v_tally(thr->vatom_dihed[i4],v); } else { if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v); if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v); if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v); if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v); } } } } /* ---------------------------------------------------------------------- tally energy and virial into global and per-atom accumulators virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 ------------------------------------------------------------------------- */ void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2, const int i3, const int i4, const int nlocal, const int newton_bond, const double eimproper, const double * const f1, const double * const f3, const double * const f4, const double vb1x, const double vb1y, const double vb1z, const 
double vb2x, const double vb2y, const double vb2z, const double vb3x, const double vb3y, const double vb3z, ThrData * const thr) { if (imprp->eflag_either) { if (imprp->eflag_global) { if (newton_bond) { thr->eng_imprp += eimproper; } else { const double eimproperquarter = 0.25*eimproper; int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; thr->eng_imprp += static_cast(cnt)*eimproperquarter; } } if (imprp->eflag_atom) { const double eimproperquarter = 0.25*eimproper; if (newton_bond) { thr->eatom_imprp[i1] += eimproperquarter; thr->eatom_imprp[i2] += eimproperquarter; thr->eatom_imprp[i3] += eimproperquarter; thr->eatom_imprp[i4] += eimproperquarter; } else { if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter; if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter; if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter; if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter; } } } if (imprp->vflag_either) { double v[6]; v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; if (imprp->vflag_global) { if (newton_bond) { v_tally(thr->virial_imprp,v); } else { int cnt = 0; if (i1 < nlocal) ++cnt; if (i2 < nlocal) ++cnt; if (i3 < nlocal) ++cnt; if (i4 < nlocal) ++cnt; v_tally(thr->virial_imprp,0.25*static_cast(cnt),v); } } v[0] *= 0.25; v[1] *= 0.25; v[2] *= 0.25; v[3] *= 0.25; v[4] *= 0.25; v[5] *= 0.25; if (imprp->vflag_atom) { if (newton_bond) { v_tally(thr->vatom_imprp[i1],v); v_tally(thr->vatom_imprp[i2],v); v_tally(thr->vatom_imprp[i3],v); v_tally(thr->vatom_imprp[i4],v); } else { if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v); if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v); if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v); if (i4 < 
nlocal) v_tally(thr->vatom_imprp[i4],v); } } } } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO potential, newton_pair is always on fpair is magnitude of force on atom I ------------------------------------------------------------------------- */ void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair, const double * const drij, ThrData * const thr) { double v[6]; v[0] = 0.5 * drij[0]*drij[0]*fpair; v[1] = 0.5 * drij[1]*drij[1]*fpair; v[2] = 0.5 * drij[2]*drij[2]*fpair; v[3] = 0.5 * drij[0]*drij[1]*fpair; v[4] = 0.5 * drij[0]*drij[2]*fpair; v[5] = 0.5 * drij[1]*drij[2]*fpair; v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO and Tersoff potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::v_tally3_thr(const int i, const int j, const int k, const double * const fi, const double * const fj, const double * const drik, const double * const drjk, ThrData * const thr) { double v[6]; v[0] = THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]); v[1] = THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]); v[2] = THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]); v[3] = THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]); v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); } /* ---------------------------------------------------------------------- tally virial into per-atom accumulators called by AIREBO potential, newton_pair is always on ------------------------------------------------------------------------- */ void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m, const double * const fi, const double * const fj, const double * const fk, const double 
* const drim, const double * const drjm, const double * const drkm, ThrData * const thr) { double v[6]; v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); v_tally(thr->vatom_pair[i],v); v_tally(thr->vatom_pair[j],v); v_tally(thr->vatom_pair[k],v); v_tally(thr->vatom_pair[m],v); } /* ---------------------------------------------------------------------- */ double ThrOMP::memory_usage_thr() { double bytes=0.0; return bytes; } diff --git a/src/USER-PHONON/fix_phonon.cpp b/src/USER-PHONON/fix_phonon.cpp index 1470bc3ee..e4ff1dd31 100644 --- a/src/USER-PHONON/fix_phonon.cpp +++ b/src/USER-PHONON/fix_phonon.cpp @@ -1,922 +1,922 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ + /* ---------------------------------------------------------------------- - Contributing authors: - Ling-Ti Kong + Contributing author: Ling-Ti Kong Contact: School of Materials Science and Engineering, Shanghai Jiao Tong University, 800 Dongchuan Road, Minhang, Shanghai 200240, CHINA konglt@sjtu.edu.cn; konglt@gmail.com ------------------------------------------------------------------------- */ #include #include #include #include "fix_phonon.h" #include "fft3d_wrap.h" #include "atom.h" #include "compute.h" #include "domain.h" #include "force.h" #include "group.h" #include "lattice.h" #include "modify.h" #include "update.h" #include "citeme.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; #define INVOKED_SCALAR 1 #define INVOKED_VECTOR 2 #define MAXLINE 512 static const char cite_fix_phonon[] = "fix phonon command:\n\n" "@Article{Kong11,\n" " author = {L. T. Kong},\n" " title = {Phonon dispersion measured directly from molecular dynamics simulations},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2011,\n" " volume = 182,\n" " pages = {2201--2207}\n" "}\n\n"; /* ---------------------------------------------------------------------- */ FixPhonon::FixPhonon(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (lmp->citeme) lmp->citeme->add(cite_fix_phonon); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); if (narg < 8) error->all(FLERR,"Illegal fix phonon command: number of arguments < 8"); nevery = force->inumeric(FLERR, arg[3]); // Calculate this fix every n steps! 
if (nevery < 1) error->all(FLERR,"Illegal fix phonon command"); nfreq = force->inumeric(FLERR, arg[4]); // frequency to output result if (nfreq < 1) error->all(FLERR,"Illegal fix phonon command"); waitsteps = force->bnumeric(FLERR,arg[5]); // Wait this many timesteps before actually measuring if (waitsteps < 0) error->all(FLERR,"Illegal fix phonon command: waitsteps < 0 !"); int n = strlen(arg[6]) + 1; // map file mapfile = new char[n]; strcpy(mapfile, arg[6]); n = strlen(arg[7]) + 1; // prefix of output prefix = new char[n]; strcpy(prefix, arg[7]); logfile = new char[n+4]; sprintf(logfile,"%s.log",prefix); int sdim = sysdim = domain->dimension; int iarg = 8; nasr = 20; // other command line options while (iarg < narg){ if (strcmp(arg[iarg],"sysdim") == 0){ if (++iarg >= narg) error->all(FLERR,"Illegal fix phonon command: incomplete command line options."); sdim = force->inumeric(FLERR, arg[iarg]); if (sdim < 1) error->all(FLERR,"Illegal fix phonon command: sysdim should not be less than 1."); } else if (strcmp(arg[iarg],"nasr") == 0){ if (++iarg >= narg) error->all(FLERR,"Illegal fix phonon command: incomplete command line options."); nasr = force->inumeric(FLERR, arg[iarg]); } else { error->all(FLERR,"Illegal fix phonon command: unknown option read!"); } ++iarg; } // get the dimension of the simulation; 1D is possible by specifying the option of "sysdim 1" if (sdim < sysdim) sysdim = sdim; nasr = MAX(0, nasr); // get the total number of atoms in group and run min/max checks bigint ng = group->count(igroup); if (ng > MAXSMALLINT) error->all(FLERR,"Too many atoms for fix phonon"); if (ng < 1) error->all(FLERR,"No atom found for fix phonon!"); ngroup = static_cast(ng); // MPI gatherv related variables recvcnts = new int[nprocs]; displs = new int[nprocs]; // mapping index tag2surf.clear(); // clear map info surf2tag.clear(); // get the mapping between lattice indices and atom IDs readmap(); delete []mapfile; if (nucell == 1) nasr = MIN(1,nasr); // get the mass matrix 
for dynamic matrix getmass(); // create FFT and allocate memory for FFT // here the parallization is done on the x direction only nxlo = 0; int *nx_loc = new int [nprocs]; for (int i = 0; i < nprocs; ++i){ nx_loc[i] = nx / nprocs; if (i < nx%nprocs) ++nx_loc[i]; } for (int i = 0; i < me; ++i) nxlo += nx_loc[i]; nxhi = nxlo + nx_loc[me] - 1; mynpt = nx_loc[me] * ny * nz; mynq = mynpt; fft_dim = nucell * sysdim; fft_dim2 = fft_dim * fft_dim; fft_nsend = mynpt * fft_dim; fft_cnts = new int[nprocs]; fft_disp = new int[nprocs]; fft_disp[0] = 0; for (int i = 0; i < nprocs; ++i) fft_cnts[i] = nx_loc[i] * ny * nz * fft_dim; for (int i = 1; i < nprocs; ++i) fft_disp[i] = fft_disp[i-1] + fft_cnts[i-1]; delete []nx_loc; fft = new FFT3d(lmp,world,nz,ny,nx,0,nz-1,0,ny-1,nxlo,nxhi,0,nz-1,0,ny-1,nxlo,nxhi,0,0,&mysize,0); memory->create(fft_data, MAX(1,mynq)*2, "fix_phonon:fft_data"); // allocate variables; MAX(1,... is used because NULL buffer will result in error for MPI memory->create(RIloc,ngroup,(sysdim+1),"fix_phonon:RIloc"); memory->create(RIall,ngroup,(sysdim+1),"fix_phonon:RIall"); memory->create(Rsort,ngroup, sysdim, "fix_phonon:Rsort"); memory->create(Rnow, MAX(1,mynpt),fft_dim,"fix_phonon:Rnow"); memory->create(Rsum, MAX(1,mynpt),fft_dim,"fix_phonon:Rsum"); memory->create(basis,nucell, sysdim, "fix_phonon:basis"); // because of hermit, only nearly half of q points are stored memory->create(Rqnow,MAX(1,mynq),fft_dim, "fix_phonon:Rqnow"); memory->create(Rqsum,MAX(1,mynq),fft_dim2,"fix_phonon:Rqsum"); memory->create(Phi_q,MAX(1,mynq),fft_dim2,"fix_phonon:Phi_q"); // variable to collect all local Phi to root if (me == 0) memory->create(Phi_all,ntotal,fft_dim2,"fix_phonon:Phi_all"); else memory->create(Phi_all,1,1,"fix_phonon:Phi_all"); // output some information on the system to log file if (me == 0){ flog = fopen(logfile, "w"); if (flog == NULL) { char str[MAXLINE]; sprintf(str,"Can not open output file %s",logfile); error->one(FLERR,str); } 
fprintf(flog,"############################################################\n"); fprintf(flog,"# group name of the atoms under study : %s\n", group->names[igroup]); fprintf(flog,"# total number of atoms in the group : %d\n", ngroup); fprintf(flog,"# dimension of the system : %d D\n", sysdim); fprintf(flog,"# number of atoms per unit cell : %d\n", nucell); fprintf(flog,"# dimension of the FFT mesh : %d x %d x %d\n", nx, ny, nz); fprintf(flog,"# number of wait steps before measurement : " BIGINT_FORMAT "\n", waitsteps); fprintf(flog,"# frequency of the measurement : %d\n", nevery); fprintf(flog,"# output result after this many measurement: %d\n", nfreq); fprintf(flog,"# number of processors used by this run : %d\n", nprocs); fprintf(flog,"############################################################\n"); fprintf(flog,"# mapping information between lattice indices and atom id\n"); fprintf(flog,"# nx ny nz nucell\n"); fprintf(flog,"%d %d %d %d\n", nx, ny, nz, nucell); fprintf(flog,"# l1 l2 l3 k atom_id\n"); int ix, iy, iz, iu; for (idx = 0; idx < ngroup; ++idx){ itag = surf2tag[idx]; iu = idx%nucell; iz = (idx/nucell)%nz; iy = (idx/(nucell*nz))%ny; ix = (idx/(nucell*nz*ny))%nx; fprintf(flog,"%d %d %d %d " TAGINT_FORMAT "\n", ix, iy, iz, iu, itag); } fprintf(flog,"############################################################\n"); fflush(flog); } surf2tag.clear(); // default temperature is from thermo TempSum = new double[sysdim]; id_temp = new char[12]; strcpy(id_temp,"thermo_temp"); int icompute = modify->find_compute(id_temp); temperature = modify->compute[icompute]; inv_nTemp = 1./group->count(temperature->igroup); } // end of constructor /* ---------------------------------------------------------------------- */ void FixPhonon::post_run() { // compute and output final results if (ifreq > 0 && ifreq != nfreq) postprocess(); if (me == 0) fclose(flog); } /* ---------------------------------------------------------------------- */ FixPhonon::~FixPhonon() { // delete 
locally stored array memory->destroy(RIloc); memory->destroy(RIall); memory->destroy(Rsort); memory->destroy(Rnow); memory->destroy(Rsum); memory->destroy(basis); memory->destroy(Rqnow); memory->destroy(Rqsum); memory->destroy(Phi_q); memory->destroy(Phi_all); delete []recvcnts; delete []displs; delete []prefix; delete []logfile; delete []fft_cnts; delete []fft_disp; delete []id_temp; delete []TempSum; delete []M_inv_sqrt; delete []basetype; // destroy FFT delete fft; memory->sfree(fft_data); // clear map info tag2surf.clear(); surf2tag.clear(); } /* ---------------------------------------------------------------------- */ int FixPhonon::setmask() { int mask = 0; mask |= END_OF_STEP; return mask; } /* ---------------------------------------------------------------------- */ void FixPhonon::init() { // warn if more than one fix-phonon int count = 0; for (int i = 0; i < modify->nfix; ++i) if (strcmp(modify->fix[i]->style,"phonon") == 0) ++count; if (count > 1 && me == 0) error->warning(FLERR,"More than one fix phonon defined"); // just warn, but allowed. 
} /* ---------------------------------------------------------------------- */ void FixPhonon::setup(int flag) { // initialize accumulating variables for (int i = 0; i < sysdim; ++i) TempSum[i] = 0.; for (int i = 0; i < mynpt; ++i) for (int j = 0; j < fft_dim; ++j) Rsum[i][j] = 0.; for (int i =0; i < mynq; ++i) for (int j =0; j < fft_dim2; ++j) Rqsum[i][j] = std::complex (0.,0.); for (int i = 0; i < 6; ++i) hsum[i] = 0.; for (int i = 0; i < nucell; ++i) for (int j = 0; j < sysdim; ++j) basis[i][j] = 0.; neval = ifreq = 0; prev_nstep = update->ntimestep; } /* ---------------------------------------------------------------------- */ void FixPhonon::end_of_step() { if ( (update->ntimestep-prev_nstep) <= waitsteps) return; double **x = atom->x; int *mask = atom->mask; tagint *tag = atom->tag; imageint *image = atom->image; int nlocal = atom->nlocal; double *h = domain->h; int i,idim,jdim,ndim; double xcur[3]; // to get the current temperature if (!(temperature->invoked_flag & INVOKED_VECTOR)) temperature->compute_vector(); for (idim = 0; idim < sysdim; ++idim) TempSum[idim] += temperature->vector[idim]; // evaluate R(r) on local proc nfind = 0; for (i = 0; i < nlocal; ++i){ if (mask[i] & groupbit){ itag = tag[i]; idx = tag2surf[itag]; domain->unmap(x[i], image[i], xcur); for (idim = 0; idim < sysdim; ++idim) RIloc[nfind][idim] = xcur[idim]; RIloc[nfind++][sysdim] = static_cast(idx); } } // gather R(r) on local proc, then sort and redistribute to all procs for FFT nfind *= (sysdim+1); displs[0] = 0; for (i = 0; i < nprocs; ++i) recvcnts[i] = 0; MPI_Gather(&nfind,1,MPI_INT,recvcnts,1,MPI_INT,0,world); for (i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1]; MPI_Gatherv(RIloc[0],nfind,MPI_DOUBLE,RIall[0],recvcnts,displs,MPI_DOUBLE,0,world); if (me == 0){ for (i = 0; i < ngroup; ++i){ idx = static_cast(RIall[i][sysdim]); for (idim = 0; idim < sysdim; ++idim) Rsort[idx][idim] = RIall[i][idim]; } } MPI_Scatterv(Rsort[0],fft_cnts,fft_disp, MPI_DOUBLE, Rnow[0], 
fft_nsend, MPI_DOUBLE,0,world); // get Rsum for (idx = 0; idx < mynpt; ++idx) for (idim = 0; idim < fft_dim; ++idim) Rsum[idx][idim] += Rnow[idx][idim]; // FFT R(r) to get R(q) for (idim = 0; idim < fft_dim; ++idim){ int m = 0; for (idx = 0; idx < mynpt; ++idx){ fft_data[m++] = static_cast(Rnow[idx][idim]); fft_data[m++] = static_cast(0.); } fft->compute(fft_data, fft_data, -1); m = 0; for (idq = 0; idq < mynq; ++idq){ Rqnow[idq][idim] = std::complex(static_cast(fft_data[m]), static_cast(fft_data[m+1])); m += 2; } } // to get sum(R(q).R(q)*) for (idq = 0; idq < mynq; ++idq){ ndim = 0; for (idim = 0; idim < fft_dim; ++idim) for (jdim = 0; jdim < fft_dim; ++jdim) Rqsum[idq][ndim++] += Rqnow[idq][idim] * std::conj(Rqnow[idq][jdim]); } // get basis info if (fft_dim > sysdim){ double dist2orig[3]; for (idx = 0; idx < mynpt; ++idx){ ndim = sysdim; for (i = 1; i < nucell; ++i){ for (idim = 0; idim < sysdim; ++idim) dist2orig[idim] = Rnow[idx][ndim++] - Rnow[idx][idim]; domain->minimum_image(dist2orig); for (idim = 0; idim < sysdim; ++idim) basis[i][idim] += dist2orig[idim]; } } } // get lattice vector info for (int i = 0; i < 6; ++i) hsum[i] += h[i]; // increment counter ++neval; // compute and output Phi_q after every nfreq evaluations if (++ifreq == nfreq) postprocess(); } // end of end_of_step() /* ---------------------------------------------------------------------- */ double FixPhonon::memory_usage() { double bytes = sizeof(double)*2*mynq + sizeof(std::map)*2*ngroup + sizeof(double)*(ngroup*(3*sysdim+2)+mynpt*fft_dim*2) + sizeof(std::complex)*MAX(1,mynq)*fft_dim *(1+2*fft_dim) + sizeof(std::complex)*ntotal*fft_dim2 + sizeof(int) * nprocs * 4; return bytes; } /* ---------------------------------------------------------------------- */ int FixPhonon::modify_param(int narg, char **arg) { if (strcmp(arg[0],"temp") == 0) { if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); delete [] id_temp; int n = strlen(arg[1]) + 1; id_temp = new char[n]; 
strcpy(id_temp,arg[1]); int icompute = modify->find_compute(id_temp); if (icompute < 0) error->all(FLERR,"Could not find fix_modify temp ID"); temperature = modify->compute[icompute]; if (temperature->tempflag == 0) error->all(FLERR,"Fix_modify temp ID does not compute temperature"); inv_nTemp = 1.0/group->count(temperature->igroup); return 2; } return 0; } /* ---------------------------------------------------------------------- * private method, to get the mass matrix for dynamic matrix * --------------------------------------------------------------------*/ void FixPhonon::getmass() { int nlocal = atom->nlocal; int *mask = atom->mask; tagint *tag = atom->tag; int *type = atom->type; double *rmass = atom->rmass; double *mass = atom->mass; double *mass_one, *mass_all; double *type_one, *type_all; mass_one = new double[nucell]; mass_all = new double[nucell]; type_one = new double[nucell]; type_all = new double[nucell]; for (int i = 0; i < nucell; ++i) mass_one[i] = type_one[i] = 0.; if (rmass){ for (int i = 0; i < nlocal; ++i){ if (mask[i] & groupbit){ itag = tag[i]; idx = tag2surf[itag]; int iu = idx%nucell; mass_one[iu] += rmass[i]; type_one[iu] += double(type[i]); } } } else { for (int i = 0; i < nlocal; ++i){ if (mask[i] & groupbit){ itag = tag[i]; idx = tag2surf[itag]; int iu = idx%nucell; mass_one[iu] += mass[type[i]]; type_one[iu] += double(type[i]); } } } MPI_Allreduce(mass_one,mass_all,nucell,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(type_one,type_all,nucell,MPI_DOUBLE,MPI_SUM,world); M_inv_sqrt = new double[nucell]; basetype = new int[nucell]; double inv_total = 1./double(ntotal); for (int i = 0; i < nucell; ++i){ mass_all[i] *= inv_total; M_inv_sqrt[i] = sqrt(1./mass_all[i]); basetype[i] = int(type_all[i]*inv_total); } delete []mass_one; delete []mass_all; delete []type_one; delete []type_all; } /* ---------------------------------------------------------------------- * private method, to read the mapping info from file * 
--------------------------------------------------------------------*/ void FixPhonon::readmap() { int info = 0; // auto-generate mapfile for "cluster" (gamma only system) if (strcmp(mapfile, "GAMMA") == 0){ nx = ny = nz = ntotal = 1; nucell = ngroup; tagint *tag_loc, *tag_all; memory->create(tag_loc,ngroup,"fix_phonon:tag_loc"); memory->create(tag_all,ngroup,"fix_phonon:tag_all"); // get atom IDs on local proc int nfind = 0; for (int i = 0; i < atom->nlocal; ++i){ if (atom->mask[i] & groupbit) tag_loc[nfind++] = atom->tag[i]; } // gather IDs on local proc displs[0] = 0; for (int i = 0; i < nprocs; ++i) recvcnts[i] = 0; MPI_Allgather(&nfind,1,MPI_INT,recvcnts,1,MPI_INT,world); for (int i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1]; MPI_Allgatherv(tag_loc,nfind,MPI_LMP_TAGINT,tag_all,recvcnts,displs,MPI_LMP_TAGINT,world); for (int i = 0; i < ngroup; ++i){ itag = tag_all[i]; tag2surf[itag] = i; surf2tag[i] = itag; } memory->destroy(tag_loc); memory->destroy(tag_all); return; } // read from map file for others char line[MAXLINE]; FILE *fp = fopen(mapfile, "r"); if (fp == NULL){ sprintf(line,"Cannot open input map file %s", mapfile); error->all(FLERR,line); } if (fgets(line,MAXLINE,fp) == NULL) error->all(FLERR,"Error while reading header of mapping file!"); nx = force->inumeric(FLERR, strtok(line, " \n\t\r\f")); ny = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); nz = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); nucell = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); ntotal = nx*ny*nz; if (ntotal*nucell != ngroup) error->all(FLERR,"FFT mesh and number of atoms in group mismatch!"); // second line of mapfile is comment if (fgets(line,MAXLINE,fp) == NULL) error->all(FLERR,"Error while reading comment of mapping file!"); int ix, iy, iz, iu; // the remaining lines carry the mapping info for (int i = 0; i < ngroup; ++i){ if (fgets(line,MAXLINE,fp) == NULL) {info = 1; break;} ix = force->inumeric(FLERR, strtok(line, " \n\t\r\f")); iy = 
force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); iz = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); iu = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); itag = force->inumeric(FLERR, strtok(NULL, " \n\t\r\f")); // check if index is in correct range if (ix < 0 || ix >= nx || iy < 0 || iy >= ny || iz < 0 || iz >= nz || iu < 0 || iu >= nucell) {info = 2; break;} // 1 <= itag <= natoms if (itag < 1 || itag > static_cast(atom->natoms)) {info = 3; break;} idx = ((ix*ny+iy)*nz+iz)*nucell + iu; tag2surf[itag] = idx; surf2tag[idx] = itag; } fclose(fp); if (tag2surf.size() != surf2tag.size() || tag2surf.size() != static_cast(ngroup) ) error->all(FLERR,"The mapping is incomplete!"); if (info) error->all(FLERR,"Error while reading mapping file!"); // check the correctness of mapping int *mask = atom->mask; tagint *tag = atom->tag; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; ++i) { if (mask[i] & groupbit){ itag = tag[i]; idx = tag2surf[itag]; if (itag != surf2tag[idx]) error->one(FLERR,"The mapping info read is incorrect!"); } } } /* ---------------------------------------------------------------------- * private method, to output the force constant matrix * --------------------------------------------------------------------*/ void FixPhonon::postprocess( ) { if (neval < 1) return; ifreq = 0; int idim, jdim, ndim; double inv_neval = 1. 
/double(neval); // to get for (idq = 0; idq < mynq; ++idq) for (idim = 0; idim < fft_dim2; ++idim) Phi_q[idq][idim] = Rqsum[idq][idim] * inv_neval; // to get for (idx = 0; idx < mynpt; ++idx) for (idim = 0; idim < fft_dim; ++idim) Rnow[idx][idim] = Rsum[idx][idim] * inv_neval; // to get q for (idim = 0; idim < fft_dim; ++idim){ int m = 0; for (idx = 0; idx < mynpt; ++idx){ fft_data[m++] = static_cast(Rnow[idx][idim]); fft_data[m++] = static_cast(0.); } fft->compute(fft_data,fft_data,-1); m = 0; for (idq = 0; idq < mynq; ++idq){ Rqnow[idq][idim] = std::complex(static_cast(fft_data[m]), static_cast(fft_data[m+1])); m += 2; } } // to get G(q) = - q.q for (idq = 0; idq < mynq; ++idq){ ndim = 0; for (idim = 0; idim < fft_dim; ++idim) for (jdim = 0; jdim < fft_dim; ++jdim) Phi_q[idq][ndim++] -= Rqnow[idq][idim] * std::conj(Rqnow[idq][jdim]); } // to get Phi = KT.G^-1; normalization of FFTW data is done here double boltz = force->boltz, kbtsqrt[sysdim], TempAve = 0.; double TempFac = inv_neval * inv_nTemp; double NormFac = TempFac * double(ntotal); for (idim = 0; idim < sysdim; ++idim){ kbtsqrt[idim] = sqrt(TempSum[idim] * NormFac); TempAve += TempSum[idim] * TempFac; } TempAve /= sysdim*boltz; for (idq = 0; idq < mynq; ++idq){ GaussJordan(fft_dim, Phi_q[idq]); ndim =0; for (idim = 0; idim < fft_dim; ++idim) for (jdim = 0; jdim < fft_dim; ++jdim) Phi_q[idq][ndim++] *= kbtsqrt[idim%sysdim]*kbtsqrt[jdim%sysdim]; } // to collect all local Phi_q to root displs[0]=0; for (int i = 0; i < nprocs; ++i) recvcnts[i] = fft_cnts[i]*fft_dim*2; for (int i = 1; i < nprocs; ++i) displs[i] = displs[i-1] + recvcnts[i-1]; MPI_Gatherv(Phi_q[0],mynq*fft_dim2*2,MPI_DOUBLE,Phi_all[0],recvcnts,displs,MPI_DOUBLE,0,world); // to collect all basis info and averaged it on root double basis_root[fft_dim]; if (fft_dim > sysdim) MPI_Reduce(&basis[1][0], &basis_root[sysdim], fft_dim-sysdim, MPI_DOUBLE, MPI_SUM, 0, world); if (me == 0){ // output dynamic matrix by root // get basis info for (idim = 0; 
idim < sysdim; ++idim) basis_root[idim] = 0.; for (idim = sysdim; idim < fft_dim; ++idim) basis_root[idim] /= double(ntotal)*double(neval); // get unit cell base vector info; might be incorrect if MD pbc and FixPhonon pbc mismatch. double basevec[9]; basevec[1] = basevec[2] = basevec[5] = 0.; basevec[0] = hsum[0] * inv_neval / double(nx); basevec[4] = hsum[1] * inv_neval / double(ny); basevec[8] = hsum[2] * inv_neval / double(nz); basevec[7] = hsum[3] * inv_neval / double(nz); basevec[6] = hsum[4] * inv_neval / double(nz); basevec[3] = hsum[5] * inv_neval / double(ny); // write binary file, in fact, it is the force constants matrix that is written // Enforcement of ASR and the conversion of dynamical matrix is done in the postprocessing code char fname[MAXLINE]; sprintf(fname,"%s.bin." BIGINT_FORMAT,prefix,update->ntimestep); FILE *fp_bin = fopen(fname,"wb"); fwrite(&sysdim, sizeof(int), 1, fp_bin); fwrite(&nx, sizeof(int), 1, fp_bin); fwrite(&ny, sizeof(int), 1, fp_bin); fwrite(&nz, sizeof(int), 1, fp_bin); fwrite(&nucell, sizeof(int), 1, fp_bin); fwrite(&boltz, sizeof(double), 1, fp_bin); fwrite(Phi_all[0],sizeof(double),ntotal*fft_dim2*2,fp_bin); fwrite(&TempAve, sizeof(double),1, fp_bin); fwrite(&basevec[0], sizeof(double),9, fp_bin); fwrite(&basis_root[0],sizeof(double),fft_dim,fp_bin); fwrite(basetype, sizeof(int), nucell, fp_bin); fwrite(M_inv_sqrt, sizeof(double),nucell, fp_bin); fclose(fp_bin); // write log file, here however, it is the dynamical matrix that is written fprintf(flog,"############################################################\n"); fprintf(flog,"# Current time step : " BIGINT_FORMAT "\n", update->ntimestep); fprintf(flog,"# Total number of measurements : %d\n", neval); fprintf(flog,"# Average temperature of the measurement : %lg\n", TempAve); fprintf(flog,"# Boltzmann constant under current units : %lg\n", boltz); fprintf(flog,"# basis vector A1 = [%lg %lg %lg]\n", basevec[0], basevec[1], basevec[2]); fprintf(flog,"# basis vector A2 = [%lg 
%lg %lg]\n", basevec[3], basevec[4], basevec[5]); fprintf(flog,"# basis vector A3 = [%lg %lg %lg]\n", basevec[6], basevec[7], basevec[8]); fprintf(flog,"############################################################\n"); fprintf(flog,"# qx\t qy \t qz \t\t Phi(q)\n"); EnforceASR(); // to get D = 1/M x Phi for (idq = 0; idq < ntotal; ++idq){ ndim =0; for (idim = 0; idim < fft_dim; ++idim) for (jdim = 0; jdim < fft_dim; ++jdim) Phi_all[idq][ndim++] *= M_inv_sqrt[idim/sysdim]*M_inv_sqrt[jdim/sysdim]; } idq =0; for (int ix = 0; ix < nx; ++ix){ double qx = double(ix)/double(nx); for (int iy = 0; iy < ny; ++iy){ double qy = double(iy)/double(ny); for (int iz = 0; iz < nz; ++iz){ double qz = double(iz)/double(nz); fprintf(flog,"%lg %lg %lg", qx, qy, qz); for (idim = 0; idim < fft_dim2; ++idim) fprintf(flog, " %lg %lg", std::real(Phi_all[idq][idim]), std::imag(Phi_all[idq][idim])); fprintf(flog, "\n"); ++idq; } } } fflush(flog); } } // end of postprocess /* ---------------------------------------------------------------------- * private method, to get the inverse of a complex matrix by means of * Gaussian-Jordan Elimination with full pivoting; square matrix required. * * Adapted from the Numerical Recipes in Fortran. 
* --------------------------------------------------------------------*/

// Replaces the n x n matrix Mat by its inverse, in place.
//   n   : matrix dimension
//   Mat : the matrix as one flat array, element (r,c) at Mat[r*n+c]
// Aborts through error->one() when the matrix is singular or when no
// usable pivot can be found.
void FixPhonon::GaussJordan(int n, std::complex<double> *Mat)
{
  int i,icol,irow,j,k,l,ll,idr,idc;
  int *indxc,*indxr,*ipiv;
  double big, nmjk;
  std::complex<double> dum, pivinv;

  indxc = new int[n];   // pivot column chosen in step i
  indxr = new int[n];   // row the pivot was found in at step i
  ipiv  = new int[n];   // 1 once a column has been used as pivot

  for (i = 0; i < n; ++i) ipiv[i] = 0;
  for (i = 0; i < n; ++i){
    // search the not yet reduced part for the entry of largest norm
    big = 0.;
    irow = icol = -1;
    for (j = 0; j < n; ++j){
      if (ipiv[j] != 1){
        for (k = 0; k < n; ++k){
          if (ipiv[k] == 0){
            idr = j*n+k;
            nmjk = std::norm(Mat[idr]);
            if (nmjk >= big){
              big  = nmjk;
              irow = j;
              icol = k;
            }
          } else if (ipiv[k] > 1) error->one(FLERR,"Singular matrix in complex GaussJordan!");
        }
      }
    }

    // a NaN norm never satisfies ">=", so a block of NaN entries leaves the
    // pivot unset; stop here instead of indexing with an undefined value
    if (icol < 0) error->one(FLERR,"Singular matrix in complex GaussJordan!");

    ipiv[icol] += 1;

    // bring the pivot onto the diagonal by swapping rows
    if (irow != icol){
      for (l = 0; l < n; ++l){
        idr  = irow*n+l;
        idc  = icol*n+l;
        dum  = Mat[idr];
        Mat[idr] = Mat[idc];
        Mat[idc] = dum;
      }
    }
    indxr[i] = irow;
    indxc[i] = icol;
    idr = icol*n+icol;
    if (Mat[idr] == std::complex<double>(0.,0.)) error->one(FLERR,"Singular matrix in complex GaussJordan!");

    // scale the pivot row, then eliminate the pivot column from all others
    pivinv = 1./ Mat[idr];
    Mat[idr] = std::complex<double>(1.,0.);
    idr = icol*n;
    for (l = 0; l < n; ++l) Mat[idr+l] *= pivinv;
    for (ll = 0; ll < n; ++ll){
      if (ll != icol){
        idc = ll*n + icol;
        dum = Mat[idc];
        Mat[idc] = 0.;
        idc -= icol;
        for (l = 0; l < n; ++l) Mat[idc+l] -= Mat[idr+l]*dum;
      }
    }
  }

  // undo the row swaps, in reverse order, as column swaps
  for (l = n-1; l >= 0; --l){
    int rl = indxr[l];
    int cl = indxc[l];
    if (rl != cl){
      for (k = 0; k < n; ++k){
        idr = k*n + rl;
        idc = k*n + cl;
        dum = Mat[idr];
        Mat[idr] = Mat[idc];
        Mat[idc] = dum;
      }
    }
  }

  delete []indxr;
  delete []indxc;
  delete []ipiv;
}

/* ----------------------------------------------------------------------
 * private method, to apply the acoustic sum rule on force constant matrix
 * at gamma point. Should be executed on root only.
* --------------------------------------------------------------------*/ void FixPhonon::EnforceASR() { if (nasr < 1) return; for (int iit = 0; iit < nasr; ++iit){ // simple ASR; the resultant matrix might not be symmetric for (int a = 0; a < sysdim; ++a) for (int b = 0; b < sysdim; ++b){ for (int k = 0; k < nucell; ++k){ double sum = 0.; for (int kp = 0; kp < nucell; ++kp){ int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b; sum += std::real(Phi_all[0][idx]); } sum /= double(nucell); for (int kp = 0; kp < nucell; ++kp){ int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b; Phi_all[0][idx] -= sum; } } } // symmetrize for (int k = 0; k < nucell; ++k) for (int kp = k; kp < nucell; ++kp){ double csum = 0.; for (int a = 0; a < sysdim; ++a) for (int b = 0; b < sysdim; ++b){ int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b; int jdx = (kp*sysdim+b)*fft_dim + k*sysdim + a; csum = (std::real(Phi_all[0][idx])+std::real(Phi_all[0][jdx]))*0.5; Phi_all[0][idx] = std::complex(csum, std::imag(Phi_all[0][idx])); Phi_all[0][jdx] = std::complex(csum, std::imag(Phi_all[0][jdx])); } } } // symmetric ASR for (int a = 0; a < sysdim; ++a) for (int b = 0; b < sysdim; ++b){ for (int k = 0; k < nucell; ++k){ double sum = 0.; for (int kp = 0; kp < nucell; ++kp){ int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b; sum += std::real(Phi_all[0][idx]); } sum /= double(nucell-k); for (int kp = k; kp < nucell; ++kp){ int idx = (k*sysdim+a)*fft_dim + kp*sysdim + b; int jdx = (kp*sysdim+b)*fft_dim + k*sysdim + a; Phi_all[0][idx] -= sum; Phi_all[0][jdx] = std::complex(std::real(Phi_all[0][idx]), std::imag(Phi_all[0][jdx])); } } } } /* --------------------------------------------------------------------*/ diff --git a/src/USER-SMD/fix_smd_wall_surface.cpp b/src/USER-SMD/fix_smd_wall_surface.cpp index 082723426..94cac6e20 100644 --- a/src/USER-SMD/fix_smd_wall_surface.cpp +++ b/src/USER-SMD/fix_smd_wall_surface.cpp @@ -1,509 +1,509 @@ /* ---------------------------------------------------------------------- LAMMPS - 
Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Mike Parks (SNL), Ezwanur Rahman, J.T. Foster (UTSA) - ------------------------------------------------------------------------- */ + Contributing authors: Mike Parks (SNL), Ezwanur Rahman, J.T. Foster (UTSA) +------------------------------------------------------------------------- */ #include #include "fix_smd_wall_surface.h" #include "atom.h" #include "domain.h" #include "force.h" #include "comm.h" #include "update.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "pair.h" #include "lattice.h" #include "memory.h" #include "error.h" #include #include #include "atom_vec.h" #include using namespace LAMMPS_NS; using namespace FixConst; using namespace Eigen; using namespace std; #define DELTA 16384 #define EPSILON 1.0e-6 enum { LAYOUT_UNIFORM, LAYOUT_NONUNIFORM, LAYOUT_TILED }; // several files /* ---------------------------------------------------------------------- */ FixSMDWallSurface::FixSMDWallSurface(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { restart_global = 0; restart_peratom = 0; first = 1; //atom->add_callback(0); //atom->add_callback(1); if (narg != 6) error->all(FLERR, "Illegal number of arguments for fix smd/wall_surface"); filename = strdup(arg[3]); wall_particle_type = force->inumeric(FLERR, arg[4]); wall_molecule_id = force->inumeric(FLERR, arg[5]); if (wall_molecule_id < 65535) { error->one(FLERR, 
"wall molcule id must be >= 65535\n"); } if (comm->me == 0) { printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n"); printf("fix smd/wall_surface reads trianglulated surface from file: %s\n", filename); printf("fix smd/wall_surface has particle type %d \n", wall_particle_type); printf("fix smd/wall_surface has molecule id %d \n", wall_molecule_id); printf(">>========>>========>>========>>========>>========>>========>>========>>========\n"); } } /* ---------------------------------------------------------------------- */ FixSMDWallSurface::~FixSMDWallSurface() { free(filename); filename = NULL; // unregister this fix so atom class doesn't invoke it any more //atom->delete_callback(id, 0); //atom->delete_callback(id, 1); } /* ---------------------------------------------------------------------- */ int FixSMDWallSurface::setmask() { int mask = 0; return mask; } /* ---------------------------------------------------------------------- */ void FixSMDWallSurface::init() { if (!first) return; } /* ---------------------------------------------------------------------- For minimization: setup as with dynamics ------------------------------------------------------------------------- */ void FixSMDWallSurface::min_setup(int vflag) { setup(vflag); } /* ---------------------------------------------------------------------- create initial list of neighbor partners via call to neighbor->build() must be done in setup (not init) since fix init comes before neigh init ------------------------------------------------------------------------- */ void FixSMDWallSurface::setup(int vflag) { if (!first) return; first = 0; // set bounds for my proc // if periodic and I am lo/hi proc, adjust bounds by EPSILON // insures all data atoms will be owned even with round-off int triclinic = domain->triclinic; double epsilon[3]; if (triclinic) epsilon[0] = epsilon[1] = epsilon[2] = EPSILON; else { epsilon[0] = domain->prd[0] * EPSILON; epsilon[1] = 
domain->prd[1] * EPSILON; epsilon[2] = domain->prd[2] * EPSILON; } if (triclinic == 0) { sublo[0] = domain->sublo[0]; subhi[0] = domain->subhi[0]; sublo[1] = domain->sublo[1]; subhi[1] = domain->subhi[1]; sublo[2] = domain->sublo[2]; subhi[2] = domain->subhi[2]; } else { sublo[0] = domain->sublo_lamda[0]; subhi[0] = domain->subhi_lamda[0]; sublo[1] = domain->sublo_lamda[1]; subhi[1] = domain->subhi_lamda[1]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } if (comm->layout != LAYOUT_TILED) { if (domain->xperiodic) { if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; if (comm->myloc[0] == comm->procgrid[0] - 1) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; if (comm->myloc[1] == comm->procgrid[1] - 1) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; if (comm->myloc[2] == comm->procgrid[2] - 1) subhi[2] += epsilon[2]; } } else { if (domain->xperiodic) { if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; } } read_triangles(0); } /* ---------------------------------------------------------------------- function to determine number of values in a text line ------------------------------------------------------------------------- */ int FixSMDWallSurface::count_words(const char *line) { int n = strlen(line) + 1; char *copy; memory->create(copy, n, "atom:copy"); strcpy(copy, line); char *ptr; if ((ptr = strchr(copy, '#'))) *ptr = '\0'; if (strtok(copy, " \t\n\r\f") == NULL) { memory->destroy(copy); return 0; } n = 1; while (strtok(NULL, " \t\n\r\f")) n++; memory->destroy(copy); return n; } /* 
---------------------------------------------------------------------- size of atom nlocal's restart data ------------------------------------------------------------------------- */ void FixSMDWallSurface::read_triangles(int pass) { double coord[3]; int nlocal_previous = atom->nlocal; int ilocal = nlocal_previous; int m; int me; bigint natoms_previous = atom->natoms; Vector3d *vert; vert = new Vector3d[3]; Vector3d normal, center; FILE *fp = fopen(filename, "r"); if (fp == NULL) { char str[128]; sprintf(str, "Cannot open file %s", filename); error->one(FLERR, str); } MPI_Comm_rank(world, &me); if (me == 0) { if (screen) { if (pass == 0) { printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n"); fprintf(screen, " scanning triangle pairs ...\n"); } else { fprintf(screen, " reading triangle pairs ...\n"); } } if (logfile) { if (pass == 0) { fprintf(logfile, " scanning triangle pairs ...\n"); } else { fprintf(logfile, " reading triangle pairs ...\n"); } } } char str[128]; char line[256]; char *retpointer; char **values; int nwords; // read STL solid name retpointer = fgets(line, sizeof(line), fp); if (retpointer == NULL) { sprintf(str, "error reading number of triangle pairs"); error->one(FLERR, str); } nwords = count_words(line); if (nwords < 1) { sprintf(str, "first line of file is incorrect"); error->one(FLERR, str); } // values = new char*[nwords]; // values[0] = strtok(line, " \t\n\r\f"); // if (values[0] == NULL) // error->all(FLERR, "Incorrect atom format in data file"); // for (m = 1; m < nwords; m++) { // values[m] = strtok(NULL, " \t\n\r\f"); // if (values[m] == NULL) // error->all(FLERR, "Incorrect atom format in data file"); // } // delete[] values; // // if (comm->me == 0) { // cout << "STL file contains solid body with name: " << values[1] << endl; // } // iterate over STL facets util end of body is reached while (fgets(line, sizeof(line), fp)) { // read a line, should be the facet line // evaluate facet line nwords 
= count_words(line); if (nwords != 5) { //sprintf(str, "found end solid line"); //error->message(FLERR, str); break; } else { // should be facet line } values = new char*[nwords]; values[0] = strtok(line, " \t\n\r\f"); if (values[0] == NULL) error->all(FLERR, "Incorrect atom format in data file"); for (m = 1; m < nwords; m++) { values[m] = strtok(NULL, " \t\n\r\f"); if (values[m] == NULL) error->all(FLERR, "Incorrect atom format in data file"); } normal << force->numeric(FLERR, values[2]), force->numeric(FLERR, values[3]), force->numeric(FLERR, values[4]); //cout << "normal is " << normal << endl; delete[] values; // read outer loop line retpointer = fgets(line, sizeof(line), fp); if (retpointer == NULL) { sprintf(str, "error reading outer loop"); error->one(FLERR, str); } nwords = count_words(line); if (nwords != 2) { sprintf(str, "error reading outer loop"); error->one(FLERR, str); } // read vertex lines for (int k = 0; k < 3; k++) { retpointer = fgets(line, sizeof(line), fp); if (retpointer == NULL) { sprintf(str, "error reading vertex line"); error->one(FLERR, str); } nwords = count_words(line); if (nwords != 4) { sprintf(str, "error reading vertex line"); error->one(FLERR, str); } values = new char*[nwords]; values[0] = strtok(line, " \t\n\r\f"); if (values[0] == NULL) error->all(FLERR, "Incorrect vertex line"); for (m = 1; m < nwords; m++) { values[m] = strtok(NULL, " \t\n\r\f"); if (values[m] == NULL) error->all(FLERR, "Incorrect vertex line"); } vert[k] << force->numeric(FLERR, values[1]), force->numeric(FLERR, values[2]), force->numeric(FLERR, values[3]); //cout << "vertex is " << vert[k] << endl; //printf("%s %s %s\n", values[1], values[2], values[3]); delete[] values; //exit(1); } // read end loop line retpointer = fgets(line, sizeof(line), fp); if (retpointer == NULL) { sprintf(str, "error reading endloop"); error->one(FLERR, str); } nwords = count_words(line); if (nwords != 1) { sprintf(str, "error reading endloop"); error->one(FLERR, str); } // read 
end facet line retpointer = fgets(line, sizeof(line), fp); if (retpointer == NULL) { sprintf(str, "error reading endfacet"); error->one(FLERR, str); } nwords = count_words(line); if (nwords != 1) { sprintf(str, "error reading endfacet"); error->one(FLERR, str); } // now we have a normal and three vertices ... proceed with adding triangle center = (vert[0] + vert[1] + vert[2]) / 3.0; // cout << "center is " << center << endl; double r1 = (center - vert[0]).norm(); double r2 = (center - vert[1]).norm(); double r3 = (center - vert[2]).norm(); double r = MAX(r1, r2); r = MAX(r, r3); /* * if atom/molecule is in my subbox, create it * ... use x0 to hold triangle normal. * ... use smd_data_9 to hold the three vertices * ... use x to hold triangle center * ... radius is the mmaximal distance from triangle center to all vertices */ // printf("coord: %f %f %f\n", coord[0], coord[1], coord[2]); // printf("sublo: %f %f %f\n", sublo[0], sublo[1], sublo[2]); // printf("subhi: %f %f %f\n", subhi[0], subhi[1], subhi[2]); //printf("ilocal = %d\n", ilocal); if (center(0) >= sublo[0] && center(0) < subhi[0] && center(1) >= sublo[1] && center(1) < subhi[1] && center(2) >= sublo[2] && center(2) < subhi[2]) { //printf("******* KERATIN nlocal=%d ***\n", nlocal); coord[0] = center(0); coord[1] = center(1); coord[2] = center(2); atom->avec->create_atom(wall_particle_type, coord); /* * need to initialize pointers to atom vec arrays here, because they could have changed * due to calling grow() in create_atoms() above; */ tagint *mol = atom->molecule; int *type = atom->type; double *radius = atom->radius; double *contact_radius = atom->contact_radius; double **smd_data_9 = atom->smd_data_9; double **x0 = atom->x0; radius[ilocal] = r; //ilocal; contact_radius[ilocal] = r; //ilocal; mol[ilocal] = wall_molecule_id; type[ilocal] = wall_particle_type; x0[ilocal][0] = normal(0); x0[ilocal][1] = normal(1); x0[ilocal][2] = normal(2); smd_data_9[ilocal][0] = vert[0](0); smd_data_9[ilocal][1] = 
vert[0](1); smd_data_9[ilocal][2] = vert[0](2); smd_data_9[ilocal][3] = vert[1](0); smd_data_9[ilocal][4] = vert[1](1); smd_data_9[ilocal][5] = vert[1](2); smd_data_9[ilocal][6] = vert[2](0); smd_data_9[ilocal][7] = vert[2](1); smd_data_9[ilocal][8] = vert[2](2); ilocal++; } } // set new total # of atoms and error check bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal, &atom->natoms, 1, MPI_LMP_BIGINT, MPI_SUM, world); if (atom->natoms < 0 || atom->natoms >= MAXBIGINT) error->all(FLERR, "Too many total atoms"); // add IDs for newly created atoms // check that atom IDs are valid if (atom->tag_enable) atom->tag_extend(); atom->tag_check(); // create global mapping of atoms // zero nghost in case are adding new atoms to existing atoms if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } // print status if (comm->me == 0) { if (screen) { printf("... fix smd/wall_surface finished reading triangulated surface\n"); fprintf(screen, "fix smd/wall_surface created " BIGINT_FORMAT " atoms\n", atom->natoms - natoms_previous); printf(">>========>>========>>========>>========>>========>>========>>========>>========\n"); } if (logfile) { fprintf(logfile, "... fix smd/wall_surface finished reading triangulated surface\n"); fprintf(logfile, "fix smd/wall_surface created " BIGINT_FORMAT " atoms\n", atom->natoms - natoms_previous); fprintf(logfile, ">>========>>========>>========>>========>>========>>========>>========>>========\n"); } } delete[] vert; fclose(fp); } diff --git a/src/USER-SMD/pair_smd_hertz.cpp b/src/USER-SMD/pair_smd_hertz.cpp index 3c7d7cffb..76143d639 100644 --- a/src/USER-SMD/pair_smd_hertz.cpp +++ b/src/USER-SMD/pair_smd_hertz.cpp @@ -1,385 +1,385 @@ /* ---------------------------------------------------------------------- * * *** Smooth Mach Dynamics *** * * This file is part of the USER-SMD package for LAMMPS. * Copyright (2014) Georg C. 
Ganzenmueller, georg.ganzenmueller@emi.fhg.de * Fraunhofer Ernst-Mach Institute for High-Speed Dynamics, EMI, * Eckerstrasse 4, D-79104 Freiburg i.Br, Germany. * * ----------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mike Parks (SNL) - ------------------------------------------------------------------------- */ + Contributing author: Mike Parks (SNL) +------------------------------------------------------------------------- */ #include #include #include #include #include "pair_smd_hertz.h" #include "atom.h" #include "domain.h" #include "force.h" #include "update.h" #include "modify.h" #include "fix.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define SQRT2 1.414213562e0 /* ---------------------------------------------------------------------- */ PairHertz::PairHertz(LAMMPS *lmp) : Pair(lmp) { onerad_dynamic = onerad_frozen = maxrad_dynamic = maxrad_frozen = NULL; bulkmodulus = NULL; kn = NULL; scale = 1.0; } /* ---------------------------------------------------------------------- */ PairHertz::~PairHertz() { if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(bulkmodulus); memory->destroy(kn); delete[] 
onerad_dynamic; delete[] onerad_frozen; delete[] maxrad_dynamic; delete[] maxrad_frozen; } } /* ---------------------------------------------------------------------- */ void PairHertz::compute(int eflag, int vflag) { int i, j, ii, jj, inum, jnum, itype, jtype; double xtmp, ytmp, ztmp, delx, dely, delz; double rsq, r, evdwl, fpair; int *ilist, *jlist, *numneigh, **firstneigh; double rcut, r_geom, delta, ri, rj, dt_crit; double *rmass = atom->rmass; evdwl = 0.0; if (eflag || vflag) ev_setup(eflag, vflag); else evflag = vflag_fdotr = 0; double **f = atom->f; double **x = atom->x; double **x0 = atom->x0; int *type = atom->type; int nlocal = atom->nlocal; double *radius = atom->contact_radius; double *sph_radius = atom->radius; double rcutSq; double delx0, dely0, delz0, rSq0, sphCut; int newton_pair = force->newton_pair; int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; stable_time_increment = 1.0e22; // loop over neighbors of my atoms for (ii = 0; ii < inum; ii++) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; ri = scale * radius[i]; jlist = firstneigh[i]; jnum = numneigh[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; jtype = type[j]; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx * delx + dely * dely + delz * delz; rj = scale * radius[j]; rcut = ri + rj; rcutSq = rcut * rcut; if (rsq < rcutSq) { /* * self contact option: * if pair of particles was initially close enough to interact via a bulk continuum mechanism (e.g. SPH), exclude pair from contact forces. * this approach should work well if no updates of the reference configuration are performed. 
*/ if (itype == jtype) { delx0 = x0[j][0] - x0[i][0]; dely0 = x0[j][1] - x0[i][1]; delz0 = x0[j][2] - x0[i][2]; if (periodic) { domain->minimum_image(delx0, dely0, delz0); } rSq0 = delx0 * delx0 + dely0 * dely0 + delz0 * delz0; // initial distance sphCut = sph_radius[i] + sph_radius[j]; if (rSq0 < sphCut * sphCut) { rcut = 0.5 * rcut; rcutSq = rcut * rcut; if (rsq > rcutSq) { continue; } } } r = sqrt(rsq); //printf("hertz interaction, r=%f, cut=%f, h=%f\n", r, rcut, sqrt(rSq0)); // Hertzian short-range forces delta = rcut - r; // overlap distance r_geom = ri * rj / rcut; //assuming poisson ratio = 1/4 for 3d fpair = 1.066666667e0 * bulkmodulus[itype][jtype] * delta * sqrt(delta * r_geom); // units: N evdwl = fpair * 0.4e0 * delta; // GCG 25 April: this expression conserves total energy dt_crit = 3.14 * sqrt(0.5 * (rmass[i] + rmass[j]) / (fpair / delta)); stable_time_increment = MIN(stable_time_increment, dt_crit); if (r > 2.0e-16) { fpair /= r; // divide by r and multiply with non-normalized distance vector } else { fpair = 0.0; } /* * contact viscosity -- needs to be done, see GRANULAR package for normal & shear damping * for now: no damping and thus no viscous energy deltaE */ if (evflag) { ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, delx, dely, delz); } f[i][0] += delx * fpair; f[i][1] += dely * fpair; f[i][2] += delz * fpair; if (newton_pair || j < nlocal) { f[j][0] -= delx * fpair; f[j][1] -= dely * fpair; f[j][2] -= delz * fpair; } } } } // double stable_time_increment_all = 0.0; // MPI_Allreduce(&stable_time_increment, &stable_time_increment_all, 1, MPI_DOUBLE, MPI_MIN, world); // if (comm->me == 0) { // printf("stable time step for pair smd/hertz is %f\n", stable_time_increment_all); // } } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairHertz::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag, 
n + 1, n + 1, "pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(bulkmodulus, n + 1, n + 1, "pair:kspring"); memory->create(kn, n + 1, n + 1, "pair:kn"); memory->create(cutsq, n + 1, n + 1, "pair:cutsq"); // always needs to be allocated, even with granular neighborlist onerad_dynamic = new double[n + 1]; onerad_frozen = new double[n + 1]; maxrad_dynamic = new double[n + 1]; maxrad_frozen = new double[n + 1]; } /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ void PairHertz::settings(int narg, char **arg) { if (narg != 1) error->all(FLERR, "Illegal number of args for pair_style hertz"); scale = force->numeric(FLERR, arg[0]); if (comm->me == 0) { printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n"); printf("SMD/HERTZ CONTACT SETTINGS:\n"); printf("... effective contact radius is scaled by %f\n", scale); printf(">>========>>========>>========>>========>>========>>========>>========>>========\n"); } } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairHertz::coeff(int narg, char **arg) { if (narg != 3) error->all(FLERR, "Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo, ihi, jlo, jhi; force->bounds(FLERR,arg[0], atom->ntypes, ilo, ihi); force->bounds(FLERR,arg[1], atom->ntypes, jlo, jhi); double bulkmodulus_one = atof(arg[2]); // set short-range force constant double kn_one = 0.0; if (domain->dimension == 3) { kn_one = (16. / 15.) * bulkmodulus_one; //assuming poisson ratio = 1/4 for 3d } else { kn_one = 0.251856195 * (2. / 3.) 
* bulkmodulus_one; //assuming poisson ratio = 1/3 for 2d } int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo, i); j <= jhi; j++) { bulkmodulus[i][j] = bulkmodulus_one; kn[i][j] = kn_one; setflag[i][j] = 1; count++; } } if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairHertz::init_one(int i, int j) { if (!allocated) allocate(); if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set"); bulkmodulus[j][i] = bulkmodulus[i][j]; kn[j][i] = kn[i][j]; // cutoff = sum of max I,J radii for // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j]; cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]); cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]); if (comm->me == 0) { printf("cutoff for pair smd/hertz = %f\n", cutoff); } return cutoff; } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairHertz::init_style() { int i; // error checks if (!atom->contact_radius_flag) error->all(FLERR, "Pair style smd/hertz requires atom style with contact_radius"); int irequest = neighbor->request(this); neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->gran = 1; // set maxrad_dynamic and maxrad_frozen for each type // include future Fix pour particles as dynamic for (i = 1; i <= atom->ntypes; i++) onerad_dynamic[i] = onerad_frozen[i] = 0.0; double *radius = atom->radius; int *type = atom->type; int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]); } MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], 
atom->ntypes, MPI_DOUBLE, MPI_MAX, world); MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world); } /* ---------------------------------------------------------------------- neighbor callback to inform pair style of neighbor list to use optional granular history list ------------------------------------------------------------------------- */ void PairHertz::init_list(int id, NeighList *ptr) { if (id == 0) list = ptr; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double PairHertz::memory_usage() { return 0.0; } void *PairHertz::extract(const char *str, int &i) { //printf("in PairTriSurf::extract\n"); if (strcmp(str, "smd/hertz/stable_time_increment_ptr") == 0) { return (void *) &stable_time_increment; } return NULL; } diff --git a/src/USER-SMD/pair_smd_triangulated_surface.cpp b/src/USER-SMD/pair_smd_triangulated_surface.cpp index 8410f2ec0..b4e63dd11 100644 --- a/src/USER-SMD/pair_smd_triangulated_surface.cpp +++ b/src/USER-SMD/pair_smd_triangulated_surface.cpp @@ -1,846 +1,846 @@ /* ---------------------------------------------------------------------- * * *** Smooth Mach Dynamics *** * * This file is part of the USER-SMD package for LAMMPS. * Copyright (2014) Georg C. Ganzenmueller, georg.ganzenmueller@emi.fhg.de * Fraunhofer Ernst-Mach Institute for High-Speed Dynamics, EMI, * Eckerstrasse 4, D-79104 Freiburg i.Br, Germany. * * ----------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mike Parks (SNL) - ------------------------------------------------------------------------- */ + Contributing author: Mike Parks (SNL) +------------------------------------------------------------------------- */ #include #include #include #include #include "pair_smd_triangulated_surface.h" #include "atom.h" #include "domain.h" #include "force.h" #include "update.h" #include "modify.h" #include "fix.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "memory.h" #include "error.h" #include #include #include using namespace std; using namespace LAMMPS_NS; using namespace Eigen; #define SQRT2 1.414213562e0 /* ---------------------------------------------------------------------- */ PairTriSurf::PairTriSurf(LAMMPS *lmp) : Pair(lmp) { onerad_dynamic = onerad_frozen = maxrad_dynamic = maxrad_frozen = NULL; bulkmodulus = NULL; kn = NULL; scale = 1.0; } /* ---------------------------------------------------------------------- */ PairTriSurf::~PairTriSurf() { if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(bulkmodulus); memory->destroy(kn); delete[] onerad_dynamic; delete[] onerad_frozen; delete[] maxrad_dynamic; delete[] maxrad_frozen; } } /* ---------------------------------------------------------------------- */ void PairTriSurf::compute(int eflag, int vflag) { int i, j, ii, jj, inum, jnum, itype, jtype; double rsq, r, evdwl, fpair; int *ilist, *jlist, *numneigh, **firstneigh; double rcut, r_geom, delta, r_tri, r_particle, touch_distance, dt_crit; int tri, particle; Vector3d normal, x1, x2, x3, x4, x13, x23, x43, w, cp, x4cp, vnew, v_old; ; 
Vector3d xi, x_center, dx; Matrix2d C; Vector2d w2d, rhs; evdwl = 0.0; if (eflag || vflag) ev_setup(eflag, vflag); else evflag = vflag_fdotr = 0; tagint *mol = atom->molecule; double **f = atom->f; double **smd_data_9 = atom->smd_data_9; double **x = atom->x; double **x0 = atom->x0; double **v = atom->v; double *rmass = atom->rmass; int *type = atom->type; int nlocal = atom->nlocal; double *radius = atom->contact_radius; double rcutSq; Vector3d offset; int newton_pair = force->newton_pair; int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; int max_neighs = 0; stable_time_increment = 1.0e22; // loop over neighbors of my atoms using a half neighbor list for (ii = 0; ii < inum; ii++) { i = ilist[ii]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; max_neighs = MAX(max_neighs, jnum); for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; jtype = type[j]; /* * decide which one of i, j is triangle and which is particle */ if ((mol[i] < 65535) && (mol[j] >= 65535)) { particle = i; tri = j; } else if ((mol[j] < 65535) && (mol[i] >= 65535)) { particle = j; tri = i; } else { error->one(FLERR, "unknown case"); } //x_center << x[tri][0], x[tri][1], x[tri][2]; // center of triangle x_center(0) = x[tri][0]; x_center(1) = x[tri][1]; x_center(2) = x[tri][2]; //x4 << x[particle][0], x[particle][1], x[particle][2]; x4(0) = x[particle][0]; x4(1) = x[particle][1]; x4(2) = x[particle][2]; dx = x_center - x4; // if (periodic) { domain->minimum_image(dx(0), dx(1), dx(2)); } rsq = dx.squaredNorm(); r_tri = scale * radius[tri]; r_particle = scale * radius[particle]; rcut = r_tri + r_particle; rcutSq = rcut * rcut; //printf("type i=%d, type j=%d, r=%f, ri=%f, rj=%f\n", itype, jtype, sqrt(rsq), ri, rj); if (rsq < rcutSq) { /* * gather triangle information */ normal(0) = x0[tri][0]; normal(1) = x0[tri][1]; normal(2) = x0[tri][2]; /* * distance check: 
is particle closer than its radius to the triangle plane? */ if (fabs(dx.dot(normal)) < radius[particle]) { /* * get other two triangle vertices */ x1(0) = smd_data_9[tri][0]; x1(1) = smd_data_9[tri][1]; x1(2) = smd_data_9[tri][2]; x2(0) = smd_data_9[tri][3]; x2(1) = smd_data_9[tri][4]; x2(2) = smd_data_9[tri][5]; x3(0) = smd_data_9[tri][6]; x3(1) = smd_data_9[tri][7]; x3(2) = smd_data_9[tri][8]; PointTriangleDistance(x4, x1, x2, x3, cp, r); /* * distance to closest point */ x4cp = x4 - cp; /* * flip normal to point in direction of x4cp */ if (x4cp.dot(normal) < 0.0) { normal *= -1.0; } /* * penalty force pushes particle away from triangle */ if (r < 1.0 * radius[particle]) { delta = radius[particle] - r; // overlap distance r_geom = radius[particle]; fpair = 1.066666667e0 * bulkmodulus[itype][jtype] * delta * sqrt(delta * r_geom); dt_crit = 3.14 * sqrt(rmass[particle] / (fpair / delta)); stable_time_increment = MIN(stable_time_increment, dt_crit); evdwl = r * fpair * 0.4e0 * delta; // GCG 25 April: this expression conserves total energy fpair /= (r + 1.0e-2 * radius[particle]); // divide by r + softening and multiply with non-normalized distance vector if (particle < nlocal) { f[particle][0] += x4cp(0) * fpair; f[particle][1] += x4cp(1) * fpair; f[particle][2] += x4cp(2) * fpair; } if (tri < nlocal) { f[tri][0] -= x4cp(0) * fpair; f[tri][1] -= x4cp(1) * fpair; f[tri][2] -= x4cp(2) * fpair; } if (evflag) { ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, fpair, x4cp(0), x4cp(1), x4cp(2)); } } /* * if particle comes too close to triangle, reflect its velocity and explicitely move it away */ touch_distance = 1.0 * radius[particle]; if (r < touch_distance) { /* * reflect velocity if it points toward triangle */ normal = x4cp / r; //v_old << v[particle][0], v[particle][1], v[particle][2]; v_old(0) = v[particle][0]; v_old(1) = v[particle][1]; v_old(2) = v[particle][2]; if (v_old.dot(normal) < 0.0) { //printf("flipping velocity\n"); vnew = 1.0 * (-2.0 * v_old.dot(normal) 
* normal + v_old); v[particle][0] = vnew(0); v[particle][1] = vnew(1); v[particle][2] = vnew(2); } //printf("moving particle on top of triangle\n"); x[particle][0] = cp(0) + touch_distance * normal(0); x[particle][1] = cp(1) + touch_distance * normal(1); x[particle][2] = cp(2) + touch_distance * normal(2); } } } } } // int max_neighs_all = 0; // MPI_Allreduce(&max_neighs, &max_neighs_all, 1, MPI_INT, MPI_MAX, world); // if (comm->me == 0) { // printf("max. neighs in tri pair is %d\n", max_neighs_all); // } // // double stable_time_increment_all = 0.0; // MPI_Allreduce(&stable_time_increment, &stable_time_increment_all, 1, MPI_DOUBLE, MPI_MIN, world); // if (comm->me == 0) { // printf("stable time step tri pair is %f\n", stable_time_increment_all); // } } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairTriSurf::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag, n + 1, n + 1, "pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(bulkmodulus, n + 1, n + 1, "pair:kspring"); memory->create(kn, n + 1, n + 1, "pair:kn"); memory->create(cutsq, n + 1, n + 1, "pair:cutsq"); // always needs to be allocated, even with granular neighborlist onerad_dynamic = new double[n + 1]; onerad_frozen = new double[n + 1]; maxrad_dynamic = new double[n + 1]; maxrad_frozen = new double[n + 1]; } /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ void PairTriSurf::settings(int narg, char **arg) { if (narg != 1) error->all(FLERR, "Illegal number of args for pair_style smd/tri_surface"); scale = force->numeric(FLERR, arg[0]); if (comm->me == 0) { printf("\n>>========>>========>>========>>========>>========>>========>>========>>========\n"); printf("SMD/TRI_SURFACE CONTACT 
SETTINGS:\n"); printf("... effective contact radius is scaled by %f\n", scale); printf(">>========>>========>>========>>========>>========>>========>>========>>========\n"); } } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairTriSurf::coeff(int narg, char **arg) { if (narg != 3) error->all(FLERR, "Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo, ihi, jlo, jhi; force->bounds(FLERR,arg[0], atom->ntypes, ilo, ihi); force->bounds(FLERR,arg[1], atom->ntypes, jlo, jhi); double bulkmodulus_one = atof(arg[2]); // set short-range force constant double kn_one = 0.0; if (domain->dimension == 3) { kn_one = (16. / 15.) * bulkmodulus_one; //assuming poisson ratio = 1/4 for 3d } else { kn_one = 0.251856195 * (2. / 3.) * bulkmodulus_one; //assuming poisson ratio = 1/3 for 2d } int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo, i); j <= jhi; j++) { bulkmodulus[i][j] = bulkmodulus_one; kn[i][j] = kn_one; setflag[i][j] = 1; count++; } } if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairTriSurf::init_one(int i, int j) { if (!allocated) allocate(); if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set"); bulkmodulus[j][i] = bulkmodulus[i][j]; kn[j][i] = kn[i][j]; // cutoff = sum of max I,J radii for // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen double cutoff = maxrad_dynamic[i] + maxrad_dynamic[j]; cutoff = MAX(cutoff, maxrad_frozen[i] + maxrad_dynamic[j]); cutoff = MAX(cutoff, maxrad_dynamic[i] + maxrad_frozen[j]); if (comm->me == 0) { printf("cutoff for pair smd/smd/tri_surface = %f\n", cutoff); } return cutoff; 
} /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairTriSurf::init_style() { int i; // error checks if (!atom->contact_radius_flag) error->all(FLERR, "Pair style smd/smd/tri_surface requires atom style with contact_radius"); // old: half list int irequest = neighbor->request(this); neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->gran = 1; // need a full neighbor list // int irequest = neighbor->request(this); // neighbor->requests[irequest]->half = 0; // neighbor->requests[irequest]->full = 1; // set maxrad_dynamic and maxrad_frozen for each type // include future Fix pour particles as dynamic for (i = 1; i <= atom->ntypes; i++) onerad_dynamic[i] = onerad_frozen[i] = 0.0; double *radius = atom->radius; int *type = atom->type; int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]], radius[i]); } MPI_Allreduce(&onerad_dynamic[1], &maxrad_dynamic[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world); MPI_Allreduce(&onerad_frozen[1], &maxrad_frozen[1], atom->ntypes, MPI_DOUBLE, MPI_MAX, world); } /* ---------------------------------------------------------------------- neighbor callback to inform pair style of neighbor list to use optional granular history list ------------------------------------------------------------------------- */ void PairTriSurf::init_list(int id, NeighList *ptr) { if (id == 0) list = ptr; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double PairTriSurf::memory_usage() { return 0.0; } /* * distance between triangle and point */ /* function [dist,PP0] = pointTriangleDistance(TRI,P) % calculate distance between a point and a triangle in 3D % SYNTAX % dist = pointTriangleDistance(TRI,P) % 
[dist,PP0] = pointTriangleDistance(TRI,P) % % DESCRIPTION % Calculate the distance of a given point P from a triangle TRI. % Point P is a row vector of the form 1x3. The triangle is a matrix % formed by three rows of points TRI = [P1;P2;P3] each of size 1x3. % dist = pointTriangleDistance(TRI,P) returns the distance of the point P % to the triangle TRI. % [dist,PP0] = pointTriangleDistance(TRI,P) additionally returns the % closest point PP0 to P on the triangle TRI. % % Author: Gwendolyn Fischer % Release: 1.0 % Release date: 09/02/02 % Release: 1.1 Fixed Bug because of normalization % Release: 1.2 Fixed Bug because of typo in region 5 20101013 % Release: 1.3 Fixed Bug because of typo in region 2 20101014 % Possible extention could be a version tailored not to return the distance % and additionally the closest point, but instead return only the closest % point. Could lead to a small speed gain. % Example: % %% The Problem % P0 = [0.5 -0.3 0.5]; % % P1 = [0 -1 0]; % P2 = [1 0 0]; % P3 = [0 0 0]; % % vertices = [P1; P2; P3]; % faces = [1 2 3]; % % %% The Engine % [dist,PP0] = pointTriangleDistance([P1;P2;P3],P0); % % %% Visualization % [x,y,z] = sphere(20); % x = dist*x+P0(1); % y = dist*y+P0(2); % z = dist*z+P0(3); % % figure % hold all % patch('Vertices',vertices,'Faces',faces,'FaceColor','r','FaceAlpha',0.8); % plot3(P0(1),P0(2),P0(3),'b*'); % plot3(PP0(1),PP0(2),PP0(3),'*g') % surf(x,y,z,'FaceColor','b','FaceAlpha',0.3) % view(3) % The algorithm is based on % "David Eberly, 'Distance Between Point and Triangle in 3D', % Geometric Tools, LLC, (1999)" % http:\\www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf % % ^t % \ | % \reg2| % \ | % \ | % \ | % \| % *P2 % |\ % | \ % reg3 | \ reg1 % | \ % |reg0\ % | \ % | \ P1 % -------*-------*------->s % |P0 \ % reg4 | reg5 \ reg6 */ //void PairTriSurf::PointTriangleDistance(const Vector3d P, const Vector3d TRI1, const Vector3d TRI2, const Vector3d TRI3, // Vector3d &CP, double &dist) { // // Vector3d B, E0, 
E1, D; // double a, b, c, d, e, f; // double det, s, t, sqrDistance, tmp0, tmp1, numer, denom, invDet; // // // rewrite triangle in normal form // B = TRI1; // E0 = TRI2 - B; // E1 = TRI3 - B; // // D = B - P; // a = E0.dot(E0); // b = E0.dot(E1); // c = E1.dot(E1); // d = E0.dot(D); // e = E1.dot(D); // f = D.dot(D); // // det = a * c - b * b; // //% do we have to use abs here? // s = b * e - c * d; // t = b * d - a * e; // // //% Terible tree of conditionals to determine in which region of the diagram // //% shown above the projection of the point into the triangle-plane lies. // if ((s + t) <= det) { // if (s < 0) { // if (t < 0) { // // %region4 // if (d < 0) { // t = 0; // if (-d >= a) { // s = 1; // sqrDistance = a + 2 * d + f; // } else { // s = -d / a; // sqrDistance = d * s + f; // } // } else { // s = 0; // if (e >= 0) { // t = 0; // sqrDistance = f; // } else { // if (-e >= c) { // t = 1; // sqrDistance = c + 2 * e + f; // } else { // t = -e / c; // sqrDistance = e * t + f; // } // } // } // // end % of region 4 // } else { // // % region 3 // s = 0; // if (e >= 0) { // t = 0; // sqrDistance = f; // } else { // if (-e >= c) { // t = 1; // sqrDistance = c + 2 * e + f; // } else { // t = -e / c; // sqrDistance = e * t + f; // } // } // } // // end of region 3 // } else { // if (t < 0) { // //% region 5 // t = 0; // if (d >= 0) { // s = 0; // sqrDistance = f; // } else { // if (-d >= a) { // s = 1; // sqrDistance = a + 2 * d + f; // } else { // s = -d / a; // sqrDistance = d * s + f; // } // } // } else { // // region 0 // invDet = 1 / det; // s = s * invDet; // t = t * invDet; // sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f; // } // } // } else { // if (s < 0) { // // % region 2 // tmp0 = b + d; // tmp1 = c + e; // if (tmp1 > tmp0) { //% minimum on edge s+t=1 // numer = tmp1 - tmp0; // denom = a - 2 * b + c; // if (numer >= denom) { // s = 1; // t = 0; // sqrDistance = a + 2 * d + f; // } else { // s = numer / denom; // t = 1 
- s; // sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f; // } // } else // // % minimum on edge s=0 // s = 0; // if (tmp1 <= 0) { // t = 1; // sqrDistance = c + 2 * e + f; // } else { // if (e >= 0) { // t = 0; // sqrDistance = f; // } else { // t = -e / c; // sqrDistance = e * t + f; // } // } // } //end % of region 2 // else { // if (t < 0) { // // %region6 // tmp0 = b + e; // tmp1 = a + d; // if (tmp1 > tmp0) { // numer = tmp1 - tmp0; // denom = a - 2 * b + c; // if (numer >= denom) { // t = 1; // s = 0; // sqrDistance = c + 2 * e + f; // } else { // t = numer / denom; // s = 1 - t; // sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f; // } // } else { // t = 0; // if (tmp1 <= 0) { // s = 1; // sqrDistance = a + 2 * d + f; // } else { // if (d >= 0) { // s = 0; // sqrDistance = f; // } else { // s = -d / a; // sqrDistance = d * s + f; // } // } // } // % end region 6 // } else { // //% region 1 // numer = c + e - b - d; // if (numer <= 0) { // s = 0; // t = 1; // sqrDistance = c + 2 * e + f; // } else { // denom = a - 2 * b + c; // if (numer >= denom) { // s = 1; // t = 0; // sqrDistance = a + 2 * d + f; // } else { // s = numer / denom; // t = 1 - s; // sqrDistance = s * (a * s + b * t + 2 * d) + t * (b * s + c * t + 2 * e) + f; // } // } //% end of region 1 // } // } // } // // // % account for numerical round-off error // if (sqrDistance < 0) { // sqrDistance = 0; // } // // dist = sqrt(sqrDistance); // // // closest point // CP = B + s * E0 + t * E1; // //} /* * % The algorithm is based on % "David Eberly, 'Distance Between Point and Triangle in 3D', % Geometric Tools, LLC, (1999)" % http:\\www.geometrictools.com/Documentation/DistancePoint3Triangle3.pdf */ void PairTriSurf::PointTriangleDistance(const Vector3d sourcePosition, const Vector3d TRI0, const Vector3d TRI1, const Vector3d TRI2, Vector3d &CP, double &dist) { Vector3d edge0 = TRI1 - TRI0; Vector3d edge1 = TRI2 - TRI0; Vector3d v0 = TRI0 - 
sourcePosition; double a = edge0.dot(edge0); double b = edge0.dot(edge1); double c = edge1.dot(edge1); double d = edge0.dot(v0); double e = edge1.dot(v0); double det = a * c - b * b; double s = b * e - c * d; double t = b * d - a * e; if (s + t < det) { if (s < 0.f) { if (t < 0.f) { if (d < 0.f) { s = clamp(-d / a, 0.f, 1.f); t = 0.f; } else { s = 0.f; t = clamp(-e / c, 0.f, 1.f); } } else { s = 0.f; t = clamp(-e / c, 0.f, 1.f); } } else if (t < 0.f) { s = clamp(-d / a, 0.f, 1.f); t = 0.f; } else { float invDet = 1.f / det; s *= invDet; t *= invDet; } } else { if (s < 0.f) { float tmp0 = b + d; float tmp1 = c + e; if (tmp1 > tmp0) { float numer = tmp1 - tmp0; float denom = a - 2 * b + c; s = clamp(numer / denom, 0.f, 1.f); t = 1 - s; } else { t = clamp(-e / c, 0.f, 1.f); s = 0.f; } } else if (t < 0.f) { if (a + d > b + e) { float numer = c + e - b - d; float denom = a - 2 * b + c; s = clamp(numer / denom, 0.f, 1.f); t = 1 - s; } else { s = clamp(-e / c, 0.f, 1.f); t = 0.f; } } else { float numer = c + e - b - d; float denom = a - 2 * b + c; s = clamp(numer / denom, 0.f, 1.f); t = 1.f - s; } } CP = TRI0 + s * edge0 + t * edge1; dist = (CP - sourcePosition).norm(); } double PairTriSurf::clamp(const double a, const double min, const double max) { if (a < min) { return min; } else if (a > max) { return max; } else { return a; } } void *PairTriSurf::extract(const char *str, int &i) { //printf("in PairTriSurf::extract\n"); if (strcmp(str, "smd/tri_surface/stable_time_increment_ptr") == 0) { return (void *) &stable_time_increment; } return NULL; }