Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F110899495
fix_shake_cuda.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Apr 28, 16:37
Size
80 KB
Mime Type
text/x-c
Expires
Wed, Apr 30, 16:37 (2 d)
Engine
blob
Format
Raw Data
Handle
25855668
Attached To
rLAMMPS lammps
fix_shake_cuda.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <ctime>
#include "fix_shake_cuda.h"
#include "fix_shake_cuda_cu.h"
#include "atom.h"
#include "update.h"
#include "respa.h"
#include "modify.h"
#include "domain.h"
#include "force.h"
#include "bond.h"
#include "angle.h"
#include "comm.h"
#include "group.h"
#include "fix_respa.h"
#include "memory.h"
#include "error.h"
#include "cuda.h"
#include "cuda_modify_flags.h"
using namespace LAMMPS_NS;
#define BIG 1.0e20
#define MASSDELTA 0.1
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
FixShakeCuda::FixShakeCuda(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all("You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
cuda->accelerator(0,NULL);
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
neighbor_step=true;
PI = 4.0*atan(1.0);
virial_flag = 1;
create_attribute = 1;
// error check
if (atom->molecular == 0)
error->all("Cannot use fix shake with non-molecular system");
// perform initial allocation of atom-based arrays
// register with Atom class
shake_flag = NULL;
shake_atom = shake_type = NULL;
xshake = NULL;
cu_shake_flag = NULL;
cu_shake_atom = NULL;
cu_shake_type = NULL;
cu_xshake = NULL;
cu_list = NULL;
cu_bond_distance = NULL;
cu_angle_distance = NULL;
cu_virial = new cCudaData<double , ENERGY_FLOAT , xx >(virial,6);
grow_arrays(atom->nmax);
atom->add_callback(0);
// set comm size needed by this fix
comm_forward = 3;
// parse SHAKE args
if (narg < 8) error->all("Illegal fix shake command");
tolerance = atof(arg[3]);
max_iter = atoi(arg[4]);
output_every = atoi(arg[5]);
// parse SHAKE args for bond and angle types
// will be used by find_clusters
// store args for "b" "a" "t" as flags in (1:n) list for fast access
// store args for "m" in list of length nmass for looping over
// for "m" verify that atom masses have been set
bond_flag = new int[atom->nbondtypes+1];
for (int i = 1; i <= atom->nbondtypes; i++) bond_flag[i] = 0;
angle_flag = new int[atom->nangletypes+1];
for (int i = 1; i <= atom->nangletypes; i++) angle_flag[i] = 0;
type_flag = new int[atom->ntypes+1];
for (int i = 1; i <= atom->ntypes; i++) type_flag[i] = 0;
mass_list = new double[atom->ntypes];
nmass = 0;
char mode = '\0';
int next = 6;
while (next < narg) {
if (strcmp(arg[next],"b") == 0) mode = 'b';
else if (strcmp(arg[next],"a") == 0) mode = 'a';
else if (strcmp(arg[next],"t") == 0) mode = 't';
else if (strcmp(arg[next],"m") == 0) {
mode = 'm';
atom->check_mass();
} else if (mode == 'b') {
int i = atoi(arg[next]);
if (i < 1 || i > atom->nbondtypes)
error->all("Invalid bond type index for fix shake");
bond_flag[i] = 1;
} else if (mode == 'a') {
int i = atoi(arg[next]);
if (i < 1 || i > atom->nangletypes)
error->all("Invalid angle type index for fix shake");
angle_flag[i] = 1;
} else if (mode == 't') {
int i = atoi(arg[next]);
if (i < 1 || i > atom->ntypes)
error->all("Invalid atom type index for fix shake");
type_flag[i] = 1;
} else if (mode == 'm') {
double massone = atof(arg[next]);
if (massone == 0.0) error->all("Invalid atom mass for fix shake");
if (nmass == atom->ntypes) error->all("Too many masses for fix shake");
mass_list[nmass++] = massone;
} else error->all("Illegal fix shake command");
next++;
}
// allocate bond and angle distance arrays, indexed from 1 to n
bond_distance = new double[atom->nbondtypes+1];
angle_distance = new double[atom->nangletypes+1];
cu_bond_distance = new cCudaData<double, X_FLOAT, xx> (bond_distance, atom->nbondtypes+1);
cu_angle_distance = new cCudaData<double, X_FLOAT, xx> (angle_distance, atom->nangletypes+1);
// allocate statistics arrays
if (output_every) {
int nb = atom->nbondtypes + 1;
b_count = new int[nb];
b_count_all = new int[nb];
b_ave = new double[nb];
b_ave_all = new double[nb];
b_max = new double[nb];
b_max_all = new double[nb];
b_min = new double[nb];
b_min_all = new double[nb];
int na = atom->nangletypes + 1;
a_count = new int[na];
a_count_all = new int[na];
a_ave = new double[na];
a_ave_all = new double[na];
a_max = new double[na];
a_max_all = new double[na];
a_min = new double[na];
a_min_all = new double[na];
}
cudable_comm=true;
// identify all SHAKE clusters
find_clusters();
// initialize list of SHAKE clusters to constrain
maxlist = 0;
list = NULL;
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
/* ---------------------------------------------------------------------- */
FixShakeCuda::~FixShakeCuda()
{
// unregister callbacks to this fix from Atom class
atom->delete_callback(id,0);
// set bond_type and angle_type back to positive for SHAKE clusters
// must set for all SHAKE bonds and angles stored by each atom
int **bond_type = atom->bond_type;
int **angle_type = atom->angle_type;
int nlocal = atom->nlocal;
int n;
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
else if (shake_flag[i] == 1) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = anglefind(i,shake_atom[i][1],shake_atom[i][2]);
if (n >= 0) angle_type[i][n] = -angle_type[i][n];
} else if (shake_flag[i] == 2) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
} else if (shake_flag[i] == 3) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
} else if (shake_flag[i] == 4) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][3]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
}
}
// delete locally stored arrays
memory->destroy(shake_flag);
memory->destroy(shake_atom);
memory->destroy(shake_type);
memory->destroy(xshake);
delete [] bond_flag;
delete [] angle_flag;
delete [] type_flag;
delete [] mass_list;
delete [] bond_distance;
delete [] angle_distance;
if (output_every) {
delete [] b_count;
delete [] b_count_all;
delete [] b_ave;
delete [] b_ave_all;
delete [] b_max;
delete [] b_max_all;
delete [] b_min;
delete [] b_min_all;
delete [] a_count;
delete [] a_count_all;
delete [] a_ave;
delete [] a_ave_all;
delete [] a_max;
delete [] a_max_all;
delete [] a_min;
delete [] a_min_all;
}
memory->destroy(list);
delete cu_shake_flag;
delete cu_shake_atom;
delete cu_shake_type;
delete cu_xshake;
delete cu_list;
delete cu_bond_distance;
delete cu_angle_distance;
}
/* ---------------------------------------------------------------------- */
int FixShakeCuda::setmask()
{
int mask = 0;
mask |= PRE_NEIGHBOR_CUDA;
mask |= POST_FORCE_CUDA;
mask |= POST_FORCE_RESPA;
return mask;
}
/* ----------------------------------------------------------------------
set bond and angle distances
this init must happen after force->bond and force->angle inits
------------------------------------------------------------------------- */
void FixShakeCuda::init()
{
int i,m,flag,flag_all,type1,type2,bond1_type,bond2_type;
double rsq,angle;
// error if more than one shake fix
int count = 0;
for (i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shake") == 0) count++;
if (count > 1) error->all("More than one fix shake");
// cannot use with minimization since SHAKE turns off bonds
// that should contribute to potential energy
if (update->whichflag == 2)
error->all("Fix shake cannot be used with minimization");
// error if npt,nph fix comes before shake fix
for (i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"npt") == 0) break;
if (strcmp(modify->fix[i]->style,"nph") == 0) break;
}
if (i < modify->nfix) {
for (int j = i; j < modify->nfix; j++)
if (strcmp(modify->fix[j]->style,"shake") == 0)
error->all("Shake fix must come before NPT/NPH fix");
}
// if rRESPA, find associated fix that must exist
// could have changed locations in fix list since created
// set ptrs to rRESPA variables
if (strstr(update->integrate_style,"respa")) {
for (i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"RESPA") == 0) ifix_respa = i;
nlevels_respa = ((Respa *) update->integrate)->nlevels;
loop_respa = ((Respa *) update->integrate)->loop;
step_respa = ((Respa *) update->integrate)->step;
}
// set equilibrium bond distances
if (force->bond == NULL)
error->all("Bond potential must be defined for SHAKE");
for (i = 1; i <= atom->nbondtypes; i++)
bond_distance[i] = force->bond->equilibrium_distance(i);
// set equilibrium angle distances
int nlocal = atom->nlocal;
for (i = 1; i <= atom->nangletypes; i++) {
if (angle_flag[i] == 0) continue;
if (force->angle == NULL)
error->all("Angle potential must be defined for SHAKE");
// scan all atoms for a SHAKE angle cluster
// extract bond types for the 2 bonds in the cluster
// bond types must be same in all clusters of this angle type,
// else set error flag
flag = 0;
bond1_type = bond2_type = 0;
for (m = 0; m < nlocal; m++) {
if (shake_flag[m] != 1) continue;
if (shake_type[m][2] != i) continue;
type1 = MIN(shake_type[m][0],shake_type[m][1]);
type2 = MAX(shake_type[m][0],shake_type[m][1]);
if (bond1_type > 0) {
if (type1 != bond1_type || type2 != bond2_type) {
flag = 1;
break;
}
}
bond1_type = type1;
bond2_type = type2;
}
// error check for any bond types that are not the same
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_MAX,world);
if (flag_all) error->all("Shake angles have different bond types");
// insure all procs have bond types
MPI_Allreduce(&bond1_type,&flag_all,1,MPI_INT,MPI_MAX,world);
bond1_type = flag_all;
MPI_Allreduce(&bond2_type,&flag_all,1,MPI_INT,MPI_MAX,world);
bond2_type = flag_all;
// if bond types are 0, no SHAKE angles of this type exist
// just skip this angle
if (bond1_type == 0) {
angle_distance[i] = 0.0;
continue;
}
// compute the angle distance as a function of 2 bond distances
angle = force->angle->equilibrium_angle(i);
rsq = 2.0*bond_distance[bond1_type]*bond_distance[bond2_type] *
(1.0-cos(angle));
angle_distance[i] = sqrt(rsq);
}
}
/* ----------------------------------------------------------------------
SHAKE as pre-integrator constraint
------------------------------------------------------------------------- */
void FixShakeCuda::setup(int vflag)
{
pre_neighbor();
if (output_every) stats();
// setup SHAKE output
int ntimestep = update->ntimestep;
next_output = ntimestep + output_every;
if (output_every == 0) next_output = update->laststep + 1;
if (output_every && ntimestep % output_every != 0)
next_output = (ntimestep/output_every)*output_every + output_every;
// half timestep constraint on pre-step, full timestep thereafter
if (strstr(update->integrate_style,"verlet")) {
dtv = update->dt;
dtfsq = 0.5 * update->dt * update->dt * force->ftm2v;
post_force(vflag);
dtfsq = update->dt * update->dt * force->ftm2v;
} else {
dtv = step_respa[0];
dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v;
dtf_inner = dtf_innerhalf;
((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
post_force_respa(vflag,nlevels_respa-1,0);
((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
dtf_inner = step_respa[0] * force->ftm2v;
}
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
/* ----------------------------------------------------------------------
build list of SHAKE clusters to constrain
if one or more atoms in cluster are on this proc,
this proc lists the cluster exactly once
------------------------------------------------------------------------- */
void FixShakeCuda::pre_neighbor()
{
int atom1,atom2,atom3,atom4;
// local copies of atom quantities
// used by SHAKE until next re-neighboring
x = atom->x;
v = atom->v;
f = atom->f;
mass = atom->mass;
rmass = atom->rmass;
type = atom->type;
nlocal = atom->nlocal;
// extend size of SHAKE list if necessary
if (nlocal > maxlist) {
maxlist = nlocal;
memory->destroy(list);
memory->create(list,maxlist,"shake:list");
delete cu_list; cu_list = new cCudaData<int , int , xx >(list,maxlist);
}
// build list of SHAKE clusters I compute
nlist = 0;
int count2=0,count3=0,count4=0,count3a=0;
for (int i = 0; i < nlocal; i++)
if (shake_flag[i]) {
if(shake_flag[i] == 2) count2++;
if(shake_flag[i] == 3) count3++;
if(shake_flag[i] == 4) count4++;
if(shake_flag[i] == 1) count3a++;
if (shake_flag[i] == 2) {
atom1 = atom->map(shake_atom[i][0]);
atom2 = atom->map(shake_atom[i][1]);
if (atom1 == -1 || atom2 == -1) {
char str[128];
sprintf(str,
"Shake atoms %d %d missing on proc %d at step " BIGINT_FORMAT,
shake_atom[i][0],shake_atom[i][1],me,update->ntimestep);
error->one(str);
}
if (i <= atom1 && i <= atom2) list[nlist++] = i;
} else if (shake_flag[i] % 2 == 1) {
atom1 = atom->map(shake_atom[i][0]);
atom2 = atom->map(shake_atom[i][1]);
atom3 = atom->map(shake_atom[i][2]);
if (atom1 == -1 || atom2 == -1 || atom3 == -1) {
char str[128];
sprintf(str,
"Shake atoms %d %d %d missing on proc %d at step "
BIGINT_FORMAT,
shake_atom[i][0],shake_atom[i][1],shake_atom[i][2],
me,update->ntimestep);
error->one(str);
}
if (i <= atom1 && i <= atom2 && i <= atom3) list[nlist++] = i;
} else {
atom1 = atom->map(shake_atom[i][0]);
atom2 = atom->map(shake_atom[i][1]);
atom3 = atom->map(shake_atom[i][2]);
atom4 = atom->map(shake_atom[i][3]);
if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) {
char str[128];
sprintf(str,
"Shake atoms %d %d %d %d missing on proc %d at step "
BIGINT_FORMAT,
shake_atom[i][0],shake_atom[i][1],
shake_atom[i][2],shake_atom[i][3],
me,update->ntimestep);
error->one(str);
}
if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4)
list[nlist++] = i;
}
}
count2/=2;
count3/=3;
count4/=4;
count3a/=3;
count3+=count2;
count4+=count3;
count3a+=count4;
for(int k = 0,l = count2; k < count2; k++)
{
if(shake_flag[list[k]]!=2)
{
while(shake_flag[list[l]]!=2 && l<nlist-1) l++;
if(shake_flag[list[l]]!=2) {printf("FixShakeCuda: Error in List SortA %i %i\n",k,l);return;}
int tmp = list[k]; list[k]=list[l]; list[l]=tmp;
}
}
for(int k = count2,l = count3; k < count3; k++)
{
if(shake_flag[list[k]]!=3)
{
while(shake_flag[list[l]]!=3 && l<nlist-1) l++;
if(shake_flag[list[l]]!=3) {printf("FixShakeCuda: Error in List SortB %i %i\n",k,l);return;}
int tmp = list[k]; list[k]=list[l]; list[l]=tmp;
}
}
for(int k = count3,l = count4; k < count4; k++)
{
if(shake_flag[list[k]]!=4)
{
while(shake_flag[list[l]]!=4 && l<nlist-1) l++;
if(shake_flag[list[l]]!=4) {printf("FixShakeCuda: Error in List SortC %i %i\n",k,l);return;}
int tmp = list[k]; list[k]=list[l]; list[l]=tmp;
}
}
cu_list->upload();
cu_bond_distance->upload();
cu_angle_distance->upload();
cu_shake_flag->upload();
cu_shake_atom->upload();
cu_shake_type->upload();
neighbor_step=true;
}
/* ----------------------------------------------------------------------
compute the force adjustment for SHAKE constraint
------------------------------------------------------------------------- */
void FixShakeCuda::post_force(int vflag)
{
timespec starttime;
timespec endtime;
if(cuda->finished_setup && neighbor_step)
{
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
if(not cuda->finished_setup)
cuda->downloadAll();
if (update->ntimestep == next_output)
{
if(cuda->finished_setup)
cuda->cu_x->download();
stats();
}
// xshake = unconstrained move with current v,f
unconstrained_update();
// communicate results if necessary
//if(cuda->finished_setup) cu_xshake->download();
if (nprocs > 1)
{
//if(cuda->finished_setup)
//cu_xshake->download();
comm->forward_comm_fix(this);
//if(cuda->finished_setup)
//cu_xshake->upload();
}
// virial setup
if (vflag) v_setup(vflag);
else evflag = 0;
// loop over clusters
clock_gettime(CLOCK_REALTIME,&starttime);
if(cuda->finished_setup)
{
cu_virial->upload();
if(vflag_atom) cuda->cu_vatom->upload();
Cuda_FixShakeCuda_Shake(&cuda->shared_data,vflag,vflag_atom,(int*)cu_list->dev_data(),nlist);
cu_virial->download();
if(vflag_atom) cuda->cu_vatom->download();
}
else
for (int i = 0; i < nlist; i++) {
int m = list[i];
if (shake_flag[m] == 2) shake2(m);
else if (shake_flag[m] == 3) shake3(m);
else if (shake_flag[m] == 4) shake4(m);
else shake3angle(m);
}
if((not cuda->finished_setup)) cuda->cu_f->upload();
clock_gettime(CLOCK_REALTIME,&endtime);
if(cuda->finished_setup)
time_postforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
else
time_postforce=0.0;
//printf("Postforce time: %lf\n",time_postforce);
}
/* ----------------------------------------------------------------------
count # of degrees-of-freedom removed by SHAKE for atoms in igroup
------------------------------------------------------------------------- */
int FixShakeCuda::dof(int igroup)
{
int groupbit = group->bitmask[igroup];
int *mask = atom->mask;
int *tag = atom->tag;
int nlocal = atom->nlocal;
// count dof in a cluster if and only if
// the central atom is in group and atom i is the central atom
int n = 0;
for (int i = 0; i < nlocal; i++) {
if (!(mask[i] & groupbit)) continue;
if (shake_flag[i] == 0) continue;
if (shake_atom[i][0] != tag[i]) continue;
if (shake_flag[i] == 1) n += 3;
else if (shake_flag[i] == 2) n += 1;
else if (shake_flag[i] == 3) n += 2;
else if (shake_flag[i] == 4) n += 3;
}
int nall;
MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
return nall;
}
/* ----------------------------------------------------------------------
identify whether each atom is in a SHAKE cluster
only include atoms in fix group and those bonds/angles specified in input
test whether all clusters are valid
set shake_flag, shake_atom, shake_type values
set bond,angle types negative so will be ignored in neighbor lists
------------------------------------------------------------------------- */
void FixShakeCuda::find_clusters()
{
int i,j,m,n;
int flag,flag_all,messtag,loop,nbuf,nbufmax,size;
double massone;
int *buf,*bufcopy;
MPI_Request request;
MPI_Status status;
if (me == 0 && screen) fprintf(screen,"Finding SHAKE clusters ...\n");
// local copies of atom ptrs
int *tag = atom->tag;
int *type = atom->type;
int *mask = atom->mask;
double *mass = atom->mass;
double *rmass = atom->rmass;
int **bond_type = atom->bond_type;
int **angle_type = atom->angle_type;
int **nspecial = atom->nspecial;
int **special = atom->special;
int nlocal = atom->nlocal;
// setup ring of procs
int next = me + 1;
int prev = me -1;
if (next == nprocs) next = 0;
if (prev < 0) prev = nprocs - 1;
// -----------------------------------------------------
// allocate arrays for self (1d) and bond partners (2d)
// max = max # of bond partners for owned atoms = 2nd dim of partner arrays
// npartner[i] = # of bonds attached to atom i
// nshake[i] = # of SHAKE bonds attached to atom i
// partner_tag[i][] = global IDs of each partner
// partner_mask[i][] = mask of each partner
// partner_type[i][] = type of each partner
// partner_massflag[i][] = 1 if partner meets mass criterion, 0 if not
// partner_bondtype[i][] = type of bond attached to each partner
// partner_shake[i][] = 1 if SHAKE bonded to partner, 0 if not
// partner_nshake[i][] = nshake value for each partner
// -----------------------------------------------------
int max = 0;
for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]);
int *npartner,*nshake;
memory->create(npartner,nlocal,"shake:npartner");
memory->create(nshake,nlocal,"shake:nshake");
int **partner_tag,**partner_mask,**partner_type,**partner_massflag;
int ** partner_bondtype,**partner_shake,**partner_nshake;
memory->create(partner_tag,nlocal,max,"shake:partner_tag");
memory->create(partner_mask,nlocal,max,"shake:partner_mask");
memory->create(partner_type,nlocal,max,"shake:partner_type");
memory->create(partner_massflag,nlocal,max,"shake:partner_massflag");
memory->create(partner_bondtype,nlocal,max,"shake:partner_bondtype");
memory->create(partner_shake,nlocal,max,"shake:partner_shake");
memory->create(partner_nshake,nlocal,max,"shake:partner_nshake");
// -----------------------------------------------------
// set npartner and partner_tag from special arrays
// -----------------------------------------------------
for (i = 0; i < nlocal; i++) {
npartner[i] = nspecial[i][0];
for (j = 0; j < npartner[i]; j++) partner_tag[i][j] = special[i][j];
}
// -----------------------------------------------------
// set partner_mask, partner_type, partner_massflag, partner_bondtype
// for bonded partners
// requires communication for off-proc partners
// -----------------------------------------------------
// fill in mask, type, massflag, bondtype if own bond partner
// info to store in buf for each off-proc bond = nper = 6
// 2 atoms IDs in bond, space for mask, type, massflag, bondtype
// nbufmax = largest buffer needed to hold info from any proc
int nper = 6;
nbuf = 0;
for (i = 0; i < nlocal; i++) {
for (j = 0; j < npartner[i]; j++) {
partner_mask[i][j] = 0;
partner_type[i][j] = 0;
partner_massflag[i][j] = 0;
partner_bondtype[i][j] = 0;
m = atom->map(partner_tag[i][j]);
if (m >= 0 && m < nlocal) {
partner_mask[i][j] = mask[m];
partner_type[i][j] = type[m];
if (nmass) {
if (rmass) massone = rmass[m];
else massone = mass[type[m]];
partner_massflag[i][j] = masscheck(massone);
}
n = bondfind(i,tag[i],partner_tag[i][j]);
if (n >= 0) partner_bondtype[i][j] = bond_type[i][n];
else {
n = bondfind(m,tag[i],partner_tag[i][j]);
if (n >= 0) partner_bondtype[i][j] = bond_type[m][n];
}
} else nbuf += nper;
}
}
MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world);
buf = new int[nbufmax];
bufcopy = new int[nbufmax];
// fill buffer with info
size = 0;
for (i = 0; i < nlocal; i++) {
for (j = 0; j < npartner[i]; j++) {
m = atom->map(partner_tag[i][j]);
if (m < 0 || m >= nlocal) {
buf[size] = tag[i];
buf[size+1] = partner_tag[i][j];
buf[size+2] = 0;
buf[size+3] = 0;
buf[size+4] = 0;
n = bondfind(i,tag[i],partner_tag[i][j]);
if (n >= 0) buf[size+5] = bond_type[i][n];
else buf[size+5] = 0;
size += nper;
}
}
}
// cycle buffer around ring of procs back to self
// when receive buffer, scan bond partner IDs for atoms I own
// if I own partner:
// fill in mask and type and massflag
// search for bond with 1st atom and fill in bondtype
messtag = 1;
for (loop = 0; loop < nprocs; loop++) {
i = 0;
while (i < size) {
m = atom->map(buf[i+1]);
if (m >= 0 && m < nlocal) {
buf[i+2] = mask[m];
buf[i+3] = type[m];
if (nmass) {
if (rmass) massone = rmass[m];
else massone = mass[type[m]];
buf[i+4] = masscheck(massone);
}
if (buf[i+5] == 0) {
n = bondfind(m,buf[i],buf[i+1]);
if (n >= 0) buf[i+5] = bond_type[m][n];
}
}
i += nper;
}
if (me != next) {
MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request);
MPI_Send(buf,size,MPI_INT,next,messtag,world);
MPI_Wait(&request,&status);
MPI_Get_count(&status,MPI_INT,&size);
for (j = 0; j < size; j++) buf[j] = bufcopy[j];
}
}
// store partner info returned to me
m = 0;
while (m < size) {
i = atom->map(buf[m]);
for (j = 0; j < npartner[i]; j++)
if (buf[m+1] == partner_tag[i][j]) break;
partner_mask[i][j] = buf[m+2];
partner_type[i][j] = buf[m+3];
partner_massflag[i][j] = buf[m+4];
partner_bondtype[i][j] = buf[m+5];
m += nper;
}
delete [] buf;
delete [] bufcopy;
// error check for unfilled partner info
// if partner_type not set, is an error
// partner_bondtype may not be set if special list is not consistent
// with bondatom (e.g. due to delete_bonds command)
// this is OK if one or both atoms are not in fix group, since
// bond won't be SHAKEn anyway
// else it's an error
flag = 0;
for (i = 0; i < nlocal; i++)
for (j = 0; j < npartner[i]; j++) {
if (partner_type[i][j] == 0) flag = 1;
if (!(mask[i] & groupbit)) continue;
if (!(partner_mask[i][j] & groupbit)) continue;
if (partner_bondtype[i][j] == 0) flag = 1;
}
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all("Did not find fix shake partner info");
// -----------------------------------------------------
// identify SHAKEable bonds
// set nshake[i] = # of SHAKE bonds attached to atom i
// set partner_shake[i][] = 1 if SHAKE bonded to partner, 0 if not
// both atoms must be in group, bondtype must be > 0
// check if bondtype is in input bond_flag
// check if type of either atom is in input type_flag
// check if mass of either atom is in input mass_list
// -----------------------------------------------------
int np;
for (i = 0; i < nlocal; i++) {
nshake[i] = 0;
np = npartner[i];
for (j = 0; j < np; j++) {
partner_shake[i][j] = 0;
if (!(mask[i] & groupbit)) continue;
if (!(partner_mask[i][j] & groupbit)) continue;
if (partner_bondtype[i][j] <= 0) continue;
if (bond_flag[partner_bondtype[i][j]]) {
partner_shake[i][j] = 1;
nshake[i]++;
continue;
}
if (type_flag[type[i]] || type_flag[partner_type[i][j]]) {
partner_shake[i][j] = 1;
nshake[i]++;
continue;
}
if (nmass) {
if (partner_massflag[i][j]) {
partner_shake[i][j] = 1;
nshake[i]++;
continue;
} else {
if (rmass) massone = rmass[i];
else massone = mass[type[i]];
if (masscheck(massone)) {
partner_shake[i][j] = 1;
nshake[i]++;
continue;
}
}
}
}
}
// -----------------------------------------------------
// set partner_nshake for bonded partners
// requires communication for off-proc partners
// -----------------------------------------------------
// fill in partner_nshake if own bond partner
// info to store in buf for each off-proc bond =
// 2 atoms IDs in bond, space for nshake value
// nbufmax = largest buffer needed to hold info from any proc
nbuf = 0;
for (i = 0; i < nlocal; i++) {
for (j = 0; j < npartner[i]; j++) {
m = atom->map(partner_tag[i][j]);
if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m];
else nbuf += 3;
}
}
MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world);
buf = new int[nbufmax];
bufcopy = new int[nbufmax];
// fill buffer with info
size = 0;
for (i = 0; i < nlocal; i++) {
for (j = 0; j < npartner[i]; j++) {
m = atom->map(partner_tag[i][j]);
if (m < 0 || m >= nlocal) {
buf[size] = tag[i];
buf[size+1] = partner_tag[i][j];
size += 3;
}
}
}
// cycle buffer around ring of procs back to self
// when receive buffer, scan bond partner IDs for atoms I own
// if I own partner, fill in nshake value
messtag = 2;
for (loop = 0; loop < nprocs; loop++) {
i = 0;
while (i < size) {
m = atom->map(buf[i+1]);
if (m >= 0 && m < nlocal) buf[i+2] = nshake[m];
i += 3;
}
if (me != next) {
MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request);
MPI_Send(buf,size,MPI_INT,next,messtag,world);
MPI_Wait(&request,&status);
MPI_Get_count(&status,MPI_INT,&size);
for (j = 0; j < size; j++) buf[j] = bufcopy[j];
}
}
// store partner info returned to me
m = 0;
while (m < size) {
i = atom->map(buf[m]);
for (j = 0; j < npartner[i]; j++)
if (buf[m+1] == partner_tag[i][j]) break;
partner_nshake[i][j] = buf[m+2];
m += 3;
}
delete [] buf;
delete [] bufcopy;
// -----------------------------------------------------
// error checks
// no atom with nshake > 3
// no connected atoms which both have nshake > 1
// -----------------------------------------------------
flag = 0;
for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all("Shake cluster of more than 4 atoms");
flag = 0;
for (i = 0; i < nlocal; i++) {
if (nshake[i] <= 1) continue;
for (j = 0; j < npartner[i]; j++)
if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1;
}
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all("Shake clusters are connected");
// -----------------------------------------------------
// set SHAKE arrays that are stored with atoms & add angle constraints
// zero shake arrays for all owned atoms
// if I am central atom set shake_flag & shake_atom & shake_type
// for 2-atom clusters, I am central atom if my atom ID < partner ID
// for 3-atom clusters, test for angle constraint
// angle will be stored by this atom if it exists
// if angle type matches angle_flag, then it is angle-constrained
// shake_flag[] = 0 if atom not in SHAKE cluster
// 2,3,4 = size of bond-only cluster
// 1 = 3-atom angle cluster
// shake_atom[][] = global IDs of 2,3,4 atoms in cluster
// central atom is 1st
// for 2-atom cluster, lowest ID is 1st
// shake_type[][] = bondtype of each bond in cluster
// for 3-atom angle cluster, 3rd value is angletype
// -----------------------------------------------------
for (i = 0; i < nlocal; i++) {
shake_flag[i] = 0;
shake_atom[i][0] = 0;
shake_atom[i][1] = 0;
shake_atom[i][2] = 0;
shake_atom[i][3] = 0;
shake_type[i][0] = 0;
shake_type[i][1] = 0;
shake_type[i][2] = 0;
if (nshake[i] == 1) {
for (j = 0; j < npartner[i]; j++)
if (partner_shake[i][j]) break;
if (partner_nshake[i][j] == 1 && tag[i] < partner_tag[i][j]) {
shake_flag[i] = 2;
shake_atom[i][0] = tag[i];
shake_atom[i][1] = partner_tag[i][j];
shake_type[i][0] = partner_bondtype[i][j];
}
}
if (nshake[i] > 1) {
shake_flag[i] = 1;
shake_atom[i][0] = tag[i];
for (j = 0; j < npartner[i]; j++)
if (partner_shake[i][j]) {
m = shake_flag[i];
shake_atom[i][m] = partner_tag[i][j];
shake_type[i][m-1] = partner_bondtype[i][j];
shake_flag[i]++;
}
}
if (nshake[i] == 2) {
n = anglefind(i,shake_atom[i][1],shake_atom[i][2]);
if (n < 0) continue;
if (angle_type[i][n] < 0) continue;
if (angle_flag[angle_type[i][n]]) {
shake_flag[i] = 1;
shake_type[i][2] = angle_type[i][n];
}
}
}
// -----------------------------------------------------
// set shake_flag,shake_atom,shake_type for non-central atoms
// requires communication for off-proc atoms
// -----------------------------------------------------
// fill in shake arrays for each bond partner I own
// info to store in buf for each off-proc bond =
// all values from shake_flag, shake_atom, shake_type
// nbufmax = largest buffer needed to hold info from any proc
nbuf = 0;
for (i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
for (j = 0; j < npartner[i]; j++) {
if (partner_shake[i][j] == 0) continue;
m = atom->map(partner_tag[i][j]);
if (m >= 0 && m < nlocal) {
shake_flag[m] = shake_flag[i];
shake_atom[m][0] = shake_atom[i][0];
shake_atom[m][1] = shake_atom[i][1];
shake_atom[m][2] = shake_atom[i][2];
shake_atom[m][3] = shake_atom[i][3];
shake_type[m][0] = shake_type[i][0];
shake_type[m][1] = shake_type[i][1];
shake_type[m][2] = shake_type[i][2];
} else nbuf += 9;
}
}
MPI_Allreduce(&nbuf,&nbufmax,1,MPI_INT,MPI_MAX,world);
buf = new int[nbufmax];
bufcopy = new int[nbufmax];
// fill buffer with info
size = 0;
for (i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
for (j = 0; j < npartner[i]; j++) {
if (partner_shake[i][j] == 0) continue;
m = atom->map(partner_tag[i][j]);
if (m < 0 || m >= nlocal) {
buf[size] = partner_tag[i][j];
buf[size+1] = shake_flag[i];
buf[size+2] = shake_atom[i][0];
buf[size+3] = shake_atom[i][1];
buf[size+4] = shake_atom[i][2];
buf[size+5] = shake_atom[i][3];
buf[size+6] = shake_type[i][0];
buf[size+7] = shake_type[i][1];
buf[size+8] = shake_type[i][2];
size += 9;
}
}
}
// cycle buffer around ring of procs back to self
// when receive buffer, scan for ID that I own
// if I own ID, fill in shake array values
messtag = 3;
for (loop = 0; loop < nprocs; loop++) {
i = 0;
while (i < size) {
m = atom->map(buf[i]);
if (m >= 0 && m < nlocal) {
shake_flag[m] = buf[i+1];
shake_atom[m][0] = buf[i+2];
shake_atom[m][1] = buf[i+3];
shake_atom[m][2] = buf[i+4];
shake_atom[m][3] = buf[i+5];
shake_type[m][0] = buf[i+6];
shake_type[m][1] = buf[i+7];
shake_type[m][2] = buf[i+8];
}
i += 9;
}
if (me != next) {
MPI_Irecv(bufcopy,nbufmax,MPI_INT,prev,messtag,world,&request);
MPI_Send(buf,size,MPI_INT,next,messtag,world);
MPI_Wait(&request,&status);
MPI_Get_count(&status,MPI_INT,&size);
for (j = 0; j < size; j++) buf[j] = bufcopy[j];
}
}
delete [] buf;
delete [] bufcopy;
// -----------------------------------------------------
// free local memory
// -----------------------------------------------------
memory->destroy(npartner);
memory->destroy(nshake);
memory->destroy(partner_tag);
memory->destroy(partner_mask);
memory->destroy(partner_type);
memory->destroy(partner_massflag);
memory->destroy(partner_bondtype);
memory->destroy(partner_shake);
memory->destroy(partner_nshake);
// -----------------------------------------------------
// set bond_type and angle_type negative for SHAKE clusters
// must set for all SHAKE bonds and angles stored by each atom
// -----------------------------------------------------
for (i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
else if (shake_flag[i] == 1) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = anglefind(i,shake_atom[i][1],shake_atom[i][2]);
if (n >= 0) angle_type[i][n] = -angle_type[i][n];
} else if (shake_flag[i] == 2) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
} else if (shake_flag[i] == 3) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
} else if (shake_flag[i] == 4) {
n = bondfind(i,shake_atom[i][0],shake_atom[i][1]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][2]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
n = bondfind(i,shake_atom[i][0],shake_atom[i][3]);
if (n >= 0) bond_type[i][n] = -bond_type[i][n];
}
}
// -----------------------------------------------------
// print info on SHAKE clusters
// -----------------------------------------------------
int count1,count2,count3,count4;
count1 = count2 = count3 = count4 = 0;
for (i = 0; i < nlocal; i++) {
if (shake_flag[i] == 1) count1++;
else if (shake_flag[i] == 2) count2++;
else if (shake_flag[i] == 3) count3++;
else if (shake_flag[i] == 4) count4++;
}
for(int i=0;i<nlocal;i++)
{
}
int tmp;
tmp = count1;
MPI_Allreduce(&tmp,&count1,1,MPI_INT,MPI_SUM,world);
tmp = count2;
MPI_Allreduce(&tmp,&count2,1,MPI_INT,MPI_SUM,world);
tmp = count3;
MPI_Allreduce(&tmp,&count3,1,MPI_INT,MPI_SUM,world);
tmp = count4;
MPI_Allreduce(&tmp,&count4,1,MPI_INT,MPI_SUM,world);
if (me == 0) {
if (screen) {
fprintf(screen," %d = # of size 2 clusters\n",count2/2);
fprintf(screen," %d = # of size 3 clusters\n",count3/3);
fprintf(screen," %d = # of size 4 clusters\n",count4/4);
fprintf(screen," %d = # of frozen angles\n",count1/3);
}
if (logfile) {
fprintf(logfile," %d = # of size 2 clusters\n",count2/2);
fprintf(logfile," %d = # of size 3 clusters\n",count3/3);
fprintf(logfile," %d = # of size 4 clusters\n",count4/4);
fprintf(logfile," %d = # of frozen angles\n",count1/3);
}
}
cu_shake_flag->upload();
cu_shake_atom->upload();
cu_shake_type->upload();
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
void FixShakeCuda::swap_clusters(int i, int j)
{
int tmp;
tmp = shake_flag[i]; shake_flag[i] = shake_flag[j]; shake_flag[j] = tmp;
tmp = shake_atom[i][0]; shake_atom[i][0] = shake_atom[j][0]; shake_atom[j][0] = tmp;
tmp = shake_atom[i][1]; shake_atom[i][1] = shake_atom[j][1]; shake_atom[j][1] = tmp;
tmp = shake_atom[i][2]; shake_atom[i][2] = shake_atom[j][2]; shake_atom[j][2] = tmp;
tmp = shake_atom[i][3]; shake_atom[i][3] = shake_atom[j][3]; shake_atom[j][3] = tmp;
tmp = shake_type[i][0]; shake_type[i][0] = shake_type[j][0]; shake_type[j][0] = tmp;
tmp = shake_type[i][1]; shake_type[i][1] = shake_type[j][1]; shake_type[j][1] = tmp;
tmp = shake_type[i][2]; shake_type[i][2] = shake_type[j][2]; shake_type[j][2] = tmp;
}
/* ----------------------------------------------------------------------
check if massone is within MASSDELTA of any mass in mass_list
return 1 if yes, 0 if not
------------------------------------------------------------------------- */
int FixShakeCuda::masscheck(double massone)
{
for (int i = 0; i < nmass; i++)
if (fabs(mass_list[i]-massone) <= MASSDELTA) return 1;
return 0;
}
/* ----------------------------------------------------------------------
update the unconstrained position of each atom
only for SHAKE clusters, else set to 0.0
assumes NVE update, seems to be accurate enough for NVT,NPT,NPH as well
------------------------------------------------------------------------- */
void FixShakeCuda::unconstrained_update()
{
if(cuda->finished_setup)
{
Cuda_FixShakeCuda_UnconstrainedUpdate(&cuda->shared_data);
return;
}
double dtfmsq;
if (rmass) {
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i]) {
dtfmsq = dtfsq / rmass[i];
xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0];
xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1];
xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2];
} else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0;
}
} else {
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i]) {
dtfmsq = dtfsq / mass[type[i]];
xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0];
xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1];
xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2];
} else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0;
}
}
cu_xshake->upload();
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::shake2(int m)
{
int nlist,list[2];
double v[6];
double invmass0,invmass1;
// local atom IDs and constraint distances
int i0 = atom->map(shake_atom[m][0]);
int i1 = atom->map(shake_atom[m][1]);
double bond1 = bond_distance[shake_type[m][0]];
// r01 = distance vec between atoms, with PBC
double r01[3];
r01[0] = x[i0][0] - x[i1][0];
r01[1] = x[i0][1] - x[i1][1];
r01[2] = x[i0][2] - x[i1][2];
domain->minimum_image(r01);
// s01 = distance vec after unconstrained update, with PBC
double s01[3];
s01[0] = xshake[i0][0] - xshake[i1][0];
s01[1] = xshake[i0][1] - xshake[i1][1];
s01[2] = xshake[i0][2] - xshake[i1][2];
domain->minimum_image(s01);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
// a,b,c = coeffs in quadratic equation for lamda
if (rmass) {
invmass0 = 1.0/rmass[i0];
invmass1 = 1.0/rmass[i1];
} else {
invmass0 = 1.0/mass[type[i0]];
invmass1 = 1.0/mass[type[i1]];
}
double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double b = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double c = s01sq - bond1*bond1;
// error check
double determ = b*b - 4.0*a*c;
if (determ < 0.0) {
error->warning("Shake determinant < 0.0");
determ = 0.0;
}
// exact quadratic solution for lamda
double lamda,lamda1,lamda2;
lamda1 = (-b+sqrt(determ)) / (2.0*a);
lamda2 = (-b-sqrt(determ)) / (2.0*a);
if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1;
else lamda = lamda2;
// update forces if atom is owned by this processor
lamda /= dtfsq;
if (i0 < nlocal) {
f[i0][0] += lamda*r01[0];
f[i0][1] += lamda*r01[1];
f[i0][2] += lamda*r01[2];
}
if (i1 < nlocal) {
f[i1][0] -= lamda*r01[0];
f[i1][1] -= lamda*r01[1];
f[i1][2] -= lamda*r01[2];
}
if (evflag) {
nlist = 0;
if (i0 < nlocal) list[nlist++] = i0;
if (i1 < nlocal) list[nlist++] = i1;
v[0] = lamda*r01[0]*r01[0];
v[1] = lamda*r01[1]*r01[1];
v[2] = lamda*r01[2]*r01[2];
v[3] = lamda*r01[0]*r01[1];
v[4] = lamda*r01[0]*r01[2];
v[5] = lamda*r01[1]*r01[2];
v_tally(nlist,list,2.0,v);
}
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::shake3(int m)
{
int nlist,list[3];
double v[6];
double invmass0,invmass1,invmass2;
// local atom IDs and constraint distances
int i0 = atom->map(shake_atom[m][0]);
int i1 = atom->map(shake_atom[m][1]);
int i2 = atom->map(shake_atom[m][2]);
double bond1 = bond_distance[shake_type[m][0]];
double bond2 = bond_distance[shake_type[m][1]];
// r01,r02 = distance vec between atoms, with PBC
double r01[3];
r01[0] = x[i0][0] - x[i1][0];
r01[1] = x[i0][1] - x[i1][1];
r01[2] = x[i0][2] - x[i1][2];
domain->minimum_image(r01);
double r02[3];
r02[0] = x[i0][0] - x[i2][0];
r02[1] = x[i0][1] - x[i2][1];
r02[2] = x[i0][2] - x[i2][2];
domain->minimum_image(r02);
// s01,s02 = distance vec after unconstrained update, with PBC
double s01[3];
s01[0] = xshake[i0][0] - xshake[i1][0];
s01[1] = xshake[i0][1] - xshake[i1][1];
s01[2] = xshake[i0][2] - xshake[i1][2];
domain->minimum_image(s01);
double s02[3];
s02[0] = xshake[i0][0] - xshake[i2][0];
s02[1] = xshake[i0][1] - xshake[i2][1];
s02[2] = xshake[i0][2] - xshake[i2][2];
domain->minimum_image(s02);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
// matrix coeffs and rhs for lamda equations
if (rmass) {
invmass0 = 1.0/rmass[i0];
invmass1 = 1.0/rmass[i1];
invmass2 = 1.0/rmass[i2];
} else {
invmass0 = 1.0/mass[type[i0]];
invmass1 = 1.0/mass[type[i1]];
invmass2 = 1.0/mass[type[i2]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
// inverse of matrix
double determ = a11*a22 - a12*a21;
if (determ == 0.0) error->one("Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = a22*determinv;
double a12inv = -a12*determinv;
double a21inv = -a21*determinv;
double a22inv = a11*determinv;
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,b1,b2,lamda01_new,lamda02_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 +
quad1_0102 * lamda01*lamda02;
quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 +
quad2_0102 * lamda01*lamda02;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
lamda01_new = a11inv*b1 + a12inv*b2;
lamda02_new = a21inv*b1 + a22inv*b2;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
if (i0 < nlocal) {
f[i0][0] += lamda01*r01[0] + lamda02*r02[0];
f[i0][1] += lamda01*r01[1] + lamda02*r02[1];
f[i0][2] += lamda01*r01[2] + lamda02*r02[2];
}
if (i1 < nlocal) {
f[i1][0] -= lamda01*r01[0];
f[i1][1] -= lamda01*r01[1];
f[i1][2] -= lamda01*r01[2];
}
if (i2 < nlocal) {
f[i2][0] -= lamda02*r02[0];
f[i2][1] -= lamda02*r02[1];
f[i2][2] -= lamda02*r02[2];
}
if (evflag) {
nlist = 0;
if (i0 < nlocal) list[nlist++] = i0;
if (i1 < nlocal) list[nlist++] = i1;
if (i2 < nlocal) list[nlist++] = i2;
v[0] = lamda01*r01[0]*r01[0] + lamda02*r02[0]*r02[0];
v[1] = lamda01*r01[1]*r01[1] + lamda02*r02[1]*r02[1];
v[2] = lamda01*r01[2]*r01[2] + lamda02*r02[2]*r02[2];
v[3] = lamda01*r01[0]*r01[1] + lamda02*r02[0]*r02[1];
v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2];
v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2];
v_tally(nlist,list,3.0,v);
}
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::shake4(int m)
{
int nlist,list[4];
double v[6];
double invmass0,invmass1,invmass2,invmass3;
// local atom IDs and constraint distances
int i0 = atom->map(shake_atom[m][0]);
int i1 = atom->map(shake_atom[m][1]);
int i2 = atom->map(shake_atom[m][2]);
int i3 = atom->map(shake_atom[m][3]);
double bond1 = bond_distance[shake_type[m][0]];
double bond2 = bond_distance[shake_type[m][1]];
double bond3 = bond_distance[shake_type[m][2]];
// r01,r02,r03 = distance vec between atoms, with PBC
double r01[3];
r01[0] = x[i0][0] - x[i1][0];
r01[1] = x[i0][1] - x[i1][1];
r01[2] = x[i0][2] - x[i1][2];
domain->minimum_image(r01);
double r02[3];
r02[0] = x[i0][0] - x[i2][0];
r02[1] = x[i0][1] - x[i2][1];
r02[2] = x[i0][2] - x[i2][2];
domain->minimum_image(r02);
double r03[3];
r03[0] = x[i0][0] - x[i3][0];
r03[1] = x[i0][1] - x[i3][1];
r03[2] = x[i0][2] - x[i3][2];
domain->minimum_image(r03);
// s01,s02,s03 = distance vec after unconstrained update, with PBC
double s01[3];
s01[0] = xshake[i0][0] - xshake[i1][0];
s01[1] = xshake[i0][1] - xshake[i1][1];
s01[2] = xshake[i0][2] - xshake[i1][2];
domain->minimum_image(s01);
double s02[3];
s02[0] = xshake[i0][0] - xshake[i2][0];
s02[1] = xshake[i0][1] - xshake[i2][1];
s02[2] = xshake[i0][2] - xshake[i2][2];
domain->minimum_image(s02);
double s03[3];
s03[0] = xshake[i0][0] - xshake[i3][0];
s03[1] = xshake[i0][1] - xshake[i3][1];
s03[2] = xshake[i0][2] - xshake[i3][2];
domain->minimum_image(s03);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double r03sq = r03[0]*r03[0] + r03[1]*r03[1] + r03[2]*r03[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
double s03sq = s03[0]*s03[0] + s03[1]*s03[1] + s03[2]*s03[2];
// matrix coeffs and rhs for lamda equations
if (rmass) {
invmass0 = 1.0/rmass[i0];
invmass1 = 1.0/rmass[i1];
invmass2 = 1.0/rmass[i2];
invmass3 = 1.0/rmass[i3];
} else {
invmass0 = 1.0/mass[type[i0]];
invmass1 = 1.0/mass[type[i1]];
invmass2 = 1.0/mass[type[i2]];
invmass3 = 1.0/mass[type[i3]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a13 = 2.0 * invmass0 *
(s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
double a23 = 2.0 * invmass0 *
(s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]);
double a31 = 2.0 * invmass0 *
(s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]);
double a32 = 2.0 * invmass0 *
(s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]);
double a33 = 2.0 * (invmass0+invmass3) *
(s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]);
// inverse of matrix;
double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 -
a11*a23*a32 - a12*a21*a33 - a13*a22*a31;
if (determ == 0.0) error->one("Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = determinv * (a22*a33 - a23*a32);
double a12inv = -determinv * (a12*a33 - a13*a32);
double a13inv = determinv * (a12*a23 - a13*a22);
double a21inv = -determinv * (a21*a33 - a23*a31);
double a22inv = determinv * (a11*a33 - a13*a31);
double a23inv = -determinv * (a11*a23 - a13*a21);
double a31inv = determinv * (a21*a32 - a22*a31);
double a32inv = -determinv * (a11*a32 - a12*a31);
double a33inv = determinv * (a11*a22 - a12*a21);
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double r0103 = (r01[0]*r03[0] + r01[1]*r03[1] + r01[2]*r03[2]);
double r0203 = (r02[0]*r03[0] + r02[1]*r03[1] + r02[2]*r03[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_0303 = invmass0*invmass0 * r03sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103;
double quad1_0203 = 2.0 * invmass0*invmass0 * r0203;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_0303 = invmass0*invmass0 * r03sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
double quad2_0103 = 2.0 * invmass0*invmass0 * r0103;
double quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203;
double quad3_0101 = invmass0*invmass0 * r01sq;
double quad3_0202 = invmass0*invmass0 * r02sq;
double quad3_0303 = (invmass0+invmass3)*(invmass0+invmass3) * r03sq;
double quad3_0102 = 2.0 * invmass0*invmass0 * r0102;
double quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103;
double quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
double lamda03 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda03_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 +
quad1_0202 * lamda02*lamda02 +
quad1_0303 * lamda03*lamda03 +
quad1_0102 * lamda01*lamda02 +
quad1_0103 * lamda01*lamda03 +
quad1_0203 * lamda02*lamda03;
quad2 = quad2_0101 * lamda01*lamda01 +
quad2_0202 * lamda02*lamda02 +
quad2_0303 * lamda03*lamda03 +
quad2_0102 * lamda01*lamda02 +
quad2_0103 * lamda01*lamda03 +
quad2_0203 * lamda02*lamda03;
quad3 = quad3_0101 * lamda01*lamda01 +
quad3_0202 * lamda02*lamda02 +
quad3_0303 * lamda03*lamda03 +
quad3_0102 * lamda01*lamda02 +
quad3_0103 * lamda01*lamda03 +
quad3_0203 * lamda02*lamda03;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
b3 = bond3*bond3 - s03sq - quad3;
lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3;
lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3;
lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
if (fabs(lamda03_new-lamda03) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
lamda03 = lamda03_new;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
lamda03 = lamda03/dtfsq;
if (i0 < nlocal) {
f[i0][0] += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0];
f[i0][1] += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1];
f[i0][2] += lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2];
}
if (i1 < nlocal) {
f[i1][0] -= lamda01*r01[0];
f[i1][1] -= lamda01*r01[1];
f[i1][2] -= lamda01*r01[2];
}
if (i2 < nlocal) {
f[i2][0] -= lamda02*r02[0];
f[i2][1] -= lamda02*r02[1];
f[i2][2] -= lamda02*r02[2];
}
if (i3 < nlocal) {
f[i3][0] -= lamda03*r03[0];
f[i3][1] -= lamda03*r03[1];
f[i3][2] -= lamda03*r03[2];
}
if (evflag) {
nlist = 0;
if (i0 < nlocal) list[nlist++] = i0;
if (i1 < nlocal) list[nlist++] = i1;
if (i2 < nlocal) list[nlist++] = i2;
if (i3 < nlocal) list[nlist++] = i3;
v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda03*r03[0]*r03[0];
v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda03*r03[1]*r03[1];
v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda03*r03[2]*r03[2];
v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda03*r03[0]*r03[1];
v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2];
v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2];
//if(i0==7271) printf("%lf %lf %lf %lf %lf %lf\n",v[0],v[1],v[2],v[3],v[4],v[5]);
v_tally(nlist,list,4.0,v);
}
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::shake3angle(int m)
{
int nlist,list[3];
double v[6];
double invmass0,invmass1,invmass2;
// local atom IDs and constraint distances
int i0 = atom->map(shake_atom[m][0]);
int i1 = atom->map(shake_atom[m][1]);
int i2 = atom->map(shake_atom[m][2]);
double bond1 = bond_distance[shake_type[m][0]];
double bond2 = bond_distance[shake_type[m][1]];
double bond12 = angle_distance[shake_type[m][2]];
// r01,r02,r12 = distance vec between atoms, with PBC
double r01[3];
r01[0] = x[i0][0] - x[i1][0];
r01[1] = x[i0][1] - x[i1][1];
r01[2] = x[i0][2] - x[i1][2];
domain->minimum_image(r01);
double r02[3];
r02[0] = x[i0][0] - x[i2][0];
r02[1] = x[i0][1] - x[i2][1];
r02[2] = x[i0][2] - x[i2][2];
domain->minimum_image(r02);
double r12[3];
r12[0] = x[i1][0] - x[i2][0];
r12[1] = x[i1][1] - x[i2][1];
r12[2] = x[i1][2] - x[i2][2];
domain->minimum_image(r12);
// s01,s02,s12 = distance vec after unconstrained update, with PBC
double s01[3];
s01[0] = xshake[i0][0] - xshake[i1][0];
s01[1] = xshake[i0][1] - xshake[i1][1];
s01[2] = xshake[i0][2] - xshake[i1][2];
domain->minimum_image(s01);
double s02[3];
s02[0] = xshake[i0][0] - xshake[i2][0];
s02[1] = xshake[i0][1] - xshake[i2][1];
s02[2] = xshake[i0][2] - xshake[i2][2];
domain->minimum_image(s02);
double s12[3];
s12[0] = xshake[i1][0] - xshake[i2][0];
s12[1] = xshake[i1][1] - xshake[i2][1];
s12[2] = xshake[i1][2] - xshake[i2][2];
domain->minimum_image(s12);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double r12sq = r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
double s12sq = s12[0]*s12[0] + s12[1]*s12[1] + s12[2]*s12[2];
// matrix coeffs and rhs for lamda equations
if (rmass) {
invmass0 = 1.0/rmass[i0];
invmass1 = 1.0/rmass[i1];
invmass2 = 1.0/rmass[i2];
} else {
invmass0 = 1.0/mass[type[i0]];
invmass1 = 1.0/mass[type[i1]];
invmass2 = 1.0/mass[type[i2]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a13 = - 2.0 * invmass1 *
(s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
double a23 = 2.0 * invmass2 *
(s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]);
double a31 = - 2.0 * invmass1 *
(s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]);
double a32 = 2.0 * invmass2 *
(s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]);
double a33 = 2.0 * (invmass1+invmass2) *
(s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]);
// inverse of matrix
double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 -
a11*a23*a32 - a12*a21*a33 - a13*a22*a31;
if (determ == 0.0) error->one("Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = determinv * (a22*a33 - a23*a32);
double a12inv = -determinv * (a12*a33 - a13*a32);
double a13inv = determinv * (a12*a23 - a13*a22);
double a21inv = -determinv * (a21*a33 - a23*a31);
double a22inv = determinv * (a11*a33 - a13*a31);
double a23inv = -determinv * (a11*a23 - a13*a21);
double a31inv = determinv * (a21*a32 - a22*a31);
double a32inv = -determinv * (a11*a32 - a12*a31);
double a33inv = determinv * (a11*a22 - a12*a21);
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double r0112 = (r01[0]*r12[0] + r01[1]*r12[1] + r01[2]*r12[2]);
double r0212 = (r02[0]*r12[0] + r02[1]*r12[1] + r02[2]*r12[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_1212 = invmass1*invmass1 * r12sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112;
double quad1_0212 = - 2.0 * invmass0*invmass1 * r0212;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_1212 = invmass2*invmass2 * r12sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
double quad2_0112 = 2.0 * invmass0*invmass2 * r0112;
double quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212;
double quad3_0101 = invmass1*invmass1 * r01sq;
double quad3_0202 = invmass2*invmass2 * r02sq;
double quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq;
double quad3_0102 = - 2.0 * invmass1*invmass2 * r0102;
double quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112;
double quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
double lamda12 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 +
quad1_0202 * lamda02*lamda02 +
quad1_1212 * lamda12*lamda12 +
quad1_0102 * lamda01*lamda02 +
quad1_0112 * lamda01*lamda12 +
quad1_0212 * lamda02*lamda12;
quad2 = quad2_0101 * lamda01*lamda01 +
quad2_0202 * lamda02*lamda02 +
quad2_1212 * lamda12*lamda12 +
quad2_0102 * lamda01*lamda02 +
quad2_0112 * lamda01*lamda12 +
quad2_0212 * lamda02*lamda12;
quad3 = quad3_0101 * lamda01*lamda01 +
quad3_0202 * lamda02*lamda02 +
quad3_1212 * lamda12*lamda12 +
quad3_0102 * lamda01*lamda02 +
quad3_0112 * lamda01*lamda12 +
quad3_0212 * lamda02*lamda12;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
b3 = bond12*bond12 - s12sq - quad3;
lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3;
lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3;
lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
if (fabs(lamda12_new-lamda12) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
lamda12 = lamda12_new;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
lamda12 = lamda12/dtfsq;
if (i0 < nlocal) {
f[i0][0] += lamda01*r01[0] + lamda02*r02[0];
f[i0][1] += lamda01*r01[1] + lamda02*r02[1];
f[i0][2] += lamda01*r01[2] + lamda02*r02[2];
}
if (i1 < nlocal) {
f[i1][0] -= lamda01*r01[0] - lamda12*r12[0];
f[i1][1] -= lamda01*r01[1] - lamda12*r12[1];
f[i1][2] -= lamda01*r01[2] - lamda12*r12[2];
}
if (i2 < nlocal) {
f[i2][0] -= lamda02*r02[0] + lamda12*r12[0];
f[i2][1] -= lamda02*r02[1] + lamda12*r12[1];
f[i2][2] -= lamda02*r02[2] + lamda12*r12[2];
}
if (evflag) {
nlist = 0;
if (i0 < nlocal) list[nlist++] = i0;
if (i1 < nlocal) list[nlist++] = i1;
if (i2 < nlocal) list[nlist++] = i2;
v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda12*r12[0]*r12[0];
v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda12*r12[1]*r12[1];
v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda12*r12[2]*r12[2];
v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda12*r12[0]*r12[1];
v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2];
v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2];
v_tally(nlist,list,3.0,v);
}
}
/* ----------------------------------------------------------------------
print-out bond & angle statistics
------------------------------------------------------------------------- */
void FixShakeCuda::stats()
{
int i,j,m,n,iatom,jatom,katom;
double delx,dely,delz;
double r,r1,r2,r3,angle;
// zero out accumulators
int nb = atom->nbondtypes + 1;
int na = atom->nangletypes + 1;
for (i = 0; i < nb; i++) {
b_count[i] = 0;
b_ave[i] = b_max[i] = 0.0;
b_min[i] = BIG;
}
for (i = 0; i < na; i++) {
a_count[i] = 0;
a_ave[i] = a_max[i] = 0.0;
a_min[i] = BIG;
}
// log stats for each bond & angle
// OK to double count since are just averaging
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
// bond stats
n = shake_flag[i];
if (n == 1) n = 3;
iatom = atom->map(shake_atom[i][0]);
for (j = 1; j < n; j++) {
jatom = atom->map(shake_atom[i][j]);
delx = x[iatom][0] - x[jatom][0];
dely = x[iatom][1] - x[jatom][1];
delz = x[iatom][2] - x[jatom][2];
domain->minimum_image(delx,dely,delz);
r = sqrt(delx*delx + dely*dely + delz*delz);
m = shake_type[i][j-1];
b_count[m]++;
b_ave[m] += r;
b_max[m] = MAX(b_max[m],r);
b_min[m] = MIN(b_min[m],r);
}
// angle stats
if (shake_flag[i] == 1) {
iatom = atom->map(shake_atom[i][0]);
jatom = atom->map(shake_atom[i][1]);
katom = atom->map(shake_atom[i][2]);
delx = x[iatom][0] - x[jatom][0];
dely = x[iatom][1] - x[jatom][1];
delz = x[iatom][2] - x[jatom][2];
domain->minimum_image(delx,dely,delz);
r1 = sqrt(delx*delx + dely*dely + delz*delz);
delx = x[iatom][0] - x[katom][0];
dely = x[iatom][1] - x[katom][1];
delz = x[iatom][2] - x[katom][2];
domain->minimum_image(delx,dely,delz);
r2 = sqrt(delx*delx + dely*dely + delz*delz);
delx = x[jatom][0] - x[katom][0];
dely = x[jatom][1] - x[katom][1];
delz = x[jatom][2] - x[katom][2];
domain->minimum_image(delx,dely,delz);
r3 = sqrt(delx*delx + dely*dely + delz*delz);
angle = acos((r1*r1 + r2*r2 - r3*r3) / (2.0*r1*r2));
angle *= 180.0/PI;
m = shake_type[i][2];
a_count[m]++;
a_ave[m] += angle;
a_max[m] = MAX(a_max[m],angle);
a_min[m] = MIN(a_min[m],angle);
}
}
// sum across all procs
MPI_Allreduce(b_count,b_count_all,nb,MPI_INT,MPI_SUM,world);
MPI_Allreduce(b_ave,b_ave_all,nb,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(b_max,b_max_all,nb,MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(b_min,b_min_all,nb,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(a_count,a_count_all,na,MPI_INT,MPI_SUM,world);
MPI_Allreduce(a_ave,a_ave_all,na,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(a_max,a_max_all,na,MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(a_min,a_min_all,na,MPI_DOUBLE,MPI_MIN,world);
// print stats only for non-zero counts
if (me == 0) {
if (screen) {
fprintf(screen,
"SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n",
update->ntimestep);
for (i = 1; i < nb; i++)
if (b_count_all[i])
fprintf(screen," %d %g %g\n",i,
b_ave_all[i]/b_count_all[i],b_max_all[i]-b_min_all[i]);
for (i = 1; i < na; i++)
if (a_count_all[i])
fprintf(screen," %d %g %g\n",i,
a_ave_all[i]/a_count_all[i],a_max_all[i]-a_min_all[i]);
}
if (logfile) {
fprintf(logfile,
"SHAKE stats (type/ave/delta) on step " BIGINT_FORMAT "\n",
update->ntimestep);
for (i = 0; i < nb; i++)
if (b_count_all[i])
fprintf(logfile," %d %g %g\n",i,
b_ave_all[i]/b_count_all[i],b_max_all[i]-b_min_all[i]);
for (i = 0; i < na; i++)
if (a_count_all[i])
fprintf(logfile," %d %g %g\n",i,
a_ave_all[i]/a_count_all[i],a_max_all[i]-a_min_all[i]);
}
}
// next timestep for stats
next_output += output_every;
}
/* ----------------------------------------------------------------------
find a bond between global tags n1 and n2 stored with local atom i
return -1 if don't find it
return bond index if do find it
------------------------------------------------------------------------- */
int FixShakeCuda::bondfind(int i, int n1, int n2)
{
int *tag = atom->tag;
int **bond_atom = atom->bond_atom;
int nbonds = atom->num_bond[i];
int m;
for (m = 0; m < nbonds; m++) {
if (n1 == tag[i] && n2 == bond_atom[i][m]) break;
if (n1 == bond_atom[i][m] && n2 == tag[i]) break;
}
if (m < nbonds) return m;
return -1;
}
/* ----------------------------------------------------------------------
find an angle with global end atoms n1 and n2 stored with local atom i
return -1 if don't find it
return angle index if do find it
------------------------------------------------------------------------- */
int FixShakeCuda::anglefind(int i, int n1, int n2)
{
int **angle_atom1 = atom->angle_atom1;
int **angle_atom3 = atom->angle_atom3;
int nangles = atom->num_angle[i];
int m;
for (m = 0; m < nangles; m++) {
if (n1 == angle_atom1[i][m] && n2 == angle_atom3[i][m]) break;
if (n1 == angle_atom3[i][m] && n2 == angle_atom1[i][m]) break;
}
if (m < nangles) return m;
return -1;
}
/* ----------------------------------------------------------------------
memory usage of local atom-based arrays
------------------------------------------------------------------------- */
double FixShakeCuda::memory_usage()
{
int nmax = atom->nmax;
double bytes = nmax * sizeof(int);
bytes += nmax*4 * sizeof(int);
bytes += nmax*3 * sizeof(int);
bytes += nmax*3 * sizeof(double);
bytes += maxvatom*6 * sizeof(double);
return bytes;
}
/* ----------------------------------------------------------------------
allocate local atom-based arrays
------------------------------------------------------------------------- */
void FixShakeCuda::grow_arrays(int nmax)
{
memory->grow(shake_flag,nmax,"shake:shake_flag");
memory->grow(shake_atom,nmax,4,"shake:shake_atom");
memory->grow(shake_type,nmax,3,"shake:shake_type");
memory->destroy(xshake);
memory->create(xshake,nmax,3,"shake:xshake");
delete cu_shake_flag; cu_shake_flag = new cCudaData<int, int, xx > (shake_flag, nmax );
delete cu_shake_atom; cu_shake_atom = new cCudaData<int, int, yx> ((int*)shake_atom, nmax, 4);
delete cu_shake_type; cu_shake_type = new cCudaData<int, int, yx> ((int*)shake_type, nmax, 3);
delete cu_xshake; cu_xshake = new cCudaData<double, X_FLOAT, xy> ((double*)xshake, nmax, 3);
cu_shake_flag->upload();
cu_shake_atom->upload();
cu_shake_type->upload();
if(cu_bond_distance)
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
/* ----------------------------------------------------------------------
copy values within local atom-based arrays
------------------------------------------------------------------------- */
void FixShakeCuda::copy_arrays(int i, int j)
{
int flag = shake_flag[j] = shake_flag[i];
if (flag == 1) {
shake_atom[j][0] = shake_atom[i][0];
shake_atom[j][1] = shake_atom[i][1];
shake_atom[j][2] = shake_atom[i][2];
shake_type[j][0] = shake_type[i][0];
shake_type[j][1] = shake_type[i][1];
shake_type[j][2] = shake_type[i][2];
} else if (flag == 2) {
shake_atom[j][0] = shake_atom[i][0];
shake_atom[j][1] = shake_atom[i][1];
shake_type[j][0] = shake_type[i][0];
} else if (flag == 3) {
shake_atom[j][0] = shake_atom[i][0];
shake_atom[j][1] = shake_atom[i][1];
shake_atom[j][2] = shake_atom[i][2];
shake_type[j][0] = shake_type[i][0];
shake_type[j][1] = shake_type[i][1];
} else if (flag == 4) {
shake_atom[j][0] = shake_atom[i][0];
shake_atom[j][1] = shake_atom[i][1];
shake_atom[j][2] = shake_atom[i][2];
shake_atom[j][3] = shake_atom[i][3];
shake_type[j][0] = shake_type[i][0];
shake_type[j][1] = shake_type[i][1];
shake_type[j][2] = shake_type[i][2];
}
}
/* ----------------------------------------------------------------------
initialize one atom's array values, called when atom is created
------------------------------------------------------------------------- */
void FixShakeCuda::set_arrays(int i)
{
shake_flag[i] = 0;
}
/* ----------------------------------------------------------------------
pack values in local atom-based arrays for exchange with another proc
------------------------------------------------------------------------- */
int FixShakeCuda::pack_exchange(int i, double *buf)
{
int m = 0;
buf[m++] = shake_flag[i];
int flag = shake_flag[i];
if (flag == 1) {
buf[m++] = shake_atom[i][0];
buf[m++] = shake_atom[i][1];
buf[m++] = shake_atom[i][2];
buf[m++] = shake_type[i][0];
buf[m++] = shake_type[i][1];
buf[m++] = shake_type[i][2];
} else if (flag == 2) {
buf[m++] = shake_atom[i][0];
buf[m++] = shake_atom[i][1];
buf[m++] = shake_type[i][0];
} else if (flag == 3) {
buf[m++] = shake_atom[i][0];
buf[m++] = shake_atom[i][1];
buf[m++] = shake_atom[i][2];
buf[m++] = shake_type[i][0];
buf[m++] = shake_type[i][1];
} else if (flag == 4) {
buf[m++] = shake_atom[i][0];
buf[m++] = shake_atom[i][1];
buf[m++] = shake_atom[i][2];
buf[m++] = shake_atom[i][3];
buf[m++] = shake_type[i][0];
buf[m++] = shake_type[i][1];
buf[m++] = shake_type[i][2];
}
return m;
}
/* ----------------------------------------------------------------------
unpack values in local atom-based arrays from exchange with another proc
------------------------------------------------------------------------- */
int FixShakeCuda::unpack_exchange(int nlocal, double *buf)
{
int m = 0;
int flag = shake_flag[nlocal] = static_cast<int> (buf[m++]);
if (flag == 1) {
shake_atom[nlocal][0] = static_cast<int> (buf[m++]);
shake_atom[nlocal][1] = static_cast<int> (buf[m++]);
shake_atom[nlocal][2] = static_cast<int> (buf[m++]);
shake_type[nlocal][0] = static_cast<int> (buf[m++]);
shake_type[nlocal][1] = static_cast<int> (buf[m++]);
shake_type[nlocal][2] = static_cast<int> (buf[m++]);
} else if (flag == 2) {
shake_atom[nlocal][0] = static_cast<int> (buf[m++]);
shake_atom[nlocal][1] = static_cast<int> (buf[m++]);
shake_type[nlocal][0] = static_cast<int> (buf[m++]);
} else if (flag == 3) {
shake_atom[nlocal][0] = static_cast<int> (buf[m++]);
shake_atom[nlocal][1] = static_cast<int> (buf[m++]);
shake_atom[nlocal][2] = static_cast<int> (buf[m++]);
shake_type[nlocal][0] = static_cast<int> (buf[m++]);
shake_type[nlocal][1] = static_cast<int> (buf[m++]);
} else if (flag == 4) {
shake_atom[nlocal][0] = static_cast<int> (buf[m++]);
shake_atom[nlocal][1] = static_cast<int> (buf[m++]);
shake_atom[nlocal][2] = static_cast<int> (buf[m++]);
shake_atom[nlocal][3] = static_cast<int> (buf[m++]);
shake_type[nlocal][0] = static_cast<int> (buf[m++]);
shake_type[nlocal][1] = static_cast<int> (buf[m++]);
shake_type[nlocal][2] = static_cast<int> (buf[m++]);
}
return m;
}
/* ----------------------------------------------------------------------
enforce SHAKE constraints from rRESPA
prediction portion is different than Verlet
rRESPA updating of atom coords is done with full v, but only portions of f
------------------------------------------------------------------------- */
/*
void FixShakeCuda::post_force_respa(int vflag, int ilevel, int iloop)
{
// call stats only on outermost level
if (ilevel == nlevels_respa-1 && update->ntimestep == next_output) stats();
// perform SHAKE on every loop iteration of every rRESPA level
// except last loop iteration of inner levels
if (ilevel < nlevels_respa-1 && iloop == loop_respa[ilevel]-1) return;
// xshake = atom coords after next x update in innermost loop
// depends on rRESPA level
// for levels > 0 this includes more than one velocity update
// xshake = predicted position from call to this routine at level N =
// x + dt0 (v + dtN/m fN + 1/2 dt(N-1)/m f(N-1) + ... + 1/2 dt0/m f0)
double ***f_level = ((FixRespa *) modify->fix[ifix_respa])->f_level;
dtfsq = dtf_inner * step_respa[ilevel];
double invmass,dtfmsq;
int jlevel;
if (rmass) {
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i]) {
invmass = 1.0 / rmass[i];
dtfmsq = dtfsq * invmass;
xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0];
xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1];
xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2];
for (jlevel = 0; jlevel < ilevel; jlevel++) {
dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass;
xshake[i][0] += dtfmsq*f_level[i][jlevel][0];
xshake[i][1] += dtfmsq*f_level[i][jlevel][1];
xshake[i][2] += dtfmsq*f_level[i][jlevel][2];
}
} else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0;
}
} else {
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i]) {
invmass = 1.0 / mass[type[i]];
dtfmsq = dtfsq * invmass;
xshake[i][0] = x[i][0] + dtv*v[i][0] + dtfmsq*f[i][0];
xshake[i][1] = x[i][1] + dtv*v[i][1] + dtfmsq*f[i][1];
xshake[i][2] = x[i][2] + dtv*v[i][2] + dtfmsq*f[i][2];
for (jlevel = 0; jlevel < ilevel; jlevel++) {
dtfmsq = dtf_innerhalf * step_respa[jlevel] * invmass;
xshake[i][0] += dtfmsq*f_level[i][jlevel][0];
xshake[i][1] += dtfmsq*f_level[i][jlevel][1];
xshake[i][2] += dtfmsq*f_level[i][jlevel][2];
}
} else xshake[i][2] = xshake[i][1] = xshake[i][0] = 0.0;
}
}
// communicate results if necessary
if (nprocs > 1) comm->forward_comm_fix(this);
// virial setup
if (vflag) v_setup(vflag);
else evflag = 0;
// loop over clusters
int m;
for (int i = 0; i < nlist; i++) {
m = list[i];
if (shake_flag[m] == 2) shake2(m);
else if (shake_flag[m] == 3) shake3(m);
else if (shake_flag[m] == 4) shake4(m);
else shake3angle(m);
}
}
/* ---------------------------------------------------------------------- */
int FixShakeCuda::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
if(cuda->finished_setup)
{
int iswap=*list;
if(iswap<0)
{
iswap=-iswap-1;
int first= ((int*) buf)[0];
Cuda_FixShakeCuda_PackComm_Self(&cuda->shared_data,n,iswap,first,pbc,pbc_flag);
}
else
Cuda_FixShakeCuda_PackComm(&cuda->shared_data,n,iswap,(void*) buf,pbc,pbc_flag);
return 3;
}
int i,j,m;
double dx,dy,dz;
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = xshake[j][0];
buf[m++] = xshake[j][1];
buf[m++] = xshake[j][2];
}
} else {
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = xshake[j][0] + dx;
buf[m++] = xshake[j][1] + dy;
buf[m++] = xshake[j][2] + dz;
}
}
return 3;
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::unpack_comm(int n, int first, double *buf)
{
if(cuda->finished_setup)
{
Cuda_FixShakeCuda_UnpackComm(&cuda->shared_data,n,first,(void*)buf);
return;
}
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) {
xshake[i][0] = buf[m++];
xshake[i][1] = buf[m++];
xshake[i][2] = buf[m++];
}
}
/* ---------------------------------------------------------------------- */
void FixShakeCuda::reset_dt()
{
if (strstr(update->integrate_style,"verlet")) {
dtv = update->dt;
dtfsq = update->dt * update->dt * force->ftm2v;
} else {
dtv = step_respa[0];
dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v;
dtf_inner = step_respa[0] * force->ftm2v;
}
if(cu_shake_atom)
Cuda_FixShakeCuda_Init(&cuda->shared_data,dtv, dtfsq,
cu_shake_flag->dev_data(),cu_shake_atom->dev_data(),cu_shake_type->dev_data(), cu_xshake->dev_data(),
cu_bond_distance->dev_data(),cu_angle_distance->dev_data(),cu_virial->dev_data(),
max_iter,tolerance);
}
Event Timeline
Log In to Comment