diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index e54b53ae8..5a1d3c7f1 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -1,1192 +1,1193 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Ray Shan (SNL), Stan Moore (SNL) ------------------------------------------------------------------------- */ #include #include #include #include #include "fix_qeq_reax_kokkos.h" #include "kokkos.h" #include "atom.h" #include "atom_masks.h" #include "atom_kokkos.h" #include "comm.h" #include "force.h" #include "group.h" #include "modify.h" #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" #include "update.h" #include "integrate.h" #include "respa.h" #include "math_const.h" #include "memory.h" #include "error.h" #include "pair_reaxc_kokkos.h" using namespace LAMMPS_NS; using namespace FixConst; #define SMALL 0.0001 #define EV_TO_KCAL_PER_MOL 14.4 #define TEAMSIZE 128 /* ---------------------------------------------------------------------- */ template FixQEqReaxKokkos:: FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : FixQEqReax(lmp, narg, arg) { kokkosable = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK; datamask_modify = Q_MASK | X_MASK; nmax = nmax = m_cap = 0; allocated_flag = 0; + nprev = 4; } /* ---------------------------------------------------------------------- */ template FixQEqReaxKokkos::~FixQEqReaxKokkos() { if (copymode) return; } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::init() { atomKK->k_q.modify(); atomKK->k_q.sync(); FixQEqReax::init(); neighflag = lmp->kokkos->neighflag_qeq; int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> kokkos_host = Kokkos::Impl::is_same::value && !Kokkos::Impl::is_same::value; neighbor->requests[irequest]-> kokkos_device = Kokkos::Impl::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->fix = 1; neighbor->requests[irequest]->pair = 0; neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; } else { //if (neighflag == HALF || neighflag == HALFTHREAD) neighbor->requests[irequest]->fix = 1; neighbor->requests[irequest]->pair = 0; neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; neighbor->requests[irequest]->ghost = 1; } int ntypes = atom->ntypes; k_params = Kokkos::DualView ("FixQEqReax::params",ntypes+1); params = k_params.template view(); for (n = 1; n <= ntypes; n++) { k_params.h_view(n).chi = chi[n]; k_params.h_view(n).eta = eta[n]; k_params.h_view(n).gamma = gamma[n]; } k_params.template modify(); cutsq = swb * swb; init_shielding_k(); init_hist(); } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::init_shielding_k() { int i,j; int ntypes = atom->ntypes; k_shield = DAT::tdual_ffloat_2d("qeq/kk:shield",ntypes+1,ntypes+1); d_shield = k_shield.template view(); for( i = 1; i <= ntypes; ++i ) for( j = 1; j <= ntypes; ++j ) k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 ); k_shield.template modify(); k_shield.template sync(); k_tap = DAT::tdual_ffloat_1d("qeq/kk:tap",8); d_tap = k_tap.template view(); for (i = 0; i < 8; i ++) k_tap.h_view(i) = Tap[i]; k_tap.template modify(); k_tap.template sync(); } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::init_hist() { int i,j; - k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,5); + k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,nprev); d_s_hist = k_s_hist.template view(); h_s_hist = k_s_hist.h_view; - k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,5); + k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,nprev); d_t_hist = k_t_hist.template view(); h_t_hist = k_t_hist.h_view; for( i = 0; i < atom->nmax; i++ ) - for( j = 0; j < 5; j++ ) + for( j = 0; j < nprev; j++ ) k_s_hist.h_view(i,j) = k_t_hist.h_view(i,j) = 0.0; k_s_hist.template modify(); k_s_hist.template sync(); k_t_hist.template modify(); k_t_hist.template sync(); } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::setup_pre_force(int vflag) { //neighbor->build_one(list); pre_force(vflag); } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::pre_force(int vflag) { if (update->ntimestep % nevery) return; atomKK->sync(execution_space,datamask_read); atomKK->modified(execution_space,datamask_modify); x = atomKK->k_x.view(); v = atomKK->k_v.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); tag = atomKK->k_tag.view(); type = atomKK->k_type.view(); mask = atomKK->k_mask.view(); nlocal = atomKK->nlocal; nall = atom->nlocal + atom->nghost; newton_pair = force->newton_pair; k_params.template sync(); k_shield.template sync(); k_tap.template sync(); NeighListKokkos* k_list = static_cast*>(list); d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; inum = list->inum; copymode = 1; int teamsize = TEAMSIZE; // allocate allocate_array(); // get max number of neighbor if (!allocated_flag || update->ntimestep == neighbor->lastcall) allocate_matrix(); // compute_H FixQEqReaxKokkosComputeHFunctor computeH_functor(this); Kokkos::parallel_scan(inum,computeH_functor); // init_matvec FixQEqReaxKokkosMatVecFunctor matvec_functor(this); Kokkos::parallel_for(inum,matvec_functor); // comm->forward_comm_fix(this); //Dist_vector( s ); pack_flag = 2; k_s.template modify(); k_s.template sync(); comm->forward_comm_fix(this); k_s.template modify(); k_s.template sync(); // comm->forward_comm_fix(this); //Dist_vector( t ); pack_flag = 3; k_t.template modify(); k_t.template sync(); comm->forward_comm_fix(this); k_t.template modify(); k_t.template sync(); // 1st cg solve over b_s, s cg_solve1(); // 2nd cg solve over b_t, t cg_solve2(); // calculate_Q(); calculate_q(); copymode = 0; if (!allocated_flag) allocated_flag = 1; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::num_neigh_item(int ii, int &maxneigh) const { const int i = d_ilist[ii]; maxneigh += d_numneigh[i]; } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::allocate_matrix() { int i,ii,m; const int inum = list->inum; nmax = atom->nmax; // determine the total space for the H matrix m_cap = 0; FixQEqReaxKokkosNumNeighFunctor neigh_functor(this); Kokkos::parallel_reduce(inum,neigh_functor,m_cap); d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax); d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax); d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap); d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap); } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::allocate_array() { if (atom->nmax > nmax) { nmax = atom->nmax; k_o = DAT::tdual_ffloat_1d("qeq/kk:h_o",nmax); d_o = k_o.template view(); h_o = k_o.h_view; d_Hdia_inv = typename AT::t_ffloat_1d("qeq/kk:h_Hdia_inv",nmax); d_b_s = typename AT::t_ffloat_1d("qeq/kk:h_b_s",nmax); d_b_t = typename AT::t_ffloat_1d("qeq/kk:h_b_t",nmax); k_s = DAT::tdual_ffloat_1d("qeq/kk:h_s",nmax); d_s = k_s.template view(); h_s = k_s.h_view; k_t = DAT::tdual_ffloat_1d("qeq/kk:h_t",nmax); d_t = k_t.template view(); h_t = k_t.h_view; d_p = typename AT::t_ffloat_1d("qeq/kk:h_p",nmax); d_r = typename AT::t_ffloat_1d("qeq/kk:h_r",nmax); k_d = DAT::tdual_ffloat_1d("qeq/kk:h_d",nmax); d_d = k_d.template view(); h_d = k_d.h_view; - k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,5); + k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,nprev); d_s_hist = k_s_hist.template view(); h_s_hist = k_s_hist.h_view; - k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,5); + k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,nprev); d_t_hist = k_t_hist.template view(); h_t_hist = k_t_hist.h_view; } // init_storage const int ignum = atom->nlocal + atom->nghost; FixQEqReaxKokkosZeroFunctor zero_functor(this); Kokkos::parallel_for(ignum,zero_functor); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::zero_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); if (mask[i] & groupbit) { d_Hdia_inv[i] = 1.0 / params(itype).eta; d_b_s[i] = -params(itype).chi; d_b_t[i] = -1.0; d_s[i] = 0.0; d_t[i] = 0.0; d_p[i] = 0.0; d_o[i] = 0.0; d_r[i] = 0.0; d_d[i] = 0.0; - //for( int j = 0; j < 5; j++ ) + //for( int j = 0; j < nprev; j++ ) //d_s_hist(i,j) = d_t_hist(i,j) = 0.0; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const { const int i = d_ilist[ii]; int j,jj,jtype,flag; if (mask[i] & groupbit) { const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); const int itype = type(i); const tagint itag = tag(i); const int jnum = d_numneigh[i]; if (final) d_firstnbr[i] = m_fill; for (jj = 0; jj < jnum; jj++) { j = d_neighbors(i,jj); j &= NEIGHMASK; jtype = type(j); const X_FLOAT delx = x(j,0) - xtmp; const X_FLOAT dely = x(j,1) - ytmp; const X_FLOAT delz = x(j,2) - ztmp; if (neighflag != FULL) { const tagint jtag = tag(j); flag = 0; if (j < nlocal) flag = 1; else if (itag < jtag) flag = 1; else if (itag == jtag) { if (delz > SMALL) flag = 1; else if (fabs(delz) < SMALL) { if (dely > SMALL) flag = 1; else if (fabs(dely) < SMALL && delx > SMALL) flag = 1; } } if (!flag) continue; } const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq > cutsq) continue; if (final) { const F_FLOAT r = sqrt(rsq); d_jlist(m_fill) = j; const F_FLOAT shldij = d_shield(itype,jtype); d_val(m_fill) = calculate_H_k(r,shldij); } m_fill++; } if (final) d_numnbrs[i] = m_fill - d_firstnbr[i]; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::calculate_H_k(const F_FLOAT &r, const F_FLOAT &shld) const { F_FLOAT taper, denom; taper = d_tap[7] * r + d_tap[6]; taper = taper * r + d_tap[5]; taper = taper * r + d_tap[4]; taper = taper * r + d_tap[3]; taper = taper * r + d_tap[2]; taper = taper * r + d_tap[1]; taper = taper * r + d_tap[0]; denom = r * r * r + shld; denom = pow(denom,0.3333333333333); return taper * EV_TO_KCAL_PER_MOL / denom; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::mat_vec_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); if (mask[i] & groupbit) { d_Hdia_inv[i] = 1.0 / params(itype).eta; d_b_s[i] = -params(itype).chi; d_b_t[i] = -1.0; d_t[i] = d_t_hist(i,2) + 3*(d_t_hist(i,0) - d_t_hist(i,1)); d_s[i] = 4*(d_s_hist(i,0)+d_s_hist(i,2))-(6*d_s_hist(i,1)+d_s_hist(i,3)); } } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::cg_solve1() // b = b_s, x = s; { const int inum = list->inum; const int ignum = inum + list->gnum; F_FLOAT tmp, sig_old, b_norm; const int teamsize = TEAMSIZE; // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse12Functor sparse12_functor(this); Kokkos::parallel_for(inum,sparse12_functor); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+atom->nghost),*this); if (neighflag == HALF) { FixQEqReaxKokkosSparse13Functor sparse13_functor(this); Kokkos::parallel_for(inum,sparse13_functor); } else { FixQEqReaxKokkosSparse13Functor sparse13_functor(this); Kokkos::parallel_for(inum,sparse13_functor); } } else { Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); } if (neighflag != FULL) { k_o.template modify(); k_o.template sync(); comm->reverse_comm_fix(this); //Coll_vector( q ); k_o.template modify(); k_o.template sync(); } // vector_sum( r , 1., b, -1., q, nn ); // preconditioning: d[j] = r[j] * Hdia_inv[j]; // b_norm = parallel_norm( b, nn ); F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm1Functor norm1_functor(this); Kokkos::parallel_reduce(inum,norm1_functor,my_norm); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; int loop; const int loopmax = 200; for (loop = 1; loop < loopmax & sqrt(sig_new)/b_norm > tolerance; loop++) { // comm->forward_comm_fix(this); //Dist_vector( d ); pack_flag = 1; k_d.template modify(); k_d.template sync(); comm->forward_comm_fix(this); k_d.template modify(); k_d.template sync(); // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+atom->nghost),*this); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); } else { FixQEqReaxKokkosSparse23Functor sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); } } else { Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); } if (neighflag != FULL) { k_o.template modify(); k_o.template sync(); comm->reverse_comm_fix(this); //Coll_vector( q ); k_o.template modify(); k_o.template sync(); } // tmp = parallel_dot( d, q, nn); my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; alpha = sig_new / tmp; sig_old = sig_new; // vector_add( s, alpha, d, nn ); // vector_add( r, -alpha, q, nn ); my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon1Functor precon1_functor(this); Kokkos::parallel_for(inum,precon1_functor); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; beta = sig_new / sig_old; // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); } if (loop >= loopmax && comm->me == 0) { char str[128]; sprintf(str,"Fix qeq/reax cg_solve1 convergence failed after %d iterations " "at " BIGINT_FORMAT " step: %f",loop,update->ntimestep,sqrt(sig_new)/b_norm); error->warning(FLERR,str); //error->all(FLERR,str); } } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::cg_solve2() // b = b_t, x = t; { const int inum = list->inum; const int ignum = inum + list->gnum; F_FLOAT tmp, sig_old, b_norm; const int teamsize = TEAMSIZE; // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse32Functor sparse32_functor(this); Kokkos::parallel_for(inum,sparse32_functor); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+atom->nghost),*this); if (neighflag == HALF) { FixQEqReaxKokkosSparse33Functor sparse33_functor(this); Kokkos::parallel_for(inum,sparse33_functor); } else { FixQEqReaxKokkosSparse33Functor sparse33_functor(this); Kokkos::parallel_for(inum,sparse33_functor); } } else { Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); } if (neighflag != FULL) { k_o.template modify(); k_o.template sync(); comm->reverse_comm_fix(this); //Coll_vector( q ); k_o.template modify(); k_o.template sync(); } // vector_sum( r , 1., b, -1., q, nn ); // preconditioning: d[j] = r[j] * Hdia_inv[j]; // b_norm = parallel_norm( b, nn ); F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm2Functor norm2_functor(this); Kokkos::parallel_reduce(inum,norm2_functor,my_norm); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; int loop; const int loopmax = 200; for (loop = 1; loop < loopmax & sqrt(sig_new)/b_norm > tolerance; loop++) { // comm->forward_comm_fix(this); //Dist_vector( d ); pack_flag = 1; k_d.template modify(); k_d.template sync(); comm->forward_comm_fix(this); k_d.template modify(); k_d.template sync(); // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+atom->nghost),*this); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); } else { FixQEqReaxKokkosSparse23Functor sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); } } else { Kokkos::parallel_for(Kokkos::TeamPolicy (inum, teamsize), *this); } if (neighflag != FULL) { k_o.template modify(); k_o.template sync(); comm->reverse_comm_fix(this); //Coll_vector( q ); k_o.template modify(); k_o.template sync(); } // tmp = parallel_dot( d, q, nn); my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; alpha = sig_new / tmp; sig_old = sig_new; // vector_add( t, alpha, d, nn ); // vector_add( r, -alpha, q, nn ); my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon2Functor precon2_functor(this); Kokkos::parallel_for(inum,precon2_functor); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; beta = sig_new / sig_old; // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); } if (loop >= loopmax && comm->me == 0) { char str[128]; sprintf(str,"Fix qeq/reax cg_solve2 convergence failed after %d iterations " "at " BIGINT_FORMAT " step: %f",loop,update->ntimestep,sqrt(sig_new)/b_norm); error->warning(FLERR,str); //error->all(FLERR,str); } } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::calculate_q() { F_FLOAT sum, sum_all; const int inum = list->inum; // s_sum = parallel_vector_acc( s, nn ); sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc1Functor vecacc1_functor(this); Kokkos::parallel_reduce(inum,vecacc1_functor,sum); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT s_sum = sum_all; // t_sum = parallel_vector_acc( t, nn); sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc2Functor vecacc2_functor(this); Kokkos::parallel_reduce(inum,vecacc2_functor,sum); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT t_sum = sum_all; // u = s_sum / t_sum; delta = s_sum/t_sum; // q[i] = s[i] - u * t[i]; FixQEqReaxKokkosCalculateQFunctor calculateQ_functor(this); Kokkos::parallel_for(inum,calculateQ_functor); pack_flag = 4; //comm->forward_comm_fix( this ); //Dist_vector( atom->q ); atomKK->k_q.modify(); atomKK->k_q.sync(); comm->forward_comm_fix(this); atomKK->k_q.modify(); atomKK->k_q.sync(); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse12_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); if (mask[i] & groupbit) { d_o[i] = params(itype).eta * d_s[i]; } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse13_item(int ii) const { // The q array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_o = d_o; const int i = d_ilist[ii]; if (mask[i] & groupbit) { F_FLOAT tmp = 0.0; for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) { const int j = d_jlist(jj); tmp += d_val(jj) * d_s[j]; a_o[j] += d_val(jj) * d_s[i]; } a_o[i] += tmp; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec1, const membertype1 &team) const { const int i = d_ilist[team.league_rank()]; if (mask[i] & groupbit) { F_FLOAT doitmp; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { const int j = d_jlist(jj); doi += d_val(jj) * d_s[j]; }, doitmp); Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;}); } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse22_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); if (mask[i] & groupbit) { d_o[i] = params(itype).eta * d_d[i]; } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse23_item(int ii) const { // The q array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_o = d_o; const int i = d_ilist[ii]; if (mask[i] & groupbit) { F_FLOAT tmp = 0.0; for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) { const int j = d_jlist(jj); tmp += d_val(jj) * d_d[j]; a_o[j] += d_val(jj) * d_d[i]; } a_o[i] += tmp; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec2, const membertype2 &team) const { const int i = d_ilist[team.league_rank()]; if (mask[i] & groupbit) { F_FLOAT doitmp; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { const int j = d_jlist(jj); doi += d_val(jj) * d_d[j]; }, doitmp); Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp; }); } } template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagZeroQGhosts, const int &i) const { if (mask[i] & groupbit) d_o[i] = 0.0; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse32_item(int ii) const { const int i = d_ilist[ii]; const int itype = type(i); if (mask[i] & groupbit) d_o[i] = params(itype).eta * d_t[i]; } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::sparse33_item(int ii) const { // The q array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_o = d_o; const int i = d_ilist[ii]; if (mask[i] & groupbit) { F_FLOAT tmp = 0.0; for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) { const int j = d_jlist(jj); tmp += d_val(jj) * d_t[j]; a_o[j] += d_val(jj) * d_t[i]; } a_o[i] += tmp; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::operator() (TagSparseMatvec3, const membertype3 &team) const { const int i = d_ilist[team.league_rank()]; if (mask[i] & groupbit) { F_FLOAT doitmp; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) { const int j = d_jlist(jj); doi += d_val(jj) * d_t[j]; }, doitmp); Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;}); } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::vecsum2_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) d_d[i] = 1.0 * d_p[i] + beta * d_d[i]; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::norm1_item(int ii) const { F_FLOAT tmp = 0; const int i = d_ilist[ii]; if (mask[i] & groupbit) { d_r[i] = 1.0*d_b_s[i] + -1.0*d_o[i]; d_d[i] = d_r[i] * d_Hdia_inv[i]; tmp = d_b_s[i] * d_b_s[i]; } return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::norm2_item(int ii) const { F_FLOAT tmp = 0; const int i = d_ilist[ii]; if (mask[i] & groupbit) { d_r[i] = 1.0*d_b_t[i] + -1.0*d_o[i]; d_d[i] = d_r[i] * d_Hdia_inv[i]; tmp = d_b_t[i] * d_b_t[i]; } return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::dot1_item(int ii) const { F_FLOAT tmp = 0.0; const int i = d_ilist[ii]; if (mask[i] & groupbit) tmp = d_r[i] * d_d[i]; return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::dot2_item(int ii) const { double tmp = 0.0; const int i = d_ilist[ii]; if (mask[i] & groupbit) { tmp = d_d[i] * d_o[i]; } return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::precon1_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { d_s[i] += alpha * d_d[i]; d_r[i] += -alpha * d_o[i]; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::precon2_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { d_t[i] += alpha * d_d[i]; d_r[i] += -alpha * d_o[i]; } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::precon_item(int ii) const { F_FLOAT tmp = 0.0; const int i = d_ilist[ii]; if (mask[i] & groupbit) { d_p[i] = d_r[i] * d_Hdia_inv[i]; tmp = d_r[i] * d_p[i]; } return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::vecacc1_item(int ii) const { F_FLOAT tmp = 0.0; const int i = d_ilist[ii]; if (mask[i] & groupbit) tmp = d_s[i]; return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::vecacc2_item(int ii) const { F_FLOAT tmp = 0.0; const int i = d_ilist[ii]; if (mask[i] & groupbit) { tmp = d_t[i]; } return tmp; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::calculate_q_item(int ii) const { const int i = d_ilist[ii]; if (mask[i] & groupbit) { q(i) = d_s[i] - delta * d_t[i]; for (int k = 4; k > 0; --k) { d_s_hist(i,k) = d_s_hist(i,k-1); d_t_hist(i,k) = d_t_hist(i,k-1); } d_s_hist(i,0) = d_s[i]; d_t_hist(i,0) = d_t[i]; } } /* ---------------------------------------------------------------------- */ template int FixQEqReaxKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int m; if( pack_flag == 1) for(m = 0; m < n; m++) buf[m] = h_d[list[m]]; else if( pack_flag == 2 ) for(m = 0; m < n; m++) buf[m] = h_s[list[m]]; else if( pack_flag == 3 ) for(m = 0; m < n; m++) buf[m] = h_t[list[m]]; else if( pack_flag == 4 ) for(m = 0; m < n; m++) buf[m] = atom->q[list[m]]; return n; } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::unpack_forward_comm(int n, int first, double *buf) { int i, m; if( pack_flag == 1) for(m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m]; else if( pack_flag == 2) for(m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m]; else if( pack_flag == 3) for(m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m]; else if( pack_flag == 4) for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; } /* ---------------------------------------------------------------------- */ template int FixQEqReaxKokkos::pack_reverse_comm(int n, int first, double *buf) { int i, m; for(m = 0, i = first; m < n; m++, i++) { buf[m] = h_o[i]; } return n; } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::unpack_reverse_comm(int n, int *list, double *buf) { for(int m = 0; m < n; m++) { h_o[list[m]] += buf[m]; } } /* ---------------------------------------------------------------------- */ template void FixQEqReaxKokkos::cleanup_copy() { id = style = NULL; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ template double FixQEqReaxKokkos::memory_usage() { double bytes; - bytes = atom->nmax*5*2 * sizeof(F_FLOAT); // s_hist & t_hist + bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist bytes += atom->nmax*8 * sizeof(F_FLOAT); // storage bytes += n_cap*2 * sizeof(int); // matrix... bytes += m_cap * sizeof(int); bytes += m_cap * sizeof(F_FLOAT); return bytes; } /* ---------------------------------------------------------------------- */\ namespace LAMMPS_NS { template class FixQEqReaxKokkos; #ifdef KOKKOS_HAVE_CUDA template class FixQEqReaxKokkos; #endif } diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 9d165f3fd..33b70c972 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -1,1106 +1,1106 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Hasan Metin Aktulga, Purdue University (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) Hybrid and sub-group capabilities: Ray Shan (Sandia) ------------------------------------------------------------------------- */ #include #include #include #include #include "fix_qeq_reax.h" #include "pair_reaxc.h" #include "atom.h" #include "comm.h" #include "domain.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "update.h" #include "force.h" #include "group.h" #include "pair.h" #include "respa.h" #include "memory.h" #include "citeme.h" #include "error.h" #include "reaxc_defs.h" using namespace LAMMPS_NS; using namespace FixConst; #define EV_TO_KCAL_PER_MOL 14.4 //#define DANGER_ZONE 0.95 //#define LOOSE_ZONE 0.7 #define SQR(x) ((x)*(x)) #define CUBE(x) ((x)*(x)*(x)) #define MIN_NBRS 100 static const char cite_fix_qeq_reax[] = "fix qeq/reax command:\n\n" "@Article{Aktulga12,\n" " author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},\n" " title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},\n" " journal = {Parallel Computing},\n" " year = 2012,\n" " volume = 38,\n" " pages = {245--259}\n" "}\n\n"; /* ---------------------------------------------------------------------- */ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), pertype_option(NULL) { if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax); if (narg<8 || narg>9) error->all(FLERR,"Illegal fix qeq/reax command"); nevery = force->inumeric(FLERR,arg[3]); if (nevery <= 0) error->all(FLERR,"Illegal fix qeq/reax command"); swa = force->numeric(FLERR,arg[4]); swb = force->numeric(FLERR,arg[5]); tolerance = force->numeric(FLERR,arg[6]); int len = strlen(arg[7]) + 1; pertype_option = new char[len]; strcpy(pertype_option,arg[7]); // dual CG support only available for USER-OMP variant // check for compatibility is in Fix::post_constructor() dual_enabled = 0; if (narg == 9) { if (strcmp(arg[8],"dual") == 0) dual_enabled = 1; else error->all(FLERR,"Illegal fix qeq/reax command"); } shld = NULL; n = n_cap = 0; N = nmax = 0; m_fill = m_cap = 0; pack_flag = 0; s = NULL; t = NULL; - nprev = 5; + nprev = 4; Hdia_inv = NULL; b_s = NULL; b_t = NULL; b_prc = NULL; b_prm = NULL; // CG p = NULL; q = NULL; r = NULL; d = NULL; // H matrix H.firstnbr = NULL; H.numnbrs = NULL; H.jlist = NULL; H.val = NULL; // dual CG support // Update comm sizes for this fix if (dual_enabled) comm_forward = comm_reverse = 2; else comm_forward = comm_reverse = 1; // perform initial allocation of atom-based arrays // register with Atom class reaxc = NULL; reaxc = (PairReaxC *) force->pair_match("reax/c",0); if (reaxc) { s_hist = t_hist = NULL; grow_arrays(atom->nmax); atom->add_callback(0); for (int i = 0; i < atom->nmax; i++) for (int j = 0; j < nprev; ++j) s_hist[i][j] = t_hist[i][j] = 0; } } /* ---------------------------------------------------------------------- */ FixQEqReax::~FixQEqReax() { if (copymode) return; delete[] pertype_option; // unregister callbacks to this fix from Atom class atom->delete_callback(id,0); memory->destroy(s_hist); memory->destroy(t_hist); deallocate_storage(); deallocate_matrix(); memory->destroy(shld); if (!reaxflag) { memory->destroy(chi); memory->destroy(eta); memory->destroy(gamma); } } /* ---------------------------------------------------------------------- */ void FixQEqReax::post_constructor() { pertype_parameters(pertype_option); if (dual_enabled) error->all(FLERR,"Dual keyword only supported with fix qeq/reax/omp"); } /* ---------------------------------------------------------------------- */ int FixQEqReax::setmask() { int mask = 0; mask |= PRE_FORCE; mask |= PRE_FORCE_RESPA; mask |= MIN_PRE_FORCE; return mask; } /* ---------------------------------------------------------------------- */ void FixQEqReax::pertype_parameters(char *arg) { if (strcmp(arg,"reax/c") == 0) { reaxflag = 1; Pair *pair = force->pair_match("reax/c",0); if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax"); int tmp; chi = (double *) pair->extract("chi",tmp); eta = (double *) pair->extract("eta",tmp); gamma = (double *) pair->extract("gamma",tmp); if (chi == NULL || eta == NULL || gamma == NULL) error->all(FLERR, "Fix qeq/reax could not extract params from pair reax/c"); return; } int i,itype,ntypes; double v1,v2,v3; FILE *pf; reaxflag = 0; ntypes = atom->ntypes; memory->create(chi,ntypes+1,"qeq/reax:chi"); memory->create(eta,ntypes+1,"qeq/reax:eta"); memory->create(gamma,ntypes+1,"qeq/reax:gamma"); if (comm->me == 0) { if ((pf = fopen(arg,"r")) == NULL) error->one(FLERR,"Fix qeq/reax parameter file could not be found"); for (i = 1; i <= ntypes && !feof(pf); i++) { fscanf(pf,"%d %lg %lg %lg",&itype,&v1,&v2,&v3); if (itype < 1 || itype > ntypes) error->one(FLERR,"Fix qeq/reax invalid atom type in param file"); chi[itype] = v1; eta[itype] = v2; gamma[itype] = v3; } if (i <= ntypes) error->one(FLERR,"Invalid param file for fix qeq/reax"); fclose(pf); } MPI_Bcast(&chi[1],ntypes,MPI_DOUBLE,0,world); MPI_Bcast(&eta[1],ntypes,MPI_DOUBLE,0,world); MPI_Bcast(&gamma[1],ntypes,MPI_DOUBLE,0,world); } /* ---------------------------------------------------------------------- */ void FixQEqReax::allocate_storage() { nmax = atom->nmax; memory->create(s,nmax,"qeq:s"); memory->create(t,nmax,"qeq:t"); memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); memory->create(b_s,nmax,"qeq:b_s"); memory->create(b_t,nmax,"qeq:b_t"); memory->create(b_prc,nmax,"qeq:b_prc"); memory->create(b_prm,nmax,"qeq:b_prm"); // dual CG support int size = nmax; if (dual_enabled) size*= 2; memory->create(p,size,"qeq:p"); memory->create(q,size,"qeq:q"); memory->create(r,size,"qeq:r"); memory->create(d,size,"qeq:d"); } /* ---------------------------------------------------------------------- */ void FixQEqReax::deallocate_storage() { memory->destroy(s); memory->destroy(t); memory->destroy( Hdia_inv ); memory->destroy( b_s ); memory->destroy( b_t ); memory->destroy( b_prc ); memory->destroy( b_prm ); memory->destroy( p ); memory->destroy( q ); memory->destroy( r ); memory->destroy( d ); } /* ---------------------------------------------------------------------- */ void FixQEqReax::reallocate_storage() { deallocate_storage(); allocate_storage(); init_storage(); } /* ---------------------------------------------------------------------- */ void FixQEqReax::allocate_matrix() { int i,ii,inum,m; int *ilist, *numneigh; int mincap; double safezone; if (reaxflag) { mincap = reaxc->system->mincap; safezone = reaxc->system->safezone; } else { mincap = MIN_CAP; safezone = SAFE_ZONE; } n = atom->nlocal; n_cap = MAX( (int)(n * safezone), mincap); // determine the total space for the H matrix if (reaxc) { inum = reaxc->list->inum; ilist = reaxc->list->ilist; numneigh = reaxc->list->numneigh; } else { inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; } m = 0; for (ii = 0; ii < inum; ii++) { i = ilist[ii]; m += numneigh[i]; } m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS); H.n = n_cap; H.m = m_cap; memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr"); memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs"); memory->create(H.jlist,m_cap,"qeq:H.jlist"); memory->create(H.val,m_cap,"qeq:H.val"); } /* ---------------------------------------------------------------------- */ void FixQEqReax::deallocate_matrix() { memory->destroy( H.firstnbr ); memory->destroy( H.numnbrs ); memory->destroy( H.jlist ); memory->destroy( H.val ); } /* ---------------------------------------------------------------------- */ void FixQEqReax::reallocate_matrix() { deallocate_matrix(); allocate_matrix(); } /* ---------------------------------------------------------------------- */ void FixQEqReax::init() { if (!atom->q_flag) error->all(FLERR,"Fix qeq/reax requires atom attribute q"); ngroup = group->count(igroup); if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms"); // need a half neighbor list w/ Newton off and ghost neighbors // built whenever re-neighboring occurs int irequest = neighbor->request(this,instance_me); neighbor->requests[irequest]->pair = 0; neighbor->requests[irequest]->fix = 1; neighbor->requests[irequest]->newton = 2; neighbor->requests[irequest]->ghost = 1; init_shielding(); init_taper(); if (strstr(update->integrate_style,"respa")) nlevels_respa = ((Respa *) update->integrate)->nlevels; } /* ---------------------------------------------------------------------- */ void FixQEqReax::init_list(int id, NeighList *ptr) { list = ptr; } /* ---------------------------------------------------------------------- */ void FixQEqReax::init_shielding() { int i,j; int ntypes; ntypes = atom->ntypes; if (shld == NULL) memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); for (i = 1; i <= ntypes; ++i) for (j = 1; j <= ntypes; ++j) shld[i][j] = pow( gamma[i] * gamma[j], -1.5); } /* ---------------------------------------------------------------------- */ void FixQEqReax::init_taper() { double d7, swa2, swa3, swb2, swb3; if (fabs(swa) > 0.01 && comm->me == 0) error->warning(FLERR,"Fix qeq/reax has non-zero lower Taper radius cutoff"); if (swb < 0) error->all(FLERR, "Fix qeq/reax has negative upper Taper radius cutoff"); else if (swb < 5 && comm->me == 0) error->warning(FLERR,"Fix qeq/reax has very low Taper radius cutoff"); d7 = pow( swb - swa, 7); swa2 = SQR( swa); swa3 = CUBE( swa); swb2 = SQR( swb); swb3 = CUBE( swb); Tap[7] = 20.0 / d7; Tap[6] = -70.0 * (swa + swb) / d7; Tap[5] = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7; Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7; Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7; Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7; Tap[1] = 140.0 * swa3 * swb3 / d7; Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 + 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7; } /* ---------------------------------------------------------------------- */ void FixQEqReax::setup_pre_force(int vflag) { deallocate_storage(); allocate_storage(); init_storage(); deallocate_matrix(); allocate_matrix(); pre_force(vflag); } /* ---------------------------------------------------------------------- */ void FixQEqReax::setup_pre_force_respa(int vflag, int ilevel) { if (ilevel < nlevels_respa-1) return; setup_pre_force(vflag); } /* ---------------------------------------------------------------------- */ void FixQEqReax::min_setup_pre_force(int vflag) { setup_pre_force(vflag); } /* ---------------------------------------------------------------------- */ void FixQEqReax::init_storage() { int NN; if (reaxc) NN = reaxc->list->inum + reaxc->list->gnum; else NN = list->inum + list->gnum; for (int i = 0; i < NN; i++) { Hdia_inv[i] = 1. / eta[atom->type[i]]; b_s[i] = -chi[atom->type[i]]; b_t[i] = -1.0; b_prc[i] = 0; b_prm[i] = 0; s[i] = t[i] = 0; } } /* ---------------------------------------------------------------------- */ void FixQEqReax::pre_force(int vflag) { double t_start, t_end; if (update->ntimestep % nevery) return; if (comm->me == 0) t_start = MPI_Wtime(); n = atom->nlocal; N = atom->nlocal + atom->nghost; // grow arrays if necessary // need to be atom->nmax in length if (atom->nmax > nmax) reallocate_storage(); if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) reallocate_matrix(); init_matvec(); matvecs_s = CG(b_s, s); // CG on s - parallel matvecs_t = CG(b_t, t); // CG on t - parallel matvecs = matvecs_s + matvecs_t; calculate_Q(); if (comm->me == 0) { t_end = MPI_Wtime(); qeq_time = t_end - t_start; } } /* ---------------------------------------------------------------------- */ void FixQEqReax::pre_force_respa(int vflag, int ilevel, int iloop) { if (ilevel == nlevels_respa-1) pre_force(vflag); } /* ---------------------------------------------------------------------- */ void FixQEqReax::min_pre_force(int vflag) { pre_force(vflag); } /* ---------------------------------------------------------------------- */ void FixQEqReax::init_matvec() { /* fill-in H matrix */ compute_H(); int nn, ii, i; int *ilist; if (reaxc) { nn = reaxc->list->inum; ilist = reaxc->list->ilist; } else { nn = list->inum; ilist = list->ilist; } for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { /* init pre-conditioner for H and init solution vectors */ Hdia_inv[i] = 1. / eta[ atom->type[i] ]; b_s[i] = -chi[ atom->type[i] ]; b_t[i] = -1.0; /* linear extrapolation for s & t from previous solutions */ //s[i] = 2 * s_hist[i][0] - s_hist[i][1]; //t[i] = 2 * t_hist[i][0] - t_hist[i][1]; /* quadratic extrapolation for s & t from previous solutions */ //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] ); t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1]); /* cubic extrapolation for s & t from previous solutions */ s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); //t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]); } } pack_flag = 2; comm->forward_comm_fix(this); //Dist_vector( s ); pack_flag = 3; comm->forward_comm_fix(this); //Dist_vector( t ); } /* ---------------------------------------------------------------------- */ void FixQEqReax::compute_H() { int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh; int i, j, ii, jj, flag; double dx, dy, dz, r_sqr; const double SMALL = 0.0001; int *type = atom->type; tagint *tag = atom->tag; double **x = atom->x; int *mask = atom->mask; if (reaxc) { inum = reaxc->list->inum; ilist = reaxc->list->ilist; numneigh = reaxc->list->numneigh; firstneigh = reaxc->list->firstneigh; } else { inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; } // fill in the H matrix m_fill = 0; r_sqr = 0; for (ii = 0; ii < inum; ii++) { i = ilist[ii]; if (mask[i] & groupbit) { jlist = firstneigh[i]; jnum = numneigh[i]; H.firstnbr[i] = m_fill; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; dx = x[j][0] - x[i][0]; dy = x[j][1] - x[i][1]; dz = x[j][2] - x[i][2]; r_sqr = SQR(dx) + SQR(dy) + SQR(dz); flag = 0; if (r_sqr <= SQR(swb)) { if (j < n) flag = 1; else if (tag[i] < tag[j]) flag = 1; else if (tag[i] == tag[j]) { if (dz > SMALL) flag = 1; else if (fabs(dz) < SMALL) { if (dy > SMALL) flag = 1; else if (fabs(dy) < SMALL && dx > SMALL) flag = 1; } } } if (flag) { H.jlist[m_fill] = j; H.val[m_fill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]]); m_fill++; } } H.numnbrs[i] = m_fill - H.firstnbr[i]; } } if (m_fill >= H.m) { char str[128]; sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n", m_fill, H.m); error->warning(FLERR,str); error->all(FLERR,"Fix qeq/reax has insufficient QEq matrix size"); } } /* ---------------------------------------------------------------------- */ double FixQEqReax::calculate_H( double r, double gamma) { double Taper, denom; Taper = Tap[7] * r + Tap[6]; Taper = Taper * r + Tap[5]; Taper = Taper * r + Tap[4]; Taper = Taper * r + Tap[3]; Taper = Taper * r + Tap[2]; Taper = Taper * r + Tap[1]; Taper = Taper * r + Tap[0]; denom = r * r * r + gamma; denom = pow(denom,0.3333333333333); return Taper * EV_TO_KCAL_PER_MOL / denom; } /* ---------------------------------------------------------------------- */ int FixQEqReax::CG( double *b, double *x) { int i, j, imax; double tmp, alpha, beta, b_norm; double sig_old, sig_new; int nn, jj; int *ilist; if (reaxc) { nn = reaxc->list->inum; ilist = reaxc->list->ilist; } else { nn = list->inum; ilist = list->ilist; } imax = 200; pack_flag = 1; sparse_matvec( &H, x, q); comm->reverse_comm_fix(this); //Coll_Vector( q ); vector_sum( r , 1., b, -1., q, nn); for (jj = 0; jj < nn; ++jj) { j = ilist[jj]; if (atom->mask[j] & groupbit) d[j] = r[j] * Hdia_inv[j]; //pre-condition } b_norm = parallel_norm( b, nn); sig_new = parallel_dot( r, d, nn); for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) { comm->forward_comm_fix(this); //Dist_vector( d ); sparse_matvec( &H, d, q ); comm->reverse_comm_fix(this); //Coll_vector( q ); tmp = parallel_dot( d, q, nn); alpha = sig_new / tmp; vector_add( x, alpha, d, nn ); vector_add( r, -alpha, q, nn ); // pre-conditioning for (jj = 0; jj < nn; ++jj) { j = ilist[jj]; if (atom->mask[j] & groupbit) p[j] = r[j] * Hdia_inv[j]; } sig_old = sig_new; sig_new = parallel_dot( r, p, nn); beta = sig_new / sig_old; vector_sum( d, 1., p, beta, d, nn ); } if (i >= imax && comm->me == 0) { char str[128]; sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations " "at " BIGINT_FORMAT " step",i,update->ntimestep); error->warning(FLERR,str); } return i; } /* ---------------------------------------------------------------------- */ void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b) { int i, j, itr_j; int nn, NN, ii; int *ilist; if (reaxc) { nn = reaxc->list->inum; NN = reaxc->list->inum + reaxc->list->gnum; ilist = reaxc->list->ilist; } else { nn = list->inum; NN = list->inum + list->gnum; ilist = list->ilist; } for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i]; } for (ii = nn; ii < NN; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) b[i] = 0; } for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { for (itr_j=A->firstnbr[i]; itr_jfirstnbr[i]+A->numnbrs[i]; itr_j++) { j = A->jlist[itr_j]; b[i] += A->val[itr_j] * x[j]; b[j] += A->val[itr_j] * x[i]; } } } } /* ---------------------------------------------------------------------- */ void FixQEqReax::calculate_Q() { int i, k; double u, s_sum, t_sum; double *q = atom->q; int nn, ii; int *ilist; if (reaxc) { nn = reaxc->list->inum; ilist = reaxc->list->ilist; } else { nn = list->inum; ilist = list->ilist; } s_sum = parallel_vector_acc( s, nn); t_sum = parallel_vector_acc( t, nn); u = s_sum / t_sum; for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { q[i] = s[i] - u * t[i]; /* backup s & t */ for (k = 4; k > 0; --k) { s_hist[i][k] = s_hist[i][k-1]; t_hist[i][k] = t_hist[i][k-1]; } s_hist[i][0] = s[i]; t_hist[i][0] = t[i]; } } pack_flag = 4; comm->forward_comm_fix(this); //Dist_vector( atom->q ); } /* ---------------------------------------------------------------------- */ int FixQEqReax::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int m; if (pack_flag == 1) for(m = 0; m < n; m++) buf[m] = d[list[m]]; else if (pack_flag == 2) for(m = 0; m < n; m++) buf[m] = s[list[m]]; else if (pack_flag == 3) for(m = 0; m < n; m++) buf[m] = t[list[m]]; else if (pack_flag == 4) for(m = 0; m < n; m++) buf[m] = atom->q[list[m]]; else if (pack_flag == 5) { m = 0; for(int i = 0; i < n; i++) { int j = 2 * list[i]; buf[m++] = d[j ]; buf[m++] = d[j+1]; } return m; } return n; } /* ---------------------------------------------------------------------- */ void FixQEqReax::unpack_forward_comm(int n, int first, double *buf) { int i, m; if (pack_flag == 1) for(m = 0, i = first; m < n; m++, i++) d[i] = buf[m]; else if (pack_flag == 2) for(m = 0, i = first; m < n; m++, i++) s[i] = buf[m]; else if (pack_flag == 3) for(m = 0, i = first; m < n; m++, i++) t[i] = buf[m]; else if (pack_flag == 4) for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; else if (pack_flag == 5) { int last = first + n; m = 0; for(i = first; i < last; i++) { int j = 2 * i; d[j ] = buf[m++]; d[j+1] = buf[m++]; } } } /* ---------------------------------------------------------------------- */ int FixQEqReax::pack_reverse_comm(int n, int first, double *buf) { int i, m; if (pack_flag == 5) { m = 0; int last = first + n; for(i = first; i < last; i++) { int indxI = 2 * i; buf[m++] = q[indxI ]; buf[m++] = q[indxI+1]; } return m; } else { for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; return n; } } /* ---------------------------------------------------------------------- */ void FixQEqReax::unpack_reverse_comm(int n, int *list, double *buf) { if (pack_flag == 5) { int m = 0; for(int i = 0; i < n; i++) { int indxI = 2 * list[i]; q[indxI ] += buf[m++]; q[indxI+1] += buf[m++]; } } else { for (int m = 0; m < n; m++) q[list[m]] += buf[m]; } } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double FixQEqReax::memory_usage() { double bytes; bytes = atom->nmax*nprev*2 * sizeof(double); // s_hist & t_hist bytes += atom->nmax*11 * sizeof(double); // storage bytes += n_cap*2 * sizeof(int); // matrix... bytes += m_cap * sizeof(int); bytes += m_cap * sizeof(double); if (dual_enabled) bytes += atom->nmax*4 * sizeof(double); // double size for q, d, r, and p return bytes; } /* ---------------------------------------------------------------------- allocate fictitious charge arrays ------------------------------------------------------------------------- */ void FixQEqReax::grow_arrays(int nmax) { memory->grow(s_hist,nmax,nprev,"qeq:s_hist"); memory->grow(t_hist,nmax,nprev,"qeq:t_hist"); } /* ---------------------------------------------------------------------- copy values within fictitious charge arrays ------------------------------------------------------------------------- */ void FixQEqReax::copy_arrays(int i, int j, int delflag) { for (int m = 0; m < nprev; m++) { s_hist[j][m] = s_hist[i][m]; t_hist[j][m] = t_hist[i][m]; } } /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc ------------------------------------------------------------------------- */ int FixQEqReax::pack_exchange(int i, double *buf) { for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m]; for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m]; return nprev*2; } /* ---------------------------------------------------------------------- unpack values in local atom-based array from exchange with another proc ------------------------------------------------------------------------- */ int FixQEqReax::unpack_exchange(int nlocal, double *buf) { for (int m = 0; m < nprev; m++) s_hist[nlocal][m] = buf[m]; for (int m = 0; m < nprev; m++) t_hist[nlocal][m] = buf[nprev+m]; return nprev*2; } /* ---------------------------------------------------------------------- */ double FixQEqReax::parallel_norm( double *v, int n) { int i; double my_sum, norm_sqr; int ii; int *ilist; if (reaxc) ilist = reaxc->list->ilist; else ilist = list->ilist; my_sum = 0.0; norm_sqr = 0.0; for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) my_sum += SQR( v[i]); } MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world); return sqrt( norm_sqr); } /* ---------------------------------------------------------------------- */ double FixQEqReax::parallel_dot( double *v1, double *v2, int n) { int i; double my_dot, res; int ii; int *ilist; if (reaxc) ilist = reaxc->list->ilist; else ilist = list->ilist; my_dot = 0.0; res = 0.0; for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) my_dot += v1[i] * v2[i]; } MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world); return res; } /* ---------------------------------------------------------------------- */ double FixQEqReax::parallel_vector_acc( double *v, int n) { int i; double my_acc, res; int ii; int *ilist; if (reaxc) ilist = reaxc->list->ilist; else ilist = list->ilist; my_acc = 0.0; res = 0.0; for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) my_acc += v[i]; } MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world); return res; } /* ---------------------------------------------------------------------- */ void FixQEqReax::vector_sum( double* dest, double c, double* v, double d, double* y, int k) { int kk; int *ilist; if (reaxc) ilist = reaxc->list->ilist; else ilist = list->ilist; for (--k; k>=0; --k) { kk = ilist[k]; if (atom->mask[kk] & groupbit) dest[kk] = c * v[kk] + d * y[kk]; } } /* ---------------------------------------------------------------------- */ void FixQEqReax::vector_add( double* dest, double c, double* v, int k) { int kk; int *ilist; if (reaxc) ilist = reaxc->list->ilist; else ilist = list->ilist; for (--k; k>=0; --k) { kk = ilist[k]; if (atom->mask[kk] & groupbit) dest[kk] += c * v[kk]; } }