fix_qeq_reax_omp.cpp
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author:
Hasan Metin Aktulga, Michigan State University, hma@cse.msu.edu
Hybrid & sub-group capabilities added by Ray Shan (Materials Design)
OpenMP based threading support for fix qeq/reax/omp added
by Hasan Metin Aktulga (MSU), Chris Knight (ALCF), Paul Coffman (ALCF),
Kurt O'Hearn (MSU), Ray Shan (Materials Design), Wei Jiang (ALCF)
Integration of the pair_style reax/c/omp into the User-OMP package
by Axel Kohlmeyer (Temple U.)
Please cite the related publication:
H. M. Aktulga, C. Knight, P. Coffman, K. A. O'Hearn, T. R. Shan,
W. Jiang, "Optimizing the performance of reactive molecular dynamics
simulations for multi-core architectures", International Journal of
High Performance Computing Applications, to appear.
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fix_qeq_reax_omp.h"
#include "pair_reaxc_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "force.h"
#include "group.h"
#include "pair.h"
#include "respa.h"
#include "memory.h"
#include "error.h"
#include "reaxc_defs.h"
using namespace LAMMPS_NS;
using namespace FixConst;
#define EV_TO_KCAL_PER_MOL 14.4
//#define DANGER_ZONE 0.95
//#define LOOSE_ZONE 0.7
#define SQR(x) ((x)*(x))
#define CUBE(x) ((x)*(x)*(x))
#define MIN_NBRS 100
/* ---------------------------------------------------------------------- */
FixQEqReaxOMP::FixQEqReaxOMP(LAMMPS *lmp, int narg, char **arg) :
FixQEqReax(lmp, narg, arg)
{
if (narg<8 || narg>9) error->all(FLERR,"Illegal fix qeq/reax/omp command");
b_temp = NULL;
// ASPC: Kolafa, J. Comput. Chem., 25(3), 335 (2004)
do_aspc = 0;
aspc_order = 1;
// Must be consistent with nprev to store history: nprev = aspc_order + 2
aspc_order_max = nprev - 2;
aspc_omega = 0.0;
aspc_b = NULL;
}
FixQEqReaxOMP::~FixQEqReaxOMP()
{
memory->destroy(b_temp);
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::post_constructor()
{
pertype_parameters(pertype_option);
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::allocate_storage()
{
FixQEqReax::allocate_storage();
// dual CG support
int size = nmax;
if (dual_enabled) size*= 2;
memory->create(b_temp, comm->nthreads, size, "qeq/reax/omp:b_temp");
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::deallocate_storage()
{
memory->destroy(b_temp);
FixQEqReax::deallocate_storage();
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::init()
{
FixQEqReax::init();
// ASPC (always stable predictor-corrector) setup
if (do_aspc) {
memory->create(aspc_b, aspc_order_max+2, "qeq/reax/aspc_b");
// Calculate damping factor
double o = double(aspc_order);
aspc_omega = (o+2.0) / (2*o+3.0);
// Calculate B coefficients
double c = (4.0 * o + 6.0) / (o + 3.0);
aspc_b[0] = c;
double n = 1.0;
double d = 4.0;
double s = -1.0;
double f = 2.0;
for (int i=1; i<aspc_order_max+2; i++) {
c*= (o + n) / (o + d);
aspc_b[i] = s * f * c;
s *= -1.0;
f += 1.0;
n -= 1.0;
d += 1.0;
}
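// The recursion above generates the alternating-sign ASPC predictor
// coefficients aspc_b[0..aspc_order_max+1]; only the first aspc_order+2 of
// them enter the predictor sum in init_matvec().  aspc_omega is the
// corresponding relaxation parameter (k+2)/(2k+3) with k = aspc_order.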
}
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::compute_H()
{
int inum, *ilist, *numneigh, **firstneigh;
double SMALL = 0.0001;
int *type = atom->type;
tagint * tag = atom->tag;
double **x = atom->x;
int *mask = atom->mask;
if (reaxc) {
inum = reaxc->list->inum;
ilist = reaxc->list->ilist;
numneigh = reaxc->list->numneigh;
firstneigh = reaxc->list->firstneigh;
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
}
int ai, num_nbrs;
// Sum-scan of the number of neighbors per atom to determine the row offsets.
// Most likely we are over-allocating; it would be desirable to rework this part
// to reduce the memory footprint of the far_nbrs list.
num_nbrs = 0;
for (int itr_i = 0; itr_i < inum; ++itr_i) {
ai = ilist[itr_i];
H.firstnbr[ai] = num_nbrs;
num_nbrs += numneigh[ai];
}
m_fill = num_nbrs;
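// Fill row i of H: each neighbor j of i inside the cutoff swb contributes one
// matrix entry.  For ghost j the pair is stored by only one of the two owners,
// with the tie broken first by global tag and then, for identical tags
// (periodic images), by the dz/dy/dx coordinate tests.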
// fill in the H matrix
#if defined(_OPENMP)
#pragma omp parallel default(shared)
#endif
{
int i, j, ii, jj, mfill, jnum, flag;
int *jlist;
double dx, dy, dz, r_sqr;
mfill = 0;
#if defined(_OPENMP)
#pragma omp for schedule(guided)
#endif
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
if (mask[i] & groupbit) {
jlist = firstneigh[i];
jnum = numneigh[i];
mfill = H.firstnbr[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
dx = x[j][0] - x[i][0];
dy = x[j][1] - x[i][1];
dz = x[j][2] - x[i][2];
r_sqr = SQR(dx) + SQR(dy) + SQR(dz);
flag = 0;
if (r_sqr <= SQR(swb)) {
if (j < n) flag = 1;
else if (tag[i] < tag[j]) flag = 1;
else if (tag[i] == tag[j]) {
if (dz > SMALL) flag = 1;
else if (fabs(dz) < SMALL) {
if (dy > SMALL) flag = 1;
else if (fabs(dy) < SMALL && dx > SMALL) flag = 1;
}
}
}
if (flag) {
H.jlist[mfill] = j;
H.val[mfill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] );
mfill++;
}
}
H.numnbrs[i] = mfill - H.firstnbr[i];
}
}
if (mfill >= H.m) {
char str[128];
sprintf(str,"H matrix size has been exceeded: mfill=%d H.m=%d\n",
mfill, H.m);
error->warning(FLERR,str);
error->all(FLERR,"Fix qeq/reax/omp has insufficient QEq matrix size");
}
} // omp
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::init_storage()
{
int NN;
if (reaxc) NN = reaxc->list->inum + reaxc->list->gnum;
else NN = list->inum + list->gnum;
#if defined(_OPENMP)
#pragma omp parallel for schedule(static)
#endif
for (int i = 0; i < NN; i++) {
Hdia_inv[i] = 1. / eta[atom->type[i]];
b_s[i] = -chi[atom->type[i]];
b_t[i] = -1.0;
b_prc[i] = 0;
b_prm[i] = 0;
s[i] = t[i] = 0;
}
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::pre_force(int vflag)
{
#ifdef OMP_TIMING
double endTimeBase, startTimeBase, funcstartTimeBase;
funcstartTimeBase = MPI_Wtime();
#endif
double t_start, t_end;
if (update->ntimestep % nevery) return;
if (comm->me == 0) t_start = MPI_Wtime();
n = atom->nlocal;
N = atom->nlocal + atom->nghost;
// grow arrays if necessary
// need to be atom->nmax in length
if (atom->nmax > nmax) reallocate_storage();
if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE)
reallocate_matrix();
#ifdef OMP_TIMING
startTimeBase = MPI_Wtime();
#endif
init_matvec();
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTEINITMVINDEX] += (endTimeBase-startTimeBase);
startTimeBase = endTimeBase;
#endif
if (dual_enabled) {
matvecs = dual_CG(b_s, b_t, s, t); // OMP_TIMING inside dual_CG
} else {
matvecs_s = CG(b_s, s); // CG on s - parallel
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase);
ompTimingCount[COMPUTECG1INDEX]++;
ompTimingCGCount[COMPUTECG1INDEX]+= matvecs_s;
startTimeBase = endTimeBase;
#endif
matvecs_t = CG(b_t, t); // CG on t - parallel
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase);
ompTimingCount[COMPUTECG2INDEX]++;
ompTimingCGCount[COMPUTECG2INDEX]+= matvecs_t;
startTimeBase = endTimeBase;
#endif
} // if (dual_enabled)
#ifdef OMP_TIMING
startTimeBase = MPI_Wtime();
#endif
calculate_Q();
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTECALCQINDEX] += (endTimeBase-startTimeBase);
#endif
if (comm->me == 0) {
t_end = MPI_Wtime();
qeq_time = t_end - t_start;
}
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTEQEQINDEX] += (endTimeBase-funcstartTimeBase);
#endif
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::init_matvec()
{
#ifdef OMP_TIMING
long endTimeBase, startTimeBase;
startTimeBase = MPI_Wtime();
#endif
/* fill-in H matrix */
compute_H();
int nn,i;
int *ilist;
if (reaxc) {
nn = reaxc->list->inum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
ilist = list->ilist;
}
// NOTE: should really be more careful with initialization during the first (aspc_order+2) MD steps
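// With ASPC enabled, the initial guess for s and t is Kolafa's predictor (a
// weighted sum over the stored solution history using the aspc_b coefficients)
// blended with the most recent converged solution through aspc_omega.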
if (do_aspc) {
double m_aspc_omega = 1.0 - aspc_omega;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,50) private(i)
#endif
for (int ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
/* init pre-conditioner for H and init solution vectors */
Hdia_inv[i] = 1. / eta[ atom->type[i] ];
b_s[i] = -chi[ atom->type[i] ];
b_t[i] = -1.0;
// Predictor Step
double tp = 0.0;
double sp = 0.0;
for (int j=0; j<aspc_order+2; j++) {
tp+= aspc_b[j] * t_hist[i][j];
sp+= aspc_b[j] * s_hist[i][j];
}
// Corrector Step
t[i] = aspc_omega * t_hist[i][0] + m_aspc_omega * tp;
s[i] = aspc_omega * s_hist[i][0] + m_aspc_omega * sp;
}
}
} else {
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,50) private(i)
#endif
for (int ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
/* init pre-conditioner for H and init solution vectors */
Hdia_inv[i] = 1. / eta[ atom->type[i] ];
b_s[i] = -chi[ atom->type[i] ];
b_t[i] = -1.0;
/* linear extrapolation for s & t from previous solutions */
//s[i] = 2 * s_hist[i][0] - s_hist[i][1];
//t[i] = 2 * t_hist[i][0] - t_hist[i][1];
/* quadratic extrapolation for s & t from previous solutions */
//s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] );
t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
/* cubic extrapolation for s & t from previous solutions */
s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
//t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]);
}
}
}
pack_flag = 2;
comm->forward_comm_fix(this); //Dist_vector( s );
pack_flag = 3;
comm->forward_comm_fix(this); //Dist_vector( t );
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTEMVCOMPINDEX] += (long) (endTimeBase-startTimeBase);
#endif
}
/* ---------------------------------------------------------------------- */
int FixQEqReaxOMP::CG( double *b, double *x)
{
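// Preconditioned conjugate gradient for a single QEq system H x = b, using the
// diagonal (Jacobi) preconditioner stored in Hdia_inv.  Iterates until the
// relative residual norm drops below 'tolerance' or imax iterations are
// reached; returns the iteration count.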
int i, ii, imax;
double alpha, beta, b_norm;
double sig_old, sig_new;
double my_buf[2], buf[2];
int nn, jj;
int *ilist;
if (reaxc) {
nn = reaxc->list->inum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
ilist = list->ilist;
}
imax = 200;
pack_flag = 1;
sparse_matvec( &H, x, q );
comm->reverse_comm_fix( this); //Coll_Vector( q );
double tmp1, tmp2;
tmp1 = tmp2 = 0.0;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2)
#endif
for (jj = 0; jj < nn; ++jj) {
i = ilist[jj];
if (atom->mask[i] & groupbit) {
r[i] = b[i] - q[i];
d[i] = r[i] * Hdia_inv[i]; //pre-condition
tmp1 += b[i] * b[i];
tmp2 += r[i] * d[i];
}
}
my_buf[0] = tmp1;
my_buf[1] = tmp2;
MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
b_norm = sqrt(buf[0]);
sig_new = buf[1];
for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) {
comm->forward_comm_fix(this); //Dist_vector( d );
sparse_matvec( &H, d, q );
comm->reverse_comm_fix(this); //Coll_vector( q );
tmp1 = 0.0;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) tmp1 += d[ii] * q[ii];
}
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp master
#endif
{
MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world);
alpha = sig_new / tmp2;
tmp1 = 0.0;
}
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) {
x[ii] += alpha * d[ii];
r[ii] -= alpha * q[ii];
// pre-conditioning
p[ii] = r[ii] * Hdia_inv[ii];
tmp1 += r[ii] * p[ii];
}
}
} // omp parallel
sig_old = sig_new;
MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world);
sig_new = tmp2;
beta = sig_new / sig_old;
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) private(ii)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) d[ii] = p[ii] + beta * d[ii];
}
}
if (i >= imax && comm->me == 0) {
char str[128];
sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
"at " BIGINT_FORMAT " step",i,update->ntimestep);
error->warning(FLERR,str);
}
return i;
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b)
{
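// Sparse matrix-vector product b = H x for the symmetric QEq matrix.  H stores
// each off-diagonal element only once, so the main loop adds both H_ij*x_j to
// b[i] and H_ij*x_i to b[j].  The j-side updates are accumulated in the
// per-thread scratch rows b_temp[tid][] to avoid write races and are reduced
// into b after a barrier.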
#if defined(_OPENMP)
#pragma omp parallel default(shared)
#endif
{
int i, j, itr_j;
int nn, NN, ii;
int *ilist;
int nthreads = comm->nthreads;
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
if (reaxc) {
nn = reaxc->list->inum;
NN = reaxc->list->inum + reaxc->list->gnum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
NN = list->inum + list->gnum;
ilist = list->ilist;
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i];
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = nn; ii < NN; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) b[i] = 0;
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i)
for (int t=0; t<nthreads; t++) b_temp[t][i] = 0.0;
// Wait for b accumulated and b_temp zeroed.
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
j = A->jlist[itr_j];
b[i] += A->val[itr_j] * x[j];
b_temp[tid][j] += A->val[itr_j] * x[i];
}
}
}
// Wait till b_temp accumulated
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i)
for (int t = 0; t < nthreads; ++t) b[i] += b_temp[t][i];
} //end omp parallel
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::calculate_Q()
{
int i;
double *q = atom->q;
int nn;
int *ilist;
if (reaxc) {
nn = reaxc->list->inum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
ilist = list->ilist;
}
double tmp1, tmp2;
tmp1 = tmp2 = 0.0;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2)
#endif
for (int ii = 0; ii < nn; ii++) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
tmp1 += s[i];
tmp2 += t[i];
}
}
double my_buf[2], buf[2];
buf[0] = 0.0;
buf[1] = 0.0;
my_buf[0] = tmp1;
my_buf[1] = tmp2;
MPI_Allreduce(&my_buf,&buf,2,MPI_DOUBLE,MPI_SUM,world);
double u = buf[0] / buf[1];
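// u = sum(s)/sum(t) over the QEq group; setting q_i = s_i - u*t_i below
// enforces the charge-neutrality constraint (the group's total charge sums to
// zero).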
#if defined(_OPENMP)
#pragma omp parallel for schedule(static) private(i)
#endif
for (int ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
q[i] = s[i] - u * t[i];
// backup s & t
for (int k = 4; k > 0; --k) {
s_hist[i][k] = s_hist[i][k-1];
t_hist[i][k] = t_hist[i][k-1];
}
s_hist[i][0] = s[i];
t_hist[i][0] = t[i];
}
}
pack_flag = 4;
comm->forward_comm_fix( this); //Dist_vector( atom->q );
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::vector_sum( double* dest, double c, double* v,
double d, double* y, int k)
{
int i;
int *ilist;
if (reaxc) ilist = reaxc->list->ilist;
else ilist = list->ilist;
#if defined(_OPENMP)
#pragma omp parallel for schedule(static) private(i)
#endif
for (int ii=0; ii<k; ii++) {
i = ilist[ii];
if (atom->mask[i] & groupbit) dest[i] = c * v[i] + d * y[i];
}
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::vector_add( double* dest, double c, double* v, int k)
{
int i;
int *ilist;
if (reaxc) ilist = reaxc->list->ilist;
else ilist = list->ilist;
#if defined(_OPENMP)
#pragma omp parallel for schedule(static) private(i)
#endif
for (int ii=0; ii<k; ii++) {
i = ilist[ii];
if (atom->mask[i] & groupbit) dest[i] += c * v[i];
}
}
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
/* dual CG support */
/* ---------------------------------------------------------------------- */
int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2)
{
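// Dual CG: solves H x1 = b1 (the s system) and H x2 = b2 (the t system)
// together.  The s and t components are interleaved in the length-2N work
// vectors (even index = s, odd index = t), so each iteration needs only one
// forward/reverse communication and one sparse matvec for both systems.  The
// system that is still unconverged when the other finishes is polished with a
// regular CG() call afterwards.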
#ifdef OMP_TIMING
double endTimeBase, startTimeBase;
startTimeBase = MPI_Wtime();
#endif
int i, imax;
double alpha_s, alpha_t, beta_s, beta_t, b_norm_s, b_norm_t;
double sig_old_s, sig_old_t, sig_new_s, sig_new_t;
double my_buf[4], buf[4];
int nn, ii, jj;
int *ilist;
if (reaxc) {
nn = reaxc->list->inum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
ilist = list->ilist;
}
imax = 200;
pack_flag = 5; // forward 2x d and reverse 2x q
dual_sparse_matvec( &H, x1, x2, q );
comm->reverse_comm_fix( this); //Coll_Vector( q );
double tmp1, tmp2, tmp3, tmp4;
tmp1 = tmp2 = tmp3 = tmp4 = 0.0;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2,tmp3,tmp4)
#endif
for (jj = 0; jj < nn; ++jj) {
i = ilist[jj];
if (atom->mask[i] & groupbit) {
int indxI = 2 * i;
r[indxI ] = b1[i] - q[indxI ];
r[indxI+1] = b2[i] - q[indxI+1];
d[indxI ] = r[indxI ] * Hdia_inv[i]; //pre-condition
d[indxI+1] = r[indxI+1] * Hdia_inv[i];
tmp1 += b1[i] * b1[i];
tmp2 += b2[i] * b2[i];
tmp3 += r[indxI ] * d[indxI ];
tmp4 += r[indxI+1] * d[indxI+1];
}
}
my_buf[0] = tmp1;
my_buf[1] = tmp2;
my_buf[2] = tmp3;
my_buf[3] = tmp4;
MPI_Allreduce(&my_buf, &buf, 4, MPI_DOUBLE, MPI_SUM, world);
b_norm_s = sqrt(buf[0]);
b_norm_t = sqrt(buf[1]);
sig_new_s = buf[2];
sig_new_t = buf[3];
for (i = 1; i < imax; ++i) {
comm->forward_comm_fix(this); //Dist_vector( d );
dual_sparse_matvec( &H, d, q );
comm->reverse_comm_fix(this); //Coll_vector( q );
tmp1 = tmp2 = 0.0;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) {
int indxI = 2 * ii;
tmp1 += d[indxI ] * q[indxI ];
tmp2 += d[indxI+1] * q[indxI+1];
}
}
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp master
#endif
{
my_buf[0] = tmp1;
my_buf[1] = tmp2;
MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
alpha_s = sig_new_s / buf[0];
alpha_t = sig_new_t / buf[1];
tmp1 = tmp2 = 0.0;
}
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) {
int indxI = 2 * ii;
x1[ii] += alpha_s * d[indxI ];
x2[ii] += alpha_t * d[indxI+1];
r[indxI ] -= alpha_s * q[indxI ];
r[indxI+1] -= alpha_t * q[indxI+1];
// pre-conditioning
p[indxI ] = r[indxI ] * Hdia_inv[ii];
p[indxI+1] = r[indxI+1] * Hdia_inv[ii];
tmp1 += r[indxI ] * p[indxI ];
tmp2 += r[indxI+1] * p[indxI+1];
}
}
} // omp parallel
my_buf[0] = tmp1;
my_buf[1] = tmp2;
sig_old_s = sig_new_s;
sig_old_t = sig_new_t;
MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world);
sig_new_s = buf[0];
sig_new_t = buf[1];
if (sqrt(sig_new_s)/b_norm_s <= tolerance
|| sqrt(sig_new_t)/b_norm_t <= tolerance) break;
beta_s = sig_new_s / sig_old_s;
beta_t = sig_new_t / sig_old_t;
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50) private(ii)
#endif
for (jj = 0; jj < nn; jj++) {
ii = ilist[jj];
if (atom->mask[ii] & groupbit) {
int indxI = 2 * ii;
d[indxI ] = p[indxI ] + beta_s * d[indxI ];
d[indxI+1] = p[indxI+1] + beta_t * d[indxI+1];
}
}
}
i++;
matvecs_s = matvecs_t = i; // the increment above makes the count consistent with CG()
matvecs = i;
// Timing info for iterating s&t together
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase);
ompTimingCount[COMPUTECG1INDEX]++;
ompTimingCGCount[COMPUTECG1INDEX]+= i;
startTimeBase = endTimeBase;
#endif
// If necessary, converge other system
if (sqrt(sig_new_s)/b_norm_s > tolerance) {
pack_flag = 2;
comm->forward_comm_fix(this); // x1 => s
i+= CG(b1, x1);
matvecs_s = i;
}
else if (sqrt(sig_new_t)/b_norm_t > tolerance) {
pack_flag = 3;
comm->forward_comm_fix(this); // x2 => t
i+= CG(b2, x2);
matvecs_t = i;
}
// Timing info for remainder of s or t
#ifdef OMP_TIMING
endTimeBase = MPI_Wtime();
ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase);
ompTimingCount[COMPUTECG2INDEX]++;
ompTimingCGCount[COMPUTECG2INDEX]+= i - matvecs;
startTimeBase = endTimeBase;
#endif
if ( i >= imax && comm->me == 0) {
char str[128];
sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
"at " BIGINT_FORMAT " step",i,update->ntimestep);
error->warning(FLERR,str);
}
return i;
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2, double *b)
{
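// Dual matvec used to form the initial residuals in dual_CG(): x1 and x2 are
// two separate length-N vectors, while the result b is written in the
// interleaved layout (b[2i], b[2i+1]).  The thread-private accumulation into
// b_temp mirrors sparse_matvec() above.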
#if defined(_OPENMP)
#pragma omp parallel default(shared)
#endif
{
int i, j, itr_j;
int nn, NN, ii;
int *ilist;
int indxI, indxJ;
int nthreads = comm->nthreads;
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
if (reaxc) {
nn = reaxc->list->inum;
NN = reaxc->list->inum + reaxc->list->gnum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
NN = list->inum + list->gnum;
ilist = list->ilist;
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
b[indxI ] = eta[ atom->type[i] ] * x1[i];
b[indxI+1] = eta[ atom->type[i] ] * x2[i];
}
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = nn; ii < NN; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
b[indxI] = 0;
b[indxI+1] = 0;
}
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i) {
indxI = 2 * i;
for (int t=0; t<nthreads; t++) {
b_temp[t][indxI ] = 0.0;
b_temp[t][indxI+1] = 0.0;
}
}
// Wait for b accumulated and b_temp zeroed
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
j = A->jlist[itr_j];
indxJ = 2 * j;
b[indxI ] += A->val[itr_j] * x1[j];
b[indxI+1] += A->val[itr_j] * x2[j];
b_temp[tid][indxJ ] += A->val[itr_j] * x1[i];
b_temp[tid][indxJ+1] += A->val[itr_j] * x2[i];
}
}
}
// Wait till b_temp accumulated
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i) {
indxI = 2 * i;
for (int t = 0; t < nthreads; ++t) {
b[indxI ] += b_temp[t][indxI ];
b[indxI+1] += b_temp[t][indxI+1];
}
}
} // omp parallel
}
/* ---------------------------------------------------------------------- */
void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b )
{
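// Overload used inside the dual_CG() iteration: here x is already interleaved
// (the combined search direction d), and b is produced in the same interleaved
// layout.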
#if defined(_OPENMP)
#pragma omp parallel default(shared)
#endif
{
int i, j, itr_j;
int nn, NN, ii;
int *ilist;
int indxI, indxJ;
int nthreads = comm->nthreads;
#if defined(_OPENMP)
int tid = omp_get_thread_num();
#else
int tid = 0;
#endif
if (reaxc) {
nn = reaxc->list->inum;
NN = reaxc->list->inum + reaxc->list->gnum;
ilist = reaxc->list->ilist;
} else {
nn = list->inum;
NN = list->inum + list->gnum;
ilist = list->ilist;
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
b[indxI ] = eta[ atom->type[i] ] * x[indxI ];
b[indxI+1] = eta[ atom->type[i] ] * x[indxI+1];
}
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (ii = nn; ii < NN; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
b[indxI] = 0;
b[indxI+1] = 0;
}
}
#if defined(_OPENMP)
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i) {
indxI = 2 * i;
for (int t=0; t<nthreads; t++) {
b_temp[t][indxI ] = 0.0;
b_temp[t][indxI+1] = 0.0;
}
}
// Wait for b accumulated and b_temp zeroed
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (ii = 0; ii < nn; ++ii) {
i = ilist[ii];
if (atom->mask[i] & groupbit) {
indxI = 2 * i;
for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
j = A->jlist[itr_j];
indxJ = 2 * j;
b[indxI ] += A->val[itr_j] * x[indxJ ];
b[indxI+1] += A->val[itr_j] * x[indxJ+1];
b_temp[tid][indxJ ] += A->val[itr_j] * x[indxI ];
b_temp[tid][indxJ+1] += A->val[itr_j] * x[indxI+1];
}
}
}
// Wait till b_temp accumulated
#if defined(_OPENMP)
#pragma omp barrier
#pragma omp for schedule(dynamic,50)
#endif
for (i = 0; i < NN; ++i) {
indxI = 2 * i;
for (int t = 0; t < nthreads; ++t) {
b[indxI ] += b_temp[t][indxI ];
b[indxI+1] += b_temp[t][indxI+1];
}
}
} // omp parallel
}