/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Yuxing Peng and Chris Knight (U Chicago)
------------------------------------------------------------------------- */
#include "string.h"
#include "verlet_split_intel.h"
#include "universe.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "atom_vec.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "output.h"
#include "update.h"
#include "fix.h"
#include "modify.h"
#include "timer.h"
#include "memory.h"
#include "error.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
  VerletIntel(lmp, narg, arg)
{
  // error checks on partitions

  if (universe->nworlds != 2)
    error->universe_all(FLERR,"Verlet/split requires 2 partitions");
  if (universe->procs_per_world[0] % universe->procs_per_world[1])
    error->universe_all(FLERR,"Verlet/split requires Rspace partition "
                        "size be multiple of Kspace partition size");

  // master = 1 for Rspace procs, 0 for Kspace procs

  if (universe->iworld == 0) master = 1;
  else master = 0;

  ratio = universe->procs_per_world[0] / universe->procs_per_world[1];
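
  // e.g. a universe launched with 12 Rspace procs and 4 Kspace procs
  // gives ratio = 3: each Kspace proc is paired with 3 Rspace procs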

  // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs

  int kspace_procgrid[3];

  if (universe->me == universe->root_proc[1]) {
    kspace_procgrid[0] = comm->procgrid[0];
    kspace_procgrid[1] = comm->procgrid[1];
    kspace_procgrid[2] = comm->procgrid[2];
  }

  MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld);

  int ***kspace_grid2proc;
  memory->create(kspace_grid2proc,kspace_procgrid[0],
                 kspace_procgrid[1],kspace_procgrid[2],
                 "verlet/split:kspace_grid2proc");

  if (universe->me == universe->root_proc[1]) {
    for (int i = 0; i < comm->procgrid[0]; i++)
      for (int j = 0; j < comm->procgrid[1]; j++)
        for (int k = 0; k < comm->procgrid[2]; k++)
          kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k];
  }

  MPI_Bcast(&kspace_grid2proc[0][0][0],
            kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT,
            universe->root_proc[1],universe->uworld);

  // Rspace partition must be multiple of Kspace partition in each dim
  // so atoms of one Kspace proc coincide with atoms of several Rspace procs

  if (master) {
    int flag = 0;
    if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1;
    if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1;
    if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1;
    if (flag)
      error->one(FLERR,
                 "Verlet/split requires Rspace partition layout be "
                 "multiple of Kspace partition layout in each dim");
  }

  // block = 1 Kspace proc with set of Rspace procs it overlays
  // me_block = 0 for Kspace proc
  // me_block = 1 to ratio for Rspace procs
  // block = MPI communicator for that set of procs

  int iblock,key;

  if (!master) {
    iblock = comm->me;
    key = 0;
  } else {
    int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]);
    int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]);
    int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]);
    iblock = kspace_grid2proc[kpx][kpy][kpz];
    key = 1;
  }

  MPI_Comm_split(universe->uworld,iblock,key,&block);
  MPI_Comm_rank(block,&me_block);
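
  // within each block communicator the Kspace proc (key = 0) becomes
  // rank 0, and the ratio Rspace procs (key = 1, ties broken by their
  // universe rank) become ranks 1 to ratio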

  // output block groupings to universe screen/logfile
  // bmap is ordered by block and then by proc within block

  int *bmap = new int[universe->nprocs];
  for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1;
  bmap[iblock*(ratio+1)+me_block] = universe->me;

  int *bmapall = new int[universe->nprocs];
  MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld);
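
  // each proc filled in only its own slot (all other entries are -1),
  // so the MAX reduction merges the per-proc maps into the full bmapall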

  if (universe->me == 0) {
    if (universe->uscreen) {
      fprintf(universe->uscreen,
              "Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
      int m = 0;
      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
        fprintf(universe->uscreen," block %d:",i);
        int kspace_proc = bmapall[m];
        for (int j = 1; j <= ratio; j++)
          fprintf(universe->uscreen," %d",bmapall[m+j]);
        fprintf(universe->uscreen," %d",kspace_proc);
        kspace_proc = bmapall[m];
        for (int j = 1; j <= ratio; j++) {
          if (j == 1) fprintf(universe->uscreen," (");
          else fprintf(universe->uscreen," ");
          fprintf(universe->uscreen,"%d",
                  universe->uni2orig[bmapall[m+j]]);
        }
        fprintf(universe->uscreen," %d)\n",universe->uni2orig[kspace_proc]);
        m += ratio + 1;
      }
    }
    if (universe->ulogfile) {
      fprintf(universe->ulogfile,
              "Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
      int m = 0;
      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
        fprintf(universe->ulogfile," block %d:",i);
        int kspace_proc = bmapall[m];
        for (int j = 1; j <= ratio; j++)
          fprintf(universe->ulogfile," %d",bmapall[m+j]);
        fprintf(universe->ulogfile," %d",kspace_proc);
        kspace_proc = bmapall[m];
        for (int j = 1; j <= ratio; j++) {
          if (j == 1) fprintf(universe->ulogfile," (");
          else fprintf(universe->ulogfile," ");
          fprintf(universe->ulogfile,"%d",
                  universe->uni2orig[bmapall[m+j]]);
        }
        fprintf(universe->ulogfile," %d)\n",universe->uni2orig[kspace_proc]);
        m += ratio + 1;
      }
    }
  }

  memory->destroy(kspace_grid2proc);
  delete [] bmap;
  delete [] bmapall;

  // size/disp = vectors for MPI gather/scatter within block

  qsize = new int[ratio+1];
  qdisp = new int[ratio+1];
  xsize = new int[ratio+1];
  xdisp = new int[ratio+1];

  // f_kspace = Rspace copy of Kspace forces
  // allocate dummy version for Kspace partition

  maxatom = 0;
  f_kspace = NULL;
  if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace");
}

/* ---------------------------------------------------------------------- */

VerletSplitIntel::~VerletSplitIntel()
{
  delete [] qsize;
  delete [] qdisp;
  delete [] xsize;
  delete [] xdisp;
  memory->destroy(f_kspace);
  MPI_Comm_free(&block);
}

/* ----------------------------------------------------------------------
   initialization before run
------------------------------------------------------------------------- */

void VerletSplitIntel::init()
{
  if (!force->kspace && comm->me == 0)
    error->warning(FLERR,"No Kspace calculation with verlet/split");

  if (force->kspace_match("tip4p",0)) tip4p_flag = 1;
  else tip4p_flag = 0;

  // currently TIP4P does not work with verlet/split, so generate error
  // see Axel email on this, also other TIP4P notes below

  if (tip4p_flag) error->all(FLERR,"Verlet/split does not yet support TIP4P");

  VerletIntel::init();
}

/* ----------------------------------------------------------------------
   setup before run
   servant partition only sets up KSpace calculation
------------------------------------------------------------------------- */

void VerletSplitIntel::setup()
{
  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");

  if (!master) force->kspace->setup();
  else {
    VerletIntel::setup();
  }
}

/* ----------------------------------------------------------------------
   setup without output
   flag = 0 = just force calculation
   flag = 1 = reneighbor and force calculation
   servant partition only sets up KSpace calculation
------------------------------------------------------------------------- */

void VerletSplitIntel::setup_minimal(int flag)
{
  if (!master) force->kspace->setup();
  else {
    VerletIntel::setup_minimal(flag);
  }
}

/* ----------------------------------------------------------------------
   run for N steps
   master partition does everything but Kspace
   servant partition does just Kspace
   communicate back and forth every step:
     atom coords from master -> servant
     kspace forces from servant -> master
     also box bounds from master -> servant if necessary
------------------------------------------------------------------------- */

void VerletSplitIntel::run(int n)
{
  bigint ntimestep;
  int nflag,sortflag;

  // sync both partitions before start timer

  MPI_Barrier(universe->uworld);
  timer->init();
  timer->barrier_start(TIME_LOOP);

  // setup initial Rspace <-> Kspace comm params

  rk_setup();

  // check if OpenMP support fix defined

  Fix *fix_omp;
  int ifix = modify->find_fix("package_omp");
  if (ifix < 0) fix_omp = NULL;
  else fix_omp = modify->fix[ifix];

  // flags for timestepping iterations

  int n_post_integrate = modify->n_post_integrate;
  int n_pre_exchange = modify->n_pre_exchange;
  int n_pre_neighbor = modify->n_pre_neighbor;
  int n_pre_force = modify->n_pre_force;
  int n_post_force = modify->n_post_force;
  int n_end_of_step = modify->n_end_of_step;

  if (atom->sortfreq > 0) sortflag = 1;
  else sortflag = 0;

  for (int i = 0; i < n; i++) {
    ntimestep = ++update->ntimestep;
    ev_set(ntimestep);

    // initial time integration

    if (master) {
      modify->initial_integrate(vflag);
      if (n_post_integrate) modify->post_integrate();
    }

    // regular communication vs neighbor list rebuild

    if (master) nflag = neighbor->decide();
    MPI_Bcast(&nflag,1,MPI_INT,1,block);
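
    // root of this Bcast is rank 1, the first Rspace proc in the block:
    // rank 0 is the Kspace proc, which does not call neighbor->decide()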

    if (master) {
      if (nflag == 0) {
        timer->stamp();
        comm->forward_comm();
        timer->stamp(TIME_COMM);
      } else {
        if (n_pre_exchange) modify->pre_exchange();
        if (triclinic) domain->x2lamda(atom->nlocal);
        domain->pbc();
        if (domain->box_change) {
          domain->reset_box();
          comm->setup();
          if (neighbor->style) neighbor->setup_bins();
        }
        timer->stamp();
        comm->exchange();
        if (sortflag && ntimestep >= atom->nextsort) atom->sort();
        comm->borders();
        if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
        timer->stamp(TIME_COMM);
        if (n_pre_neighbor) modify->pre_neighbor();
        neighbor->build();
        timer->stamp(TIME_NEIGHBOR);
      }
    }

    // if reneighboring occurred, re-setup Rspace <-> Kspace comm params
    // comm Rspace atom coords to Kspace procs

    if (nflag) rk_setup();
    r2k_comm();

    // force computations

    force_clear();

    if (master) {
      if (n_pre_force) modify->pre_force(vflag);

      timer->stamp();
      if (force->pair) {
        force->pair->compute(eflag,vflag);
        timer->stamp(TIME_PAIR);
      }

      if (atom->molecular) {
        if (force->bond) force->bond->compute(eflag,vflag);
        if (force->angle) force->angle->compute(eflag,vflag);
        if (force->dihedral) force->dihedral->compute(eflag,vflag);
        if (force->improper) force->improper->compute(eflag,vflag);
        timer->stamp(TIME_BOND);
      }

#ifdef _LMP_INTEL_OFFLOAD
      if (sync_mode == 1) {
        fix_intel->sync_coprocessor();
        timer->stamp(TIME_PAIR);
      }
#endif

      if (force->newton) {
        comm->reverse_comm();
        timer->stamp(TIME_COMM);
      }

#ifdef _LMP_INTEL_OFFLOAD
      if (sync_mode == 2) {
        fix_intel->sync_coprocessor();
        timer->stamp(TIME_PAIR);
      }
#endif

    } else {

      // run FixOMP as sole pre_force fix, if defined

      if (fix_omp) fix_omp->pre_force(vflag);

      if (force->kspace) {
        timer->stamp();
        force->kspace->compute(eflag,vflag);
        timer->stamp(TIME_KSPACE);
      }

      // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()

      if (tip4p_flag && force->newton) {
        comm->reverse_comm();
        timer->stamp(TIME_COMM);
      }
    }

    // comm and sum Kspace forces back to Rspace procs

    k2r_comm();

    // force modifications, final time integration, diagnostics
    // all output

    if (master) {
      if (n_post_force) modify->post_force(vflag);
      modify->final_integrate();
      if (n_end_of_step) modify->end_of_step();

      if (ntimestep == output->next) {
        timer->stamp();
        output->write(ntimestep);
        timer->stamp(TIME_OUTPUT);
      }
    }
  }
}

/* ----------------------------------------------------------------------
   setup params for Rspace <-> Kspace communication
   called initially and after every reneighbor
   also communicate atom charges from Rspace to Kspace since they are static
------------------------------------------------------------------------- */

void VerletSplitIntel::rk_setup()
{
  // grow f_kspace array on master procs if necessary

  if (master) {
    if (atom->nlocal > maxatom) {
      memory->destroy(f_kspace);
      maxatom = atom->nmax;
      memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace");
    }
  }

  // qsize = # of atoms owned by each master proc in block

  int n = 0;
  if (master) n = atom->nlocal;
  MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block);

  // setup qdisp, xsize, xdisp based on qsize
  // only needed by Kspace proc
  // set Kspace nlocal to sum of Rspace nlocals
  // insure Kspace atom arrays are large enough

  if (!master) {
    qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0;
    for (int i = 1; i <= ratio; i++) {
      qdisp[i] = qdisp[i-1]+qsize[i-1];
      xsize[i] = 3*qsize[i];
      xdisp[i] = xdisp[i-1]+xsize[i-1];
    }
    atom->nlocal = qdisp[ratio] + qsize[ratio];
    while (atom->nmax <= atom->nlocal) atom->avec->grow(0);
    atom->nghost = 0;
  }
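
  // e.g. with ratio = 2 and gathered counts qsize = {0,10,12}:
  //   qdisp = {0,0,10}, xsize = {0,30,36}, xdisp = {0,0,30},
  //   and the Kspace proc's nlocal becomes 22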

  // one-time gather of Rspace atom charges to Kspace proc

  MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block);

  // for TIP4P also need to send atom type and tag
  // KSpace procs need to acquire ghost atoms and map all their atoms
  // map_clear() call is in lieu of comm->exchange() which performs map_clear
  // borders() call acquires ghost atoms and maps them
  // NOTE: do atom coords need to be communicated here before borders() call?
  //       could do this by calling r2k_comm() here and not again from run()
  //       except that forward_comm() in r2k_comm() is wrong

  if (tip4p_flag) {
    //r2k_comm();
    MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block);
    MPI_Gatherv(atom->tag,n,MPI_LMP_TAGINT,
                atom->tag,qsize,qdisp,MPI_LMP_TAGINT,0,block);
    if (!master) {
      if (triclinic) domain->x2lamda(atom->nlocal);
      if (domain->box_change) comm->setup();
      timer->stamp();
      atom->map_clear();
      comm->borders();
      if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
      timer->stamp(TIME_COMM);
    }
  }
}

/* ----------------------------------------------------------------------
   communicate Rspace atom coords to Kspace
   also eflag,vflag and box bounds if needed
------------------------------------------------------------------------- */

void VerletSplitIntel::r2k_comm()
{
  MPI_Status status;

  int n = 0;
  if (master) n = atom->nlocal;
  MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp,
              MPI_DOUBLE,0,block);
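
  // on the Kspace root atom->x serves as both send and receive buffer,
  // but its send count n*3 is 0 there since only Rspace procs set n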

  // send eflag,vflag from Rspace to Kspace

  if (me_block == 1) {
    int flags[2];
    flags[0] = eflag; flags[1] = vflag;
    MPI_Send(flags,2,MPI_INT,0,0,block);
  } else if (!master) {
    int flags[2];
    MPI_Recv(flags,2,MPI_INT,1,0,block,&status);
    eflag = flags[0]; vflag = flags[1];
  }

  // send box bounds from Rspace to Kspace if simulation box is dynamic

  if (domain->box_change) {
    if (me_block == 1) {
      MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block);
      MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block);
    } else if (!master) {
      MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status);
      MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status);
      domain->set_global_box();
      domain->set_local_box();
      force->kspace->setup();
    }
  }

  // for TIP4P, Kspace partition needs to update its ghost atoms

  if (tip4p_flag && !master) {
    timer->stamp();
    comm->forward_comm();
    timer->stamp(TIME_COMM);
  }
}

/* ----------------------------------------------------------------------
   communicate and sum Kspace atom forces back to Rspace
------------------------------------------------------------------------- */

void VerletSplitIntel::k2r_comm()
{
  if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block);
  if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block);

  int n = 0;
  if (master) n = atom->nlocal;
  MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE,
               f_kspace[0],n*3,MPI_DOUBLE,0,block);
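
  // the scatter reuses the xsize/xdisp layout from rk_setup(), so each
  // Rspace proc receives exactly the force slice for its own atoms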

  if (master) {
    double **f = atom->f;
    int nlocal = atom->nlocal;
    for (int i = 0; i < nlocal; i++) {
      f[i][0] += f_kspace[i][0];
      f[i][1] += f_kspace[i][1];
      f[i][2] += f_kspace[i][2];
    }
  }
}

/* ----------------------------------------------------------------------
   memory usage of Kspace force array on master procs
------------------------------------------------------------------------- */

bigint VerletSplitIntel::memory_usage()
{
  bigint bytes = maxatom*3 * sizeof(double);
  return bytes;
}
