modify_cuda.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Nov 7, 12:47

modify_cuda.cpp
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	Copyright (2003) Sandia Corporation. Under the terms of Contract
	DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
	certain rights in this software. This software is distributed under
	the GNU General Public License.

	See the README file in the top-level LAMMPS directory.
	------------------------------------------------------------------------- */

	#include <cstdio>
	#include <cstring>
	#include "modify_cuda.h"
	#include "style_compute.h"
	#include "style_fix.h"
	#include "atom.h"
	#include "comm.h"
	#include "fix.h"
	#include "compute.h"
	#include "group.h"
	#include "update.h"
	#include "domain.h"
	#include "cuda.h"
	#include "memory.h"
	#include "error.h"

	using namespace LAMMPS_NS;

	#define DELTA 4

	// mask settings - same as in fix.cpp

	#define INITIAL_INTEGRATE 1
	#define POST_INTEGRATE 2
	#define PRE_EXCHANGE 4
	#define PRE_NEIGHBOR 8
	#define PRE_FORCE 16
	#define POST_FORCE 32
	#define FINAL_INTEGRATE 64
	#define END_OF_STEP 128
	#define THERMO_ENERGY 256
	#define INITIAL_INTEGRATE_RESPA 512
	#define POST_INTEGRATE_RESPA 1024
	#define PRE_FORCE_RESPA 2048
	#define POST_FORCE_RESPA 4096
	#define FINAL_INTEGRATE_RESPA 8192
	#define MIN_PRE_EXCHANGE 16384
	#define MIN_POST_FORCE 32768
	#define MIN_ENERGY 65536

	#include "cuda_modify_flags.h"

	#define BIG 1.0e20

	/* ---------------------------------------------------------------------- */

	ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp)
	{
	cuda = lmp->cuda;
	if(cuda == NULL)
	error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");

	n_initial_integrate_cuda = 0;
	n_post_integrate_cuda = 0;
	n_pre_exchange = 0;
	n_pre_neighbor_cuda = 0;
	n_pre_force_cuda = 0;
	n_post_force_cuda = 0;
	n_final_integrate_cuda = 0;
	n_end_of_step_cuda = 0;
	n_thermo_energy_cuda = 0;

	n_initial_integrate_host = 0;
	n_post_integrate_host = 0;
	n_pre_exchange = 0;
	n_pre_neighbor_host = 0;
	n_pre_force_host = 0;
	n_post_force_host = 0;
	n_final_integrate_host = 0;
	n_end_of_step_host = 0;
	n_thermo_energy_host = 0;

	list_initial_integrate_cuda = NULL;
	list_post_integrate_cuda = NULL;
	list_pre_exchange_cuda = NULL;
	list_pre_neighbor_cuda = NULL;
	list_pre_force_cuda = NULL;
	list_post_force_cuda = NULL;
	list_final_integrate_cuda = NULL;
	list_end_of_step_cuda = NULL;
	list_thermo_energy_cuda = NULL;
	end_of_step_every_cuda = NULL;
	}

	/* ---------------------------------------------------------------------- */

	ModifyCuda::~ModifyCuda()
	{
	delete [] list_initial_integrate_cuda;
	delete [] list_post_integrate_cuda;
	delete [] list_pre_exchange_cuda;
	delete [] list_pre_neighbor_cuda;
	delete [] list_pre_force_cuda;
	delete [] list_post_force_cuda;
	delete [] list_final_integrate_cuda;
	delete [] list_end_of_step_cuda;
	delete [] list_thermo_energy_cuda;
	delete [] end_of_step_every_cuda;
	}

	/* ----------------------------------------------------------------------
	initialize all fixes and computes
	------------------------------------------------------------------------- */

	void ModifyCuda::init()
	{
	int i,j;

	// delete storage of restart info since it is not valid after 1st run

	restart_deallocate();

	// create lists of fixes to call at each stage of run

	list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate);
	list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate);
	list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange);
	list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor);
	list_init(PRE_FORCE,n_pre_force,list_pre_force);
	list_init(POST_FORCE,n_post_force,list_post_force);
	list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate);
	list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step);
	list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy);

	list_init(INITIAL_INTEGRATE_CUDA, n_initial_integrate_cuda, list_initial_integrate_cuda);
	list_init(POST_INTEGRATE_CUDA, n_post_integrate_cuda, list_post_integrate_cuda);
	list_init(PRE_EXCHANGE_CUDA, n_pre_exchange_cuda, list_pre_exchange_cuda);
	list_init(PRE_NEIGHBOR_CUDA, n_pre_neighbor_cuda, list_pre_neighbor_cuda);
	list_init(PRE_FORCE_CUDA, n_pre_force_cuda, list_pre_force_cuda);
	list_init(POST_FORCE_CUDA, n_post_force_cuda, list_post_force_cuda);
	list_init(FINAL_INTEGRATE_CUDA, n_final_integrate_cuda, list_final_integrate_cuda);
	list_init_end_of_step_cuda(END_OF_STEP_CUDA, n_end_of_step_cuda, list_end_of_step_cuda);
	list_init_thermo_energy(THERMO_ENERGY_CUDA, n_thermo_energy_cuda, list_thermo_energy_cuda);

	n_initial_integrate_host = n_initial_integrate;
	n_post_integrate_host = n_post_integrate;
	n_pre_exchange_host = n_pre_exchange;
	n_pre_neighbor_host = n_pre_neighbor;
	n_pre_force_host = n_pre_force;
	n_post_force_host = n_post_force;
	n_final_integrate_host = n_final_integrate;
	n_end_of_step_host = n_end_of_step;
	n_thermo_energy_host = n_thermo_energy;

	n_initial_integrate = n_initial_integrate_cuda+n_initial_integrate_host;
	n_post_integrate = n_post_integrate_cuda+n_post_integrate_host;
	n_pre_exchange = n_pre_exchange_cuda+n_pre_exchange_host;
	n_pre_neighbor = n_pre_neighbor_cuda+n_pre_neighbor_host;
	n_pre_force = n_pre_force_cuda+n_pre_force_host;
	n_post_force = n_post_force_cuda+n_post_force_host;
	n_final_integrate = n_final_integrate_cuda+n_final_integrate_host;
	n_end_of_step = n_end_of_step_cuda+n_end_of_step_host;
	n_thermo_energy = n_thermo_energy_cuda+n_thermo_energy_host;

	list_init(INITIAL_INTEGRATE_RESPA,
	n_initial_integrate_respa,list_initial_integrate_respa);
	list_init(POST_INTEGRATE_RESPA,
	n_post_integrate_respa,list_post_integrate_respa);
	list_init(POST_FORCE_RESPA,
	n_post_force_respa,list_post_force_respa);
	list_init(PRE_FORCE_RESPA,
	n_pre_force_respa,list_pre_force_respa);
	list_init(FINAL_INTEGRATE_RESPA,
	n_final_integrate_respa,list_final_integrate_respa);

	list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange);
	list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force);
	list_init(MIN_ENERGY,n_min_energy,list_min_energy);

	// init each fix
	// needs to come before compute init
	// this is b/c some computes call fix->dof()
	// FixRigid::dof() depends on its own init having been called

	for (i = 0; i < nfix; i++) fix[i]->init();

	// set global flag if any fix has its restart_pbc flag set

	restart_pbc_any = 0;
	for (i = 0; i < nfix; i++)
	if (fix[i]->restart_pbc) restart_pbc_any = 1;

	// create list of computes that store invocation times

	list_init_compute();

	// init each compute
	// set invoked_scalar,vector,etc to -1 to force new run to re-compute them
	// add initial timestep to all computes that store invocation times
	// since any of them may be invoked by initial thermo
	// do not clear out invocation times stored within a compute,
	// b/c some may be holdovers from previous run, like for ave fixes

	for (i = 0; i < ncompute; i++) {
	compute[i]->init();
	compute[i]->invoked_scalar = -1;
	compute[i]->invoked_vector = -1;
	compute[i]->invoked_array = -1;
	compute[i]->invoked_peratom = -1;
	compute[i]->invoked_local = -1;
	}
	addstep_compute_all(update->ntimestep);

	// warn if any particle is time integrated more than once

	int nlocal = atom->nlocal;
	int *mask = atom->mask;

	int *flag = new int[nlocal];
	for (i = 0; i < nlocal; i++) flag[i] = 0;

	int groupbit;
	for (i = 0; i < nfix; i++) {
	if (fix[i]->time_integrate == 0) continue;
	groupbit = fix[i]->groupbit;
	for (j = 0; j < nlocal; j++)
	if (mask[j] & groupbit) flag[j]++;
	}

	int check = 0;
	for (i = 0; i < nlocal; i++)
	if (flag[i] > 1) check = 1;

	delete [] flag;

	int checkall;
	MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world);
	if (comm->me == 0 && checkall)
	error->warning(FLERR,"One or more atoms are time integrated more than once");
	}

	/* ----------------------------------------------------------------------
	1st half of integrate call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::initial_integrate(int vflag)
	{
	for(int i = 0; i < n_initial_integrate_cuda; i++)
	fix[list_initial_integrate_cuda[i]]->initial_integrate(vflag);

	if(n_initial_integrate_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_initial_integrate_host; i++)
	fix[list_initial_integrate[i]]->initial_integrate(vflag);
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	post_integrate call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::post_integrate()
	{
	for(int i = 0; i < n_post_integrate_cuda; i++)
	fix[list_post_integrate_cuda[i]]->post_integrate();

	if(n_post_integrate_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_post_integrate_host; i++)
	fix[list_post_integrate[i]]->post_integrate();
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	pre_exchange call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::pre_exchange()
	{
	for(int i = 0; i < n_pre_exchange_cuda; i++)
	fix[list_pre_exchange_cuda[i]]->pre_exchange();

	if(n_pre_exchange_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_pre_exchange_host; i++)
	fix[list_pre_exchange[i]]->pre_exchange();
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	pre_neighbor call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::pre_neighbor()
	{
	for(int i = 0; i < n_pre_neighbor_cuda; i++)
	fix[list_pre_neighbor_cuda[i]]->pre_neighbor();

	if(n_pre_neighbor_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_pre_neighbor_host; i++)
	fix[list_pre_neighbor[i]]->pre_neighbor();
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	pre_force call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::pre_force(int vflag)
	{
	for(int i = 0; i < n_pre_force_cuda; i++)
	fix[list_pre_force_cuda[i]]->pre_force(vflag);

	if(n_pre_force_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_pre_force_host; i++)
	fix[list_pre_force[i]]->pre_force(vflag);
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	post_force call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::post_force(int vflag)
	{
	for(int i = 0; i < n_post_force_cuda; i++)
	fix[list_post_force_cuda[i]]->post_force(vflag);

	if(n_post_force_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_post_force_host; i++)
	fix[list_post_force[i]]->post_force(vflag);
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	2nd half of integrate call, only for relevant fixes
	------------------------------------------------------------------------- */

	void ModifyCuda::final_integrate()
	{
	for (int i = 0; i < n_final_integrate_cuda; i++)
	fix[list_final_integrate_cuda[i]]->final_integrate();

	if(n_final_integrate_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_final_integrate_host; i++)
	fix[list_final_integrate[i]]->final_integrate();
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}

	/* ----------------------------------------------------------------------
	end-of-timestep call, only for relevant fixes
	only call fix->end_of_step() on timesteps that are multiples of nevery
	------------------------------------------------------------------------- */

	void ModifyCuda::end_of_step()
	{
	for (int i = 0; i < n_end_of_step_cuda; i++)
	if (update->ntimestep % end_of_step_every_cuda[i] == 0)
	fix[list_end_of_step_cuda[i]]->end_of_step();

	if(n_end_of_step_host != 0)
	{
	int do_thisstep=0;
	for (int i = 0; i < n_end_of_step_host; i++)
	if (update->ntimestep % end_of_step_every[i] == 0) do_thisstep=1;
	if(do_thisstep)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_end_of_step_host; i++)
	if (update->ntimestep % end_of_step_every[i] == 0)
	fix[list_end_of_step[i]]->end_of_step();
	cuda->uploadAll(); cuda->oncpu = false;
	}
	}
	}

	/* ----------------------------------------------------------------------
	thermo energy call, only for relevant fixes
	called by Thermo class
	compute_scalar() is fix call to return energy
	------------------------------------------------------------------------- */

	double ModifyCuda::thermo_energy()
	{
	double energy = 0.0;

	for (int i = 0; i < n_thermo_energy_cuda; i++)
	energy += fix[list_thermo_energy_cuda[i]]->compute_scalar();

	if(n_thermo_energy_host != 0)
	{
	cuda->downloadAll(); cuda->oncpu = true;
	for (int i = 0; i < n_thermo_energy_host; i++)
	energy += fix[list_thermo_energy[i]]->compute_scalar();
	cuda->uploadAll(); cuda->oncpu = false;
	}

	return energy;
	}



	void ModifyCuda::list_init_end_of_step_cuda(int mask, int &n, int *&list)
	{
	delete [] list;
	delete [] end_of_step_every_cuda;

	n = 0;
	for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++;
	list = new int[n];
	end_of_step_every_cuda = new int[n];

	n = 0;
	for (int i = 0; i < nfix; i++)
	if (fmask[i] & mask) {
	list[n] = i;
	end_of_step_every_cuda[n++] = fix[i]->nevery;
	}
	}

modify_cuda.cppNo OneTemporaryActions

File Metadata

modify_cuda.cppView Options

Event Timeline

modify_cuda.cpp
No OneTemporary
Actions

modify_cuda.cpp
View Options