
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Mike Brown (SNL)
------------------------------------------------------------------------- */

#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "pair_gayberne_gpu.h"
#include "math_extra.h"
#include "atom.h"
#include "atom_vec.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "integrate.h"
#include "memory.h"
#include "error.h"
#include "neigh_request.h"
#include "universe.h"
#include <string>
#ifdef GB_GPU_OMP
#include "omp.h"
#endif
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
// External functions from cuda library for atom decomposition
bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                 const double upsilon, const double mu, double **shape,
                 double **well, double **cutsq, double **sigma,
                 double **epsilon, double *host_lshape, int **form,
                 double **host_lj1, double **host_lj2, double **host_lj3,
                 double **host_lj4, double **offset, double *special_lj,
                 const int max_nbors, const int thread, const int gpu_id);
void gb_gpu_clear(const int thread);
int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
                         int *ilist, const int *numj, const int *type,
                         const int thread, bool &success);
void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
                  const int thread);
void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type,
                 const bool rebuild, const int thread);
void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
                     const int thread);
double gb_gpu_forces(double **f, double **tor, const int *ilist,
                     const bool eflag, const bool vflag, const bool eflag_atom,
                     const bool vflag_atom, double *eatom, double **vatom,
                     double *virial, const int thread);
std::string gb_gpu_name(const int i, const int max_nbors);
void gb_gpu_time(const int thread);
int gb_gpu_num_devices();
double gb_gpu_bytes();
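
// Per-timestep call sequence, as driven by compute() below:
//   gb_gpu_reset_nbors() - reseed the device neighbor list (only on steps
//                          where LAMMPS has re-neighbored)
//   gb_gpu_atom()        - copy positions, quaternions, and types to the GPU
//   gb_gpu_nbors()       - stream packed neighbor indices to the GPU
//   gb_gpu_gayberne()    - run the Gay-Berne kernel
//   gb_gpu_forces()      - copy forces/torques back and return the energy
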
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0),
                                                omp_chunk(0), nthreads(1),
                                                multi_gpu_mode(ONE_NODE),
                                                multi_gpu_param(0)
{
  ij_new[0]=NULL;
}
/* ----------------------------------------------------------------------
   free all arrays
------------------------------------------------------------------------- */

PairGayBerneGPU::~PairGayBerneGPU()
{
  printf("\n\n-------------------------------------");
  printf("--------------------------------\n");
  printf(" GPU Time Stamps: ");
  printf("\n-------------------------------------");
  printf("--------------------------------\n");
  gb_gpu_time(my_thread);
  printf("-------------------------------------");
  printf("--------------------------------\n\n");

  #pragma omp parallel
  {
    #ifdef GB_GPU_OMP
    int my_thread=omp_get_thread_num();
    #endif
    gb_gpu_clear(my_thread);
    if (ij_new[my_thread]!=NULL)
      delete [] ij_new[my_thread];
  }
}
/* ---------------------------------------------------------------------- */
void PairGayBerneGPU::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;
  if (vflag_atom)
    error->all("Per-atom virial not available with GPU Gay-Berne");

  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;
  int inum = list->inum;
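
  // the device neighbor data is rebuilt only on steps where LAMMPS itself
  // has re-neighbored since the last call to compute()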
  bool rebuild=false;
  if (neighbor->ncalls > last_neighbor) {
    last_neighbor=neighbor->ncalls;
    rebuild=true;
  }

  #pragma omp parallel
  {
    bool success=true;

    #ifdef GB_GPU_OMP
    int my_thread=omp_get_thread_num();
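    // with OpenMP driving one GPU per thread, the local atom list is split
    // into contiguous chunks of ceil(inum/nthreads) atoms; the last thread
    // takes whatever remains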
    if (rebuild) {
      omp_chunk=static_cast<int>(ceil(static_cast<double>(inum)/nthreads));
      if (my_thread==nthreads-1)
        thread_inum[my_thread]=inum-(nthreads-1)*omp_chunk;
      else
        thread_inum[my_thread]=omp_chunk;
      olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal,
                                          thread_inum[my_thread],
                                          list->ilist+omp_chunk*my_thread,
                                          list->numneigh, atom->type, my_thread,
                                          success);
    }
    #else
    if (rebuild)
      olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal, inum, list->ilist,
                                          list->numneigh, atom->type, my_thread,
                                          success);
    #endif
    if (!success)
      error->one("Total # of atoms exceeds maximum allowed per GPGPU");

    // copy atom data to GPU
    gb_gpu_atom(atom->x,atom->quat,atom->type,rebuild,my_thread);

    int i,j,ii,jj,jnum;
    double factor_lj;
    int *jlist;
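
    // on a rebuild, pack the j index of every neighbor into the per-thread
    // ij staging buffer and flush it to the GPU each time it fills
    // (ij_size entries per flush)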
    if (rebuild==true) {
      int num_ij = 0;

      // loop over neighbors of my atoms
      int *ijp=ij_new[my_thread];
      #ifdef GB_GPU_OMP
      int mgo=my_thread*omp_chunk;
      int mgot=mgo+thread_inum[my_thread];
      #else
      int mgo=0, mgot=inum;
      #endif
      for (ii = mgo; ii<mgot; ii++) {
        i = olist[my_thread][ii];
        jlist = list->firstneigh[i];
        jnum = list->numneigh[i];
        for (jj = 0; jj < jnum; jj++) {
          j = jlist[jj];
          *ijp=j;
          ijp++;
          num_ij++;
          if (num_ij==ij_size) {
            gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
            ijp=ij_new[my_thread];
            num_ij=0;
          }
        }
      }
      if (num_ij>0)
        gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
    }
    gb_gpu_gayberne(eflag,vflag,rebuild,my_thread);
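
    // energy and virial contributions are accumulated per thread, then
    // reduced into the global totals inside the critical section below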
    double lvirial[6];
    for (int i=0; i<6; i++) lvirial[i]=0.0;
    double my_eng=gb_gpu_forces(atom->f,atom->torque,olist[my_thread],eflag,vflag,
                                eflag_atom, vflag_atom, eatom, vatom, lvirial,
                                my_thread);

    #pragma omp critical
    {
      eng_vdwl+=my_eng;
      virial[0]+=lvirial[0];
      virial[1]+=lvirial[1];
      virial[2]+=lvirial[2];
      virial[3]+=lvirial[3];
      virial[4]+=lvirial[4];
      virial[5]+=lvirial[5];
    }
  } // End parallel

  if (vflag_fdotr) virial_compute();
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairGayBerneGPU::settings(int narg, char **arg)
{
  // strip off GPU keyword/value and send remaining args to parent

  if (narg < 2) error->all("Illegal pair_style command");

  // set multi_gpu_mode to one/node for multiple gpus on 1 node
  // -- param is starting gpu id
  // set multi_gpu_mode to one/gpu to select the same gpu id on every node
  // -- param is id of gpu
  // set multi_gpu_mode to multi/gpu to use multiple gpus per node
  // -- param is number of gpus per node
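  //
  // a hypothetical input line, assuming this class is registered as
  // pair_style gayberne/gpu and the remaining arguments are the usual
  // gayberne ones (gamma, upsilon, mu, global cutoff):
  //   pair_style gayberne/gpu one/node 0 1.0 1.0 1.0 7.0
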
  if (strcmp(arg[0],"one/node") == 0)
    multi_gpu_mode = ONE_NODE;
  else if (strcmp(arg[0],"one/gpu") == 0)
    multi_gpu_mode = ONE_GPU;
  else if (strcmp(arg[0],"multi/gpu") == 0)
    multi_gpu_mode = MULTI_GPU;
  else error->all("Illegal pair_style command");

  multi_gpu_param = atoi(arg[1]);

  if (multi_gpu_mode == MULTI_GPU && multi_gpu_param < 1)
    error->all("Illegal pair_style command");

  PairGayBerne::settings(narg-2,&arg[2]);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairGayBerneGPU::init_style()
{
  if (force->pair_match("gpu",0) == NULL)
    error->all("Cannot use pair hybrid with multiple GPU pair styles");
  if (!atom->quat_flag || !atom->torque_flag || !atom->avec->shape_type)
    error->all("Pair gayberne requires atom attributes quat, torque, shape");
  if (atom->radius_flag)
    error->all("Pair gayberne cannot be used with atom attribute diameter");

  // set the GPU ID
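  // one/node  : gpu id = MPI rank + the starting id given in the input script
  // one/gpu   : every process uses the single gpu id given in the input script
  // multi/gpu : gpu id = MPI rank modulo the number of gpus per node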
  int my_gpu=comm->me+multi_gpu_param;
  int ngpus=universe->nprocs;
  if (multi_gpu_mode==ONE_GPU) {
    my_gpu=multi_gpu_param;
    ngpus=1;
  } else if (multi_gpu_mode==MULTI_GPU) {
    ngpus=multi_gpu_param;
    my_gpu=comm->me%ngpus;
    if (ngpus>universe->nprocs)
      ngpus=universe->nprocs;
  }

  int irequest = neighbor->request(this);

  // per-type shape precalculations
  for (int i = 1; i <= atom->ntypes; i++) {
    if (setwell[i]) {
      double *one = atom->shape[i];
      shape[i][0] = one[0]*one[0];
      shape[i][1] = one[1]*one[1];
      shape[i][2] = one[2]*one[2];
      lshape[i] = (one[0]*one[1]+one[2]*one[2])*sqrt(one[0]*one[1]);
    }
  }

  // Repeat cutsq calculation because done after call to init_style
  double cut;
  for (int i = 1; i <= atom->ntypes; i++)
    for (int j = i; j <= atom->ntypes; j++) {
      cut = init_one(i,j);
      cutsq[i][j] = cutsq[j][i] = cut*cut;
    }

  // If compiled with OpenMP and only 1 proc, try to use multiple GPUs w/threads
  #ifdef GB_GPU_OMP
  if (multi_gpu_mode!=ONE_GPU)
    nthreads=ngpus=gb_gpu_num_devices();
  else
    nthreads=ngpus=1;
  if (nthreads>MAX_GPU_THREADS)
    nthreads=MAX_GPU_THREADS;
  omp_set_num_threads(nthreads);
  #endif

  #pragma omp parallel firstprivate(my_gpu)
  {
    #ifdef GB_GPU_OMP
    int my_thread = omp_get_thread_num();
    if (multi_gpu_mode!=ONE_GPU)
      my_gpu=my_thread;
    if (multi_gpu_mode==ONE_NODE)
      my_gpu+=multi_gpu_param;
    #endif

    bool init_ok=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu,
                             shape, well, cutsq, sigma, epsilon, lshape, form,
                             lj1, lj2, lj3, lj4, offset, force->special_lj,
                             neighbor->oneatom, my_thread, my_gpu);
    if (!init_ok)
      error->one("At least one process could not allocate a CUDA-enabled gpu");

    if (ij_new[my_thread]!=NULL)
      delete [] ij_new[my_thread];
    ij_new[my_thread]=new int[ij_size];
  }

  last_neighbor = -1;
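
  // the GPU kernel evaluates both members of every pair itself, so a full
  // (not half) neighbor list is requested and newton pair must be off
  // (checked below)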
  neighbor->requests[irequest]->half = 0;
  neighbor->requests[irequest]->full = 1;

  if (force->newton_pair)
    error->all("Cannot use newton pair with GPU GayBerne pair style");

  if (comm->me == 0 && screen) {
    printf("\n-------------------------------------");
    printf("-------------------------------------\n");
    printf("- Using GPGPU acceleration for Gay-Berne:\n");
    printf("-------------------------------------");
    printf("-------------------------------------\n");
    for (int i=0; i<ngpus; i++) {
      int gpui=my_gpu;
      if (multi_gpu_mode==ONE_NODE)
        gpui=i+multi_gpu_param;
      else if (multi_gpu_mode==MULTI_GPU)
        gpui=i;
      std::string gpu_string=gb_gpu_name(gpui,neighbor->oneatom);
      printf("GPU %d: %s\n",gpui,gpu_string.c_str());
    }
    printf("-------------------------------------");
    printf("-------------------------------------\n\n");
  }
}
/* ---------------------------------------------------------------------- */
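// host cost of the parent class plus the per-thread ij staging buffers,
// plus whatever the GPU library itself reports through gb_gpu_bytes()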
double PairGayBerneGPU::memory_usage()
{
  double bytes=Pair::memory_usage()+nthreads*ij_size*sizeof(int);
  return bytes+gb_gpu_bytes();
}
