Page MenuHomec4science

modify_cuda.cpp
No OneTemporary

File Metadata

Created
Thu, Nov 7, 12:47

modify_cuda.cpp

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdio>
#include <cstring>
#include "modify_cuda.h"
#include "style_compute.h"
#include "style_fix.h"
#include "atom.h"
#include "comm.h"
#include "fix.h"
#include "compute.h"
#include "group.h"
#include "update.h"
#include "domain.h"
#include "cuda.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define DELTA 4
// mask settings - same as in fix.cpp
#define INITIAL_INTEGRATE 1
#define POST_INTEGRATE 2
#define PRE_EXCHANGE 4
#define PRE_NEIGHBOR 8
#define PRE_FORCE 16
#define POST_FORCE 32
#define FINAL_INTEGRATE 64
#define END_OF_STEP 128
#define THERMO_ENERGY 256
#define INITIAL_INTEGRATE_RESPA 512
#define POST_INTEGRATE_RESPA 1024
#define PRE_FORCE_RESPA 2048
#define POST_FORCE_RESPA 4096
#define FINAL_INTEGRATE_RESPA 8192
#define MIN_PRE_EXCHANGE 16384
#define MIN_POST_FORCE 32768
#define MIN_ENERGY 65536
#include "cuda_modify_flags.h"
#define BIG 1.0e20
/* ---------------------------------------------------------------------- */
ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
n_initial_integrate_cuda = 0;
n_post_integrate_cuda = 0;
n_pre_exchange = 0;
n_pre_neighbor_cuda = 0;
n_pre_force_cuda = 0;
n_post_force_cuda = 0;
n_final_integrate_cuda = 0;
n_end_of_step_cuda = 0;
n_thermo_energy_cuda = 0;
n_initial_integrate_host = 0;
n_post_integrate_host = 0;
n_pre_exchange = 0;
n_pre_neighbor_host = 0;
n_pre_force_host = 0;
n_post_force_host = 0;
n_final_integrate_host = 0;
n_end_of_step_host = 0;
n_thermo_energy_host = 0;
list_initial_integrate_cuda = NULL;
list_post_integrate_cuda = NULL;
list_pre_exchange_cuda = NULL;
list_pre_neighbor_cuda = NULL;
list_pre_force_cuda = NULL;
list_post_force_cuda = NULL;
list_final_integrate_cuda = NULL;
list_end_of_step_cuda = NULL;
list_thermo_energy_cuda = NULL;
end_of_step_every_cuda = NULL;
}
/* ---------------------------------------------------------------------- */
ModifyCuda::~ModifyCuda()
{
delete [] list_initial_integrate_cuda;
delete [] list_post_integrate_cuda;
delete [] list_pre_exchange_cuda;
delete [] list_pre_neighbor_cuda;
delete [] list_pre_force_cuda;
delete [] list_post_force_cuda;
delete [] list_final_integrate_cuda;
delete [] list_end_of_step_cuda;
delete [] list_thermo_energy_cuda;
delete [] end_of_step_every_cuda;
}
/* ----------------------------------------------------------------------
initialize all fixes and computes
------------------------------------------------------------------------- */
void ModifyCuda::init()
{
int i,j;
// delete storage of restart info since it is not valid after 1st run
restart_deallocate();
// create lists of fixes to call at each stage of run
list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate);
list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate);
list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange);
list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor);
list_init(PRE_FORCE,n_pre_force,list_pre_force);
list_init(POST_FORCE,n_post_force,list_post_force);
list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate);
list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step);
list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy);
list_init(INITIAL_INTEGRATE_CUDA, n_initial_integrate_cuda, list_initial_integrate_cuda);
list_init(POST_INTEGRATE_CUDA, n_post_integrate_cuda, list_post_integrate_cuda);
list_init(PRE_EXCHANGE_CUDA, n_pre_exchange_cuda, list_pre_exchange_cuda);
list_init(PRE_NEIGHBOR_CUDA, n_pre_neighbor_cuda, list_pre_neighbor_cuda);
list_init(PRE_FORCE_CUDA, n_pre_force_cuda, list_pre_force_cuda);
list_init(POST_FORCE_CUDA, n_post_force_cuda, list_post_force_cuda);
list_init(FINAL_INTEGRATE_CUDA, n_final_integrate_cuda, list_final_integrate_cuda);
list_init_end_of_step_cuda(END_OF_STEP_CUDA, n_end_of_step_cuda, list_end_of_step_cuda);
list_init_thermo_energy(THERMO_ENERGY_CUDA, n_thermo_energy_cuda, list_thermo_energy_cuda);
n_initial_integrate_host = n_initial_integrate;
n_post_integrate_host = n_post_integrate;
n_pre_exchange_host = n_pre_exchange;
n_pre_neighbor_host = n_pre_neighbor;
n_pre_force_host = n_pre_force;
n_post_force_host = n_post_force;
n_final_integrate_host = n_final_integrate;
n_end_of_step_host = n_end_of_step;
n_thermo_energy_host = n_thermo_energy;
n_initial_integrate = n_initial_integrate_cuda+n_initial_integrate_host;
n_post_integrate = n_post_integrate_cuda+n_post_integrate_host;
n_pre_exchange = n_pre_exchange_cuda+n_pre_exchange_host;
n_pre_neighbor = n_pre_neighbor_cuda+n_pre_neighbor_host;
n_pre_force = n_pre_force_cuda+n_pre_force_host;
n_post_force = n_post_force_cuda+n_post_force_host;
n_final_integrate = n_final_integrate_cuda+n_final_integrate_host;
n_end_of_step = n_end_of_step_cuda+n_end_of_step_host;
n_thermo_energy = n_thermo_energy_cuda+n_thermo_energy_host;
list_init(INITIAL_INTEGRATE_RESPA,
n_initial_integrate_respa,list_initial_integrate_respa);
list_init(POST_INTEGRATE_RESPA,
n_post_integrate_respa,list_post_integrate_respa);
list_init(POST_FORCE_RESPA,
n_post_force_respa,list_post_force_respa);
list_init(PRE_FORCE_RESPA,
n_pre_force_respa,list_pre_force_respa);
list_init(FINAL_INTEGRATE_RESPA,
n_final_integrate_respa,list_final_integrate_respa);
list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange);
list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force);
list_init(MIN_ENERGY,n_min_energy,list_min_energy);
// init each fix
// needs to come before compute init
// this is b/c some computes call fix->dof()
// FixRigid::dof() depends on its own init having been called
for (i = 0; i < nfix; i++) fix[i]->init();
// set global flag if any fix has its restart_pbc flag set
restart_pbc_any = 0;
for (i = 0; i < nfix; i++)
if (fix[i]->restart_pbc) restart_pbc_any = 1;
// create list of computes that store invocation times
list_init_compute();
// init each compute
// set invoked_scalar,vector,etc to -1 to force new run to re-compute them
// add initial timestep to all computes that store invocation times
// since any of them may be invoked by initial thermo
// do not clear out invocation times stored within a compute,
// b/c some may be holdovers from previous run, like for ave fixes
for (i = 0; i < ncompute; i++) {
compute[i]->init();
compute[i]->invoked_scalar = -1;
compute[i]->invoked_vector = -1;
compute[i]->invoked_array = -1;
compute[i]->invoked_peratom = -1;
compute[i]->invoked_local = -1;
}
addstep_compute_all(update->ntimestep);
// warn if any particle is time integrated more than once
int nlocal = atom->nlocal;
int *mask = atom->mask;
int *flag = new int[nlocal];
for (i = 0; i < nlocal; i++) flag[i] = 0;
int groupbit;
for (i = 0; i < nfix; i++) {
if (fix[i]->time_integrate == 0) continue;
groupbit = fix[i]->groupbit;
for (j = 0; j < nlocal; j++)
if (mask[j] & groupbit) flag[j]++;
}
int check = 0;
for (i = 0; i < nlocal; i++)
if (flag[i] > 1) check = 1;
delete [] flag;
int checkall;
MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world);
if (comm->me == 0 && checkall)
error->warning(FLERR,"One or more atoms are time integrated more than once");
}
/* ----------------------------------------------------------------------
1st half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::initial_integrate(int vflag)
{
for(int i = 0; i < n_initial_integrate_cuda; i++)
fix[list_initial_integrate_cuda[i]]->initial_integrate(vflag);
if(n_initial_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_initial_integrate_host; i++)
fix[list_initial_integrate[i]]->initial_integrate(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
post_integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::post_integrate()
{
for(int i = 0; i < n_post_integrate_cuda; i++)
fix[list_post_integrate_cuda[i]]->post_integrate();
if(n_post_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_post_integrate_host; i++)
fix[list_post_integrate[i]]->post_integrate();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_exchange call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_exchange()
{
for(int i = 0; i < n_pre_exchange_cuda; i++)
fix[list_pre_exchange_cuda[i]]->pre_exchange();
if(n_pre_exchange_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_exchange_host; i++)
fix[list_pre_exchange[i]]->pre_exchange();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_neighbor call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_neighbor()
{
for(int i = 0; i < n_pre_neighbor_cuda; i++)
fix[list_pre_neighbor_cuda[i]]->pre_neighbor();
if(n_pre_neighbor_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_neighbor_host; i++)
fix[list_pre_neighbor[i]]->pre_neighbor();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_force(int vflag)
{
for(int i = 0; i < n_pre_force_cuda; i++)
fix[list_pre_force_cuda[i]]->pre_force(vflag);
if(n_pre_force_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_force_host; i++)
fix[list_pre_force[i]]->pre_force(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
post_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::post_force(int vflag)
{
for(int i = 0; i < n_post_force_cuda; i++)
fix[list_post_force_cuda[i]]->post_force(vflag);
if(n_post_force_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_post_force_host; i++)
fix[list_post_force[i]]->post_force(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
2nd half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::final_integrate()
{
for (int i = 0; i < n_final_integrate_cuda; i++)
fix[list_final_integrate_cuda[i]]->final_integrate();
if(n_final_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_final_integrate_host; i++)
fix[list_final_integrate[i]]->final_integrate();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
end-of-timestep call, only for relevant fixes
only call fix->end_of_step() on timesteps that are multiples of nevery
------------------------------------------------------------------------- */
void ModifyCuda::end_of_step()
{
for (int i = 0; i < n_end_of_step_cuda; i++)
if (update->ntimestep % end_of_step_every_cuda[i] == 0)
fix[list_end_of_step_cuda[i]]->end_of_step();
if(n_end_of_step_host != 0)
{
int do_thisstep=0;
for (int i = 0; i < n_end_of_step_host; i++)
if (update->ntimestep % end_of_step_every[i] == 0) do_thisstep=1;
if(do_thisstep)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_end_of_step_host; i++)
if (update->ntimestep % end_of_step_every[i] == 0)
fix[list_end_of_step[i]]->end_of_step();
cuda->uploadAll(); cuda->oncpu = false;
}
}
}
/* ----------------------------------------------------------------------
thermo energy call, only for relevant fixes
called by Thermo class
compute_scalar() is fix call to return energy
------------------------------------------------------------------------- */
double ModifyCuda::thermo_energy()
{
double energy = 0.0;
for (int i = 0; i < n_thermo_energy_cuda; i++)
energy += fix[list_thermo_energy_cuda[i]]->compute_scalar();
if(n_thermo_energy_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_thermo_energy_host; i++)
energy += fix[list_thermo_energy[i]]->compute_scalar();
cuda->uploadAll(); cuda->oncpu = false;
}
return energy;
}
void ModifyCuda::list_init_end_of_step_cuda(int mask, int &n, int *&list)
{
delete [] list;
delete [] end_of_step_every_cuda;
n = 0;
for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++;
list = new int[n];
end_of_step_every_cuda = new int[n];
n = 0;
for (int i = 0; i < nfix; i++)
if (fmask[i] & mask) {
list[n] = i;
end_of_step_every_cuda[n++] = fix[i]->nevery;
}
}

Event Timeline