Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91066824
modify_cuda.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Nov 7, 12:47
Size
13 KB
Mime Type
text/x-c
Expires
Sat, Nov 9, 12:47 (2 d)
Engine
blob
Format
Raw Data
Handle
22066146
Attached To
rLAMMPS lammps
modify_cuda.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <cstdio>
#include <cstring>
#include "modify_cuda.h"
#include "style_compute.h"
#include "style_fix.h"
#include "atom.h"
#include "comm.h"
#include "fix.h"
#include "compute.h"
#include "group.h"
#include "update.h"
#include "domain.h"
#include "cuda.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define DELTA 4
// mask settings - same as in fix.cpp
#define INITIAL_INTEGRATE 1
#define POST_INTEGRATE 2
#define PRE_EXCHANGE 4
#define PRE_NEIGHBOR 8
#define PRE_FORCE 16
#define POST_FORCE 32
#define FINAL_INTEGRATE 64
#define END_OF_STEP 128
#define THERMO_ENERGY 256
#define INITIAL_INTEGRATE_RESPA 512
#define POST_INTEGRATE_RESPA 1024
#define PRE_FORCE_RESPA 2048
#define POST_FORCE_RESPA 4096
#define FINAL_INTEGRATE_RESPA 8192
#define MIN_PRE_EXCHANGE 16384
#define MIN_POST_FORCE 32768
#define MIN_ENERGY 65536
#include "cuda_modify_flags.h"
#define BIG 1.0e20
/* ---------------------------------------------------------------------- */
ModifyCuda::ModifyCuda(LAMMPS *lmp) : Modify(lmp)
{
cuda = lmp->cuda;
if(cuda == NULL)
error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
n_initial_integrate_cuda = 0;
n_post_integrate_cuda = 0;
n_pre_exchange = 0;
n_pre_neighbor_cuda = 0;
n_pre_force_cuda = 0;
n_post_force_cuda = 0;
n_final_integrate_cuda = 0;
n_end_of_step_cuda = 0;
n_thermo_energy_cuda = 0;
n_initial_integrate_host = 0;
n_post_integrate_host = 0;
n_pre_exchange = 0;
n_pre_neighbor_host = 0;
n_pre_force_host = 0;
n_post_force_host = 0;
n_final_integrate_host = 0;
n_end_of_step_host = 0;
n_thermo_energy_host = 0;
list_initial_integrate_cuda = NULL;
list_post_integrate_cuda = NULL;
list_pre_exchange_cuda = NULL;
list_pre_neighbor_cuda = NULL;
list_pre_force_cuda = NULL;
list_post_force_cuda = NULL;
list_final_integrate_cuda = NULL;
list_end_of_step_cuda = NULL;
list_thermo_energy_cuda = NULL;
end_of_step_every_cuda = NULL;
}
/* ---------------------------------------------------------------------- */
ModifyCuda::~ModifyCuda()
{
delete [] list_initial_integrate_cuda;
delete [] list_post_integrate_cuda;
delete [] list_pre_exchange_cuda;
delete [] list_pre_neighbor_cuda;
delete [] list_pre_force_cuda;
delete [] list_post_force_cuda;
delete [] list_final_integrate_cuda;
delete [] list_end_of_step_cuda;
delete [] list_thermo_energy_cuda;
delete [] end_of_step_every_cuda;
}
/* ----------------------------------------------------------------------
initialize all fixes and computes
------------------------------------------------------------------------- */
void ModifyCuda::init()
{
int i,j;
// delete storage of restart info since it is not valid after 1st run
restart_deallocate();
// create lists of fixes to call at each stage of run
list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate);
list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate);
list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange);
list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor);
list_init(PRE_FORCE,n_pre_force,list_pre_force);
list_init(POST_FORCE,n_post_force,list_post_force);
list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate);
list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step);
list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy);
list_init(INITIAL_INTEGRATE_CUDA, n_initial_integrate_cuda, list_initial_integrate_cuda);
list_init(POST_INTEGRATE_CUDA, n_post_integrate_cuda, list_post_integrate_cuda);
list_init(PRE_EXCHANGE_CUDA, n_pre_exchange_cuda, list_pre_exchange_cuda);
list_init(PRE_NEIGHBOR_CUDA, n_pre_neighbor_cuda, list_pre_neighbor_cuda);
list_init(PRE_FORCE_CUDA, n_pre_force_cuda, list_pre_force_cuda);
list_init(POST_FORCE_CUDA, n_post_force_cuda, list_post_force_cuda);
list_init(FINAL_INTEGRATE_CUDA, n_final_integrate_cuda, list_final_integrate_cuda);
list_init_end_of_step_cuda(END_OF_STEP_CUDA, n_end_of_step_cuda, list_end_of_step_cuda);
list_init_thermo_energy(THERMO_ENERGY_CUDA, n_thermo_energy_cuda, list_thermo_energy_cuda);
n_initial_integrate_host = n_initial_integrate;
n_post_integrate_host = n_post_integrate;
n_pre_exchange_host = n_pre_exchange;
n_pre_neighbor_host = n_pre_neighbor;
n_pre_force_host = n_pre_force;
n_post_force_host = n_post_force;
n_final_integrate_host = n_final_integrate;
n_end_of_step_host = n_end_of_step;
n_thermo_energy_host = n_thermo_energy;
n_initial_integrate = n_initial_integrate_cuda+n_initial_integrate_host;
n_post_integrate = n_post_integrate_cuda+n_post_integrate_host;
n_pre_exchange = n_pre_exchange_cuda+n_pre_exchange_host;
n_pre_neighbor = n_pre_neighbor_cuda+n_pre_neighbor_host;
n_pre_force = n_pre_force_cuda+n_pre_force_host;
n_post_force = n_post_force_cuda+n_post_force_host;
n_final_integrate = n_final_integrate_cuda+n_final_integrate_host;
n_end_of_step = n_end_of_step_cuda+n_end_of_step_host;
n_thermo_energy = n_thermo_energy_cuda+n_thermo_energy_host;
list_init(INITIAL_INTEGRATE_RESPA,
n_initial_integrate_respa,list_initial_integrate_respa);
list_init(POST_INTEGRATE_RESPA,
n_post_integrate_respa,list_post_integrate_respa);
list_init(POST_FORCE_RESPA,
n_post_force_respa,list_post_force_respa);
list_init(PRE_FORCE_RESPA,
n_pre_force_respa,list_pre_force_respa);
list_init(FINAL_INTEGRATE_RESPA,
n_final_integrate_respa,list_final_integrate_respa);
list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange);
list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force);
list_init(MIN_ENERGY,n_min_energy,list_min_energy);
// init each fix
// needs to come before compute init
// this is b/c some computes call fix->dof()
// FixRigid::dof() depends on its own init having been called
for (i = 0; i < nfix; i++) fix[i]->init();
// set global flag if any fix has its restart_pbc flag set
restart_pbc_any = 0;
for (i = 0; i < nfix; i++)
if (fix[i]->restart_pbc) restart_pbc_any = 1;
// create list of computes that store invocation times
list_init_compute();
// init each compute
// set invoked_scalar,vector,etc to -1 to force new run to re-compute them
// add initial timestep to all computes that store invocation times
// since any of them may be invoked by initial thermo
// do not clear out invocation times stored within a compute,
// b/c some may be holdovers from previous run, like for ave fixes
for (i = 0; i < ncompute; i++) {
compute[i]->init();
compute[i]->invoked_scalar = -1;
compute[i]->invoked_vector = -1;
compute[i]->invoked_array = -1;
compute[i]->invoked_peratom = -1;
compute[i]->invoked_local = -1;
}
addstep_compute_all(update->ntimestep);
// warn if any particle is time integrated more than once
int nlocal = atom->nlocal;
int *mask = atom->mask;
int *flag = new int[nlocal];
for (i = 0; i < nlocal; i++) flag[i] = 0;
int groupbit;
for (i = 0; i < nfix; i++) {
if (fix[i]->time_integrate == 0) continue;
groupbit = fix[i]->groupbit;
for (j = 0; j < nlocal; j++)
if (mask[j] & groupbit) flag[j]++;
}
int check = 0;
for (i = 0; i < nlocal; i++)
if (flag[i] > 1) check = 1;
delete [] flag;
int checkall;
MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world);
if (comm->me == 0 && checkall)
error->warning(FLERR,"One or more atoms are time integrated more than once");
}
/* ----------------------------------------------------------------------
1st half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::initial_integrate(int vflag)
{
for(int i = 0; i < n_initial_integrate_cuda; i++)
fix[list_initial_integrate_cuda[i]]->initial_integrate(vflag);
if(n_initial_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_initial_integrate_host; i++)
fix[list_initial_integrate[i]]->initial_integrate(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
post_integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::post_integrate()
{
for(int i = 0; i < n_post_integrate_cuda; i++)
fix[list_post_integrate_cuda[i]]->post_integrate();
if(n_post_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_post_integrate_host; i++)
fix[list_post_integrate[i]]->post_integrate();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_exchange call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_exchange()
{
for(int i = 0; i < n_pre_exchange_cuda; i++)
fix[list_pre_exchange_cuda[i]]->pre_exchange();
if(n_pre_exchange_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_exchange_host; i++)
fix[list_pre_exchange[i]]->pre_exchange();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_neighbor call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_neighbor()
{
for(int i = 0; i < n_pre_neighbor_cuda; i++)
fix[list_pre_neighbor_cuda[i]]->pre_neighbor();
if(n_pre_neighbor_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_neighbor_host; i++)
fix[list_pre_neighbor[i]]->pre_neighbor();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
pre_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::pre_force(int vflag)
{
for(int i = 0; i < n_pre_force_cuda; i++)
fix[list_pre_force_cuda[i]]->pre_force(vflag);
if(n_pre_force_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_pre_force_host; i++)
fix[list_pre_force[i]]->pre_force(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
post_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::post_force(int vflag)
{
for(int i = 0; i < n_post_force_cuda; i++)
fix[list_post_force_cuda[i]]->post_force(vflag);
if(n_post_force_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_post_force_host; i++)
fix[list_post_force[i]]->post_force(vflag);
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
2nd half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyCuda::final_integrate()
{
for (int i = 0; i < n_final_integrate_cuda; i++)
fix[list_final_integrate_cuda[i]]->final_integrate();
if(n_final_integrate_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_final_integrate_host; i++)
fix[list_final_integrate[i]]->final_integrate();
cuda->uploadAll(); cuda->oncpu = false;
}
}
/* ----------------------------------------------------------------------
end-of-timestep call, only for relevant fixes
only call fix->end_of_step() on timesteps that are multiples of nevery
------------------------------------------------------------------------- */
void ModifyCuda::end_of_step()
{
for (int i = 0; i < n_end_of_step_cuda; i++)
if (update->ntimestep % end_of_step_every_cuda[i] == 0)
fix[list_end_of_step_cuda[i]]->end_of_step();
if(n_end_of_step_host != 0)
{
int do_thisstep=0;
for (int i = 0; i < n_end_of_step_host; i++)
if (update->ntimestep % end_of_step_every[i] == 0) do_thisstep=1;
if(do_thisstep)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_end_of_step_host; i++)
if (update->ntimestep % end_of_step_every[i] == 0)
fix[list_end_of_step[i]]->end_of_step();
cuda->uploadAll(); cuda->oncpu = false;
}
}
}
/* ----------------------------------------------------------------------
thermo energy call, only for relevant fixes
called by Thermo class
compute_scalar() is fix call to return energy
------------------------------------------------------------------------- */
double ModifyCuda::thermo_energy()
{
double energy = 0.0;
for (int i = 0; i < n_thermo_energy_cuda; i++)
energy += fix[list_thermo_energy_cuda[i]]->compute_scalar();
if(n_thermo_energy_host != 0)
{
cuda->downloadAll(); cuda->oncpu = true;
for (int i = 0; i < n_thermo_energy_host; i++)
energy += fix[list_thermo_energy[i]]->compute_scalar();
cuda->uploadAll(); cuda->oncpu = false;
}
return energy;
}
void ModifyCuda::list_init_end_of_step_cuda(int mask, int &n, int *&list)
{
delete [] list;
delete [] end_of_step_every_cuda;
n = 0;
for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++;
list = new int[n];
end_of_step_every_cuda = new int[n];
n = 0;
for (int i = 0; i < nfix; i++)
if (fmask[i] & mask) {
list[n] = i;
end_of_step_every_cuda[n++] = fix[i]->nevery;
}
}
Event Timeline
Log In to Comment