cuda_shared.h
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.
   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifndef _CUDA_SHARED_H_
#define _CUDA_SHARED_H_
#include "cuda_precision.h"
#define CUDA_MAX_DEBUG_SIZE 1000 // size of the debugdata array (holds this many doubles, or twice as many ints)
struct dev_array {
  void* dev_data;    // pointer to memory address on the CUDA device
  unsigned dim[3];   // array dimensions
};
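/* Illustrative sketch (not part of the original header): a dev_array typically
   pairs a device allocation with its logical shape. The buffer name "x_dev"
   and the capacity "nmax" below are hypothetical placeholders.

     dev_array x_dev;                      // e.g. per-atom positions
     unsigned nmax = 1024;                 // hypothetical capacity
     x_dev.dim[0] = nmax;
     x_dev.dim[1] = 3;
     x_dev.dim[2] = 1;
     cudaMalloc(&x_dev.dev_data, (size_t) nmax * 3 * sizeof(X_FLOAT));
*/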
struct cuda_shared_atom {   // relevant data from atom class
  dev_array dx;             // cumulated distance for binning settings
  dev_array x;              // position
  dev_array v;              // velocity
  dev_array f;              // force
  dev_array tag;
  dev_array type;           // global ID number, there are ghosttype = ntypes (ntypescuda = ntypes+1)
  dev_array mask;
  dev_array image;
  dev_array q;              // charges
  dev_array mass;           // per-type masses
  dev_array rmass;          // per-atom masses
  dev_array radius;         // per-atom radius
  dev_array density;
  dev_array omega;
  dev_array torque;
  dev_array molecule;

  dev_array special;
  int maxspecial;
  dev_array nspecial;
  int* special_flag;
  int molecular;

  dev_array eatom;          // per-atom energy
  dev_array vatom;          // per-atom virial
  int need_eatom;
  int need_vatom;

  dev_array x_type;         // position + type in X_FLOAT4 struct
  dev_array v_radius;       // velocity + radius in V_FLOAT4 struct, currently only used for granular atom_style
  dev_array omega_rmass;    // omega + rmass in V_FLOAT4 struct, currently only used for granular atom_style

  double* mass_host;        // remember per-type host pointer to masses
  //int natoms;             // total # of atoms in system, could be 0
  int nghost;               // # of ghost atoms on this proc
  int nlocal;               // # of owned atoms
  int nall;                 // total # of atoms on this proc
  int nmax;                 // max # of owned+ghost in arrays on this proc
  int ntypes;
  int q_flag;               // do we have charges?
  int rmass_flag;           // do we have per-atom masses?
  int firstgroup;
  int nfirst;

  int update_nlocal;
  int update_nmax;

  dev_array xhold;          // position at last neighboring
  X_FLOAT triggerneighsq;   // maximum squared movement before reneighboring
  int reneigh_flag;         // is reneighboring necessary?
  int maxhold;              // size of xhold
  int dist_check;           // perform distance check for reneighboring

  dev_array binned_id;      // id of each binned atom (not tag!)
  dev_array binned_idnew;   // new id of each binned atom for sorting, basically setting atom[binned_id[k]] to atom[binned_newid[k]]
  float bin_extraspace;
  int bin_dim[3];
  int bin_nmax;
  dev_array map_array;
};
struct cuda_shared_pair {   // relevant data from pair class
  char cudable_force;       // check for (cudable_force != 0)
  X_FLOAT cut_global;
  X_FLOAT cut_inner_global;
  X_FLOAT cut_coul_global;
  double** cut;             // type-type cutoff
  double** cutsq;           // type-type cutoff, squared
  double** cut_inner;       // type-type cutoff for coul
  double** cut_coul;        // type-type cutoff for coul
  double** coeff1;          // type-type pair parameters
  double** coeff2;
  double** coeff3;
  double** coeff4;
  double** coeff5;
  double** coeff6;
  double** coeff7;
  double** coeff8;
  double** coeff9;
  double** coeff10;
  double** offset;
  double* special_lj;
  double* special_coul;
  dev_array virial;         // ENERGY_FLOAT
  dev_array eng_vdwl;       // ENERGY_FLOAT
  dev_array eng_coul;       // ENERGY_FLOAT
  X_FLOAT cut_coulsq_global;
  F_FLOAT g_ewald, kappa;
  int freeze_group_bit;

  dev_array coeff1_gm;
  dev_array coeff2_gm;
  dev_array coeff3_gm;
  dev_array coeff4_gm;
  dev_array coeff5_gm;
  dev_array coeff6_gm;
  dev_array coeff7_gm;
  dev_array coeff8_gm;
  dev_array coeff9_gm;
  dev_array coeff10_gm;

  int lastgridsize;
  int n_energy_virial;
  int collect_forces_later;
  int use_block_per_atom;
  int override_block_per_atom;
};
struct cuda_shared_domain {   // relevant data from domain class
  X_FLOAT sublo[3];           // orthogonal box -> sub-box bounds on this proc
  X_FLOAT subhi[3];
  X_FLOAT boxlo[3];
  X_FLOAT boxhi[3];
  X_FLOAT prd[3];
  int periodicity[3];         // xyz periodicity as array
  int triclinic;
  X_FLOAT xy;
  X_FLOAT xz;
  X_FLOAT yz;
  X_FLOAT boxlo_lamda[3];
  X_FLOAT boxhi_lamda[3];
  X_FLOAT prd_lamda[3];
  X_FLOAT h[6];
  X_FLOAT h_inv[6];
  V_FLOAT h_rate[6];
  int update;
};
struct cuda_shared_pppm {
  char cudable_force;
#ifdef FFT_CUFFT
  FFT_FLOAT* work1;
  FFT_FLOAT* work2;
  FFT_FLOAT* work3;
  PPPM_FLOAT* greensfn;
  PPPM_FLOAT* fkx;
  PPPM_FLOAT* fky;
  PPPM_FLOAT* fkz;
  PPPM_FLOAT* vg;
#endif
  int* part2grid;
  PPPM_FLOAT* density_brick;
  int* density_brick_int;
  PPPM_FLOAT density_intScale;
  PPPM_FLOAT* vdx_brick;
  PPPM_FLOAT* vdy_brick;
  PPPM_FLOAT* vdz_brick;
  PPPM_FLOAT* density_fft;
  ENERGY_FLOAT* energy;
  ENERGY_FLOAT* virial;
  int nxlo_in;
  int nxhi_in;
  int nxlo_out;
  int nxhi_out;
  int nylo_in;
  int nyhi_in;
  int nylo_out;
  int nyhi_out;
  int nzlo_in;
  int nzhi_in;
  int nzlo_out;
  int nzhi_out;
  int nx_pppm;
  int ny_pppm;
  int nz_pppm;
  PPPM_FLOAT qqrd2e;
  int order;
  // float3 sublo;
  PPPM_FLOAT* rho_coeff;
  int nmax;
  int nlocal;
  PPPM_FLOAT* debugdata;
  PPPM_FLOAT delxinv;
  PPPM_FLOAT delyinv;
  PPPM_FLOAT delzinv;
  int nlower;
  int nupper;
  PPPM_FLOAT shiftone;
};
struct cuda_shared_comm {
  int maxswap;
  int maxlistlength;
  dev_array pbc;
  dev_array slablo;
  dev_array slabhi;
  dev_array multilo;
  dev_array multihi;
  dev_array sendlist;
  int grow_flag;
  int comm_phase;

  int nsend;
  int* nsend_swap;
  int* send_size;
  int* recv_size;
  double** buf_send;
  void** buf_send_dev;
  double** buf_recv;
  void** buf_recv_dev;
  void* buffer;
  int buffer_size;

  double overlap_split_ratio;
};
struct cuda_shared_neighlist {   // member of CudaNeighList, has no instance in cuda_shared_data
  int maxlocal;
  int inum;                      // # of I atoms neighbors are stored for (local indices of I atoms)
  int inum_border2;
  dev_array inum_border;         // # of atoms which interact with border atoms
  dev_array ilist;
  dev_array ilist_border;
  dev_array numneigh;
  dev_array numneigh_inner;
  dev_array numneigh_border;
  dev_array firstneigh;
  dev_array neighbors;
  dev_array neighbors_border;
  dev_array neighbors_inner;
  int maxpage;
  dev_array page_pointers;
  dev_array* pages;
  int maxneighbors;
  int neigh_lists_per_page;
  double** cutneighsq;
  CUDA_FLOAT* cu_cutneighsq;
  int* binned_id;
  int* bin_dim;
  int bin_nmax;
  float bin_extraspace;
  double maxcut;
  dev_array ex_type;
  int nex_type;
  dev_array ex1_bit;
  dev_array ex2_bit;
  int nex_group;
  dev_array ex_mol_bit;
  int nex_mol;
};
struct cuda_compile_settings {   // this is used to compare compile settings (i.e. precision) of the cu files and the cpp files
  int prec_glob;
  int prec_x;
  int prec_v;
  int prec_f;
  int prec_pppm;
  int prec_fft;
  int cufft;
  int arch;
};
struct cuda_timings_struct {
  // Debug:
  double test1;
  double test2;

  // transfers
  double transfer_upload_tmp_constr;
  double transfer_download_tmp_deconstr;

  // communication
  double comm_forward_total;
  double comm_forward_mpi_upper;
  double comm_forward_mpi_lower;
  double comm_forward_kernel_pack;
  double comm_forward_kernel_unpack;
  double comm_forward_kernel_self;
  double comm_forward_upload;
  double comm_forward_download;

  double comm_exchange_total;
  double comm_exchange_mpi;
  double comm_exchange_kernel_pack;
  double comm_exchange_kernel_unpack;
  double comm_exchange_kernel_fill;
  double comm_exchange_cpu_pack;
  double comm_exchange_upload;
  double comm_exchange_download;

  double comm_border_total;
  double comm_border_mpi;
  double comm_border_kernel_pack;
  double comm_border_kernel_unpack;
  double comm_border_kernel_self;
  double comm_border_kernel_buildlist;
  double comm_border_upload;
  double comm_border_download;

  // pair forces
  double pair_xtype_conversion;
  double pair_kernel;
  double pair_virial;
  double pair_force_collection;

  // neighbor
  double neigh_bin;
  double neigh_build;
  double neigh_special;

  // PPPM
  double pppm_particle_map;
  double pppm_make_rho;
  double pppm_brick2fft;
  double pppm_poisson;
  double pppm_fillbrick;
  double pppm_fieldforce;
  double pppm_compute;
};
struct cuda_shared_data {   // holds space for all relevant data from the different classes
  void* buffer;             // holds temporary GPU data [data used in subroutines, which does not have to be consistent outside of that routine]
  int buffersize;           // max size of buffer
  int buffer_new;           // should be 1 if the pointer to buffer has changed
  void* flag;
  void* debugdata;          // array for easily collecting debug data from the device; class Cuda contains the corresponding cu_debugdata and host array
  cuda_shared_atom atom;
  cuda_shared_pair pair;
  cuda_shared_domain domain;
  cuda_shared_pppm pppm;
  cuda_shared_comm comm;
  cuda_compile_settings compile_settings;
  cuda_timings_struct cuda_timings;
  int exchange_dim;
  int me;                   // MPI rank
  unsigned int datamask;
  int overlap_comm;
};
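/* Illustrative sketch (not part of the original header): host-side code in the
   USER-CUDA package typically keeps one cuda_shared_data instance and fills in
   the per-class sub-structs before launching kernels. The values below are
   hypothetical placeholders.

     cuda_shared_data sdata;
     sdata.atom.nlocal = 1000;              // owned atoms on this MPI rank
     sdata.atom.ntypes = 2;
     sdata.atom.q_flag = 0;                 // no charges in this example
     sdata.domain.periodicity[0] = 1;
     sdata.domain.periodicity[1] = 1;
     sdata.domain.periodicity[2] = 1;
     sdata.me = 0;                          // MPI rank
*/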
#endif // _CUDA_SHARED_H_