Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F68961997
pair_gpu_ans.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Jun 29, 16:30
Size
5 KB
Mime Type
text/x-c++
Expires
Mon, Jul 1, 16:30 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
18447053
Attached To
rLAMMPS lammps
pair_gpu_ans.h
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifndef PAIR_GPU_ANS_H
#define PAIR_GPU_ANS_H
#include <math.h>
#include "mpi.h"
// Compile-time selection of the Geryon backend headers: the OpenCL variants
// (ocl_*) are used when USE_OPENCL is defined, otherwise the nvd_* variants
// (presumably the CUDA driver-API backend, judging by the `ucl_cudadr`
// namespace -- confirm against the Geryon documentation).
// NOTE(review): the using-directives below sit at namespace scope in a header,
// so every translation unit that includes this file also gets the selected
// Geryon namespace injected into its global scope.
#ifdef USE_OPENCL
#include "geryon/ocl_timer.h"
#include "geryon/ocl_mat.h"
using
namespace
ucl_opencl
;
#else
#include "geryon/nvd_timer.h"
#include "geryon/nvd_mat.h"
using
namespace
ucl_cudadr
;
#endif
#include "pair_gpu_precision.h"
/// Storage and host/device transfer of the per-atom answers (forces, optional
/// torques, energies and virials) produced by a GPU pair style.
/** \tparam numtyp Precision type; not referenced by any declaration in this
  *                header.
  * \tparam acctyp Element type of the device and host answer buffers
  *                (dev_ans, dev_engv, host_ans, host_engv).
  *
  * The destructor calls clear(). Most member functions are only declared
  * here; their definitions live outside this header. **/
template <class numtyp, class acctyp>
class PairGPUAns {
 public:
  PairGPUAns();
  ~PairGPUAns() { clear(); }

  /// Current number of local atoms stored
  inline int inum() const { return _inum; }

  /// Set number of local atoms for future copy operations
  inline void inum(const int n) { _inum = n; }

  /// Memory usage per atom in this class
  int bytes_per_atom() const;

  /// Clear any previous data and set up for a new LAMMPS run
  /** \param inum   Number of local atoms (presumably used to size the initial
    *               allocation -- confirm against the implementation)
    * \param charge Presumably true if the pair style uses per-atom charges
    *               -- confirm against the implementation
    * \param rot    True if atom storage needs quaternions
    * \param dev    Geryon device
    * \return Presumably false if allocation failed -- confirm against the
    *         implementation **/
  bool init(const int inum, const bool charge, const bool rot,
            UCL_Device &dev);

  /// Check if we have enough device storage and realloc if not
  /** Always records \a inum as the current local atom count. When \a inum
    * exceeds the current capacity (_max_local), the existing buffers are
    * released with clear_resize() and alloc(inum) is attempted.
    * \param inum    New number of local atoms
    * \param success In/out flag, and-ed with the result of alloc(). Because
    *                of short-circuit evaluation, alloc() is not called when
    *                \a success is already false on entry. **/
  inline void resize(const int inum, bool &success) {
    _inum = inum;
    if (inum > _max_local) {
      clear_resize();
      success = success && alloc(inum);
    }
  }

  /// If already initialized by another LAMMPS style, add fields as necessary
  /** \param charge Presumably true if the pair style uses per-atom charges
    *               -- confirm against the implementation
    * \param rot    True if atom storage needs quaternions **/
  bool add_fields(const bool charge, const bool rot);

  /// Free all memory on host and device needed to realloc for more atoms
  void clear_resize();

  /// Free all memory on host and device
  void clear();

  /// Return the total amount of host memory used by class in bytes
  double host_memory_usage() const;

  /// Add copy times to timers
  inline void acc_timers() { time_answer.add_to_total(); }

  /// Reset the copy timer
  inline void zero_timers() { time_answer.zero(); }

  /// Return the total time for host/device data transfer
  /** Left non-const because it calls UCL_Timer::total_seconds(), whose
    * const-ness is not visible from this header. **/
  inline double transfer_time() { return time_answer.total_seconds(); }

  /// Return the total time for data cast/pack
  inline double cast_time() const { return _time_cast; }

  /// Return number of bytes used on device
  inline double gpu_bytes() const { return _gpu_bytes; }

  // -------------------------COPY FROM GPU -------------------------------

  /// Copy answers from device into read buffer asynchronously
  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
                    const bool vf_atom);

  /// Copy answers from device into read buffer asynchronously
  /** Same as the four-argument overload, with an additional \a ilist index
    * array (presumably the list of local atom indices the answers map to
    * -- confirm against the implementation). **/
  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
                    const bool vf_atom, int *ilist);

  /// Copy energy and virial data into LAMMPS memory
  double energy_virial(double *eatom, double **vatom, double *virial);

  /// Copy energy and virial data into LAMMPS memory
  /** Overload that additionally reports through \a ecoul (presumably the
    * Coulombic energy -- confirm against the implementation). **/
  double energy_virial(double *eatom, double **vatom, double *virial,
                       double &ecoul);

  /// Add forces and torques from the GPU into a LAMMPS pointer
  void get_answers(double **f, double **tor);

  /// Wait for the answer copy, then unpack energies/virials and forces
  /** Blocks in time_answer.sync_stop() and adds the elapsed wall-clock time
    * to the CPU idle total. It then calls energy_virial() followed by
    * get_answers(f, tor) and adds the wall-clock time of both calls to the
    * cast/pack total.
    * \return The value returned by energy_virial() **/
  inline double get_answers(double **f, double **tor, double *eatom,
                            double **vatom, double *virial, double &ecoul) {
    // Time spent waiting on the device counts as CPU idle time.
    double ta = MPI_Wtime();
    time_answer.sync_stop();
    _time_cpu_idle += MPI_Wtime() - ta;

    // Time spent unpacking the answers on the host counts as cast time.
    double ts = MPI_Wtime();
    double evdw = energy_virial(eatom, vatom, virial, ecoul);
    get_answers(f, tor);
    _time_cast += MPI_Wtime() - ts;

    return evdw;
  }

  /// Return the time the CPU was idle waiting for GPU
  inline double cpu_idle_time() const { return _time_cpu_idle; }

  // ------------------------------ DATA ----------------------------------

  /// Force and possibly torque
  UCL_D_Vec<acctyp> dev_ans;
  /// Energy and virial per-atom storage
  UCL_D_Vec<acctyp> dev_engv;

  /// Force and possibly torque data on host
  UCL_H_Vec<acctyp> host_ans;
  /// Energy/virial data on host
  UCL_H_Vec<acctyp> host_engv;

  /// Device timers
  UCL_Timer time_answer;

  /// Geryon device
  UCL_Device *dev;

 private:
  /// Allocate storage for \a inum atoms; called from resize()
  bool alloc(const int inum);

  // Member declaration order is kept as-is, since it fixes the
  // initialization order used by the out-of-line constructor.
  bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other;
  /// _max_local is the capacity checked by resize(); _inum is the current
  /// local atom count
  int _max_local, _inum, _e_fields, _ev_fields;
  int *_ilist;
  /// Wall-clock totals accumulated by the six-argument get_answers()
  double _time_cast, _time_cpu_idle;

  /// Value reported by gpu_bytes()
  double _gpu_bytes;

  bool _newton;
};
#endif
Event Timeline
Log In to Comment