Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F121358836
pair_gpu_ans.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Jul 10, 06:52
Size
9 KB
Mime Type
text/x-c++
Expires
Sat, Jul 12, 06:52 (2 d)
Engine
blob
Format
Raw Data
Handle
27314950
Attached To
rLAMMPS lammps
pair_gpu_ans.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#include "pair_gpu_ans.h"
#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
// Default constructor.
// Starts the object in the "nothing allocated" state: the energy and virial
// flags are false, the local atom count is zero, no index list is attached
// and _newton is false. No storage is allocated here; see init()/alloc().
template <class numtyp, class acctyp>
PairGPUAnsT::PairGPUAns()
  : _allocated(false),
    _eflag(false),
    _vflag(false),
    _inum(0),
    _ilist(NULL),
    _newton(false) {
}
// Returns the per-atom byte count used for answer storage:
// 11 acctyp values, plus 4 more when _rot is set and 1 more when _charge is
// set, each of size sizeof(acctyp).
template <class numtyp, class acctyp>
int PairGPUAnsT::bytes_per_atom() const {
  int n_values = 11;
  if (_rot)
    n_values += 4;
  if (_charge)
    n_values += 1;
  return static_cast<int>(n_values * sizeof(acctyp));
}
// Allocates the host and device buffers for the answers and the
// energy/virial accumulators.
//
//   inum - number of local atoms to size for; capacity (_max_local) is set
//          to inum scaled by 1.10 and truncated to int.
//
// Returns true only if every allocation that was attempted reported
// UCL_SUCCESS. Once one allocation fails, the remaining ones are skipped.
// _allocated is set to true and _gpu_bytes is refreshed regardless of the
// outcome.
template <class numtyp, class acctyp>
bool PairGPUAnsT::alloc(const int inum) {
  _max_local = static_cast<int>(static_cast<double>(inum) * 1.10);

  // 4 answer values per atom, 8 when _rot is set
  const int ans_elements = _rot ? 8 : 4;
  const int ans_count = ans_elements * _max_local;
  const int engv_count = _ev_fields * _max_local;

  // When the device reports itself as UCL_CPU, the device containers are
  // made views of the host containers instead of separate allocations
  const bool cpuview = (dev->device_type() == UCL_CPU);

  // -------------------------- Host allocations
  bool success = (host_ans.alloc(ans_count, *dev) == UCL_SUCCESS);
  if (success)
    success = (host_engv.alloc(engv_count, *dev) == UCL_SUCCESS);

  // --------------------------- Device allocations
  if (cpuview) {
    dev_engv.view(host_engv);
    dev_ans.view(host_ans);
  } else {
    if (success)
      success = (dev_engv.alloc(engv_count, *dev, UCL_WRITE_ONLY) ==
                 UCL_SUCCESS);
    if (success)
      success = (dev_ans.alloc(ans_count, *dev, UCL_WRITE_ONLY) ==
                 UCL_SUCCESS);
  }

  _gpu_bytes = dev_engv.row_bytes() + dev_ans.row_bytes();
  _allocated = true;
  return success;
}
// (Re)initializes the answer object for a device.
//
//   inum   - number of local atoms to size the buffers for; a value of 0 is
//            replaced by 1000 before allocating
//   charge - true adds a second energy field (stored in _e_fields)
//   rot    - true doubles the per-atom answer storage (see alloc())
//   devi   - device the buffers and the timer are bound to; its address is
//            stored, so it must outlive this object's use of it
//
// Any previous state is released with clear() first. Returns the result of
// alloc().
template <class numtyp, class acctyp>
bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
                       UCL_Device &devi) {
  clear();

  _charge = charge;
  _rot = rot;
  _other = _charge || _rot;
  dev = &devi;

  // One energy field, two with charge; six more fields for the virial
  _e_fields = _charge ? 2 : 1;
  _ev_fields = 6 + _e_fields;

  // Initialize timers for the selected device
  time_answer.init(*dev);
  time_answer.zero();
  _time_cast = 0.0;
  _time_cpu_idle = 0.0;

  // Fall back to a default capacity when no atom count is supplied
  const int ef_inum = (inum == 0) ? 1000 : inum;
  return alloc(ef_inum);
}
// Enables charge and/or rot storage after initialization.
//
//   charge - request the charge field; ignored if already enabled
//   rot    - request the rot storage; ignored if already enabled
//
// If neither request changes anything, returns true without touching the
// buffers. Otherwise the buffers are released with clear_resize() and
// re-created by alloc() using the previous _max_local as the atom count;
// the result of alloc() is returned.
template <class numtyp, class acctyp>
bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
  const bool enable_charge = charge && !_charge;
  const bool enable_rot = rot && !_rot;

  if (!enable_charge && !enable_rot)
    return true;

  if (enable_charge) {
    _charge = true;
    // The extra energy field also widens the combined energy/virial record
    _e_fields++;
    _ev_fields++;
  }
  if (enable_rot)
    _rot = true;

  _other = _charge || _rot;

  // Capture the capacity before it is recomputed inside alloc()
  const int previous_max = _max_local;
  clear_resize();
  return alloc(previous_max);
}
// Releases the four answer/energy-virial containers (device and host) and
// marks the object as unallocated. Does nothing if nothing is allocated.
// Timers and the stored flags are left untouched.
template <class numtyp, class acctyp>
void PairGPUAnsT::clear_resize() {
  if (_allocated) {
    _allocated = false;

    dev_ans.clear();
    dev_engv.clear();
    host_ans.clear();
    host_engv.clear();
  }
}
// Full reset: zeroes _gpu_bytes and, if buffers are allocated, clears the
// timer, releases the buffers via clear_resize() and resets the atom count,
// the index list pointer and the energy/virial flags.
template <class numtyp, class acctyp>
void PairGPUAnsT::clear() {
  _gpu_bytes = 0;

  if (_allocated) {
    time_answer.clear();
    clear_resize();

    _inum = 0;
    _ilist = NULL;
    _eflag = false;
    _vflag = false;
  }
}
// Returns an estimate, in bytes, of the host memory used by this object.
//
// The per-atom element count is 4, plus 1 when _charge is set, plus 4 when
// _rot is set, plus _ev_fields. That count is multiplied by _max_local and
// sizeof(acctyp), and the size of the object itself is added.
//
// The product is evaluated in double precision so that a large _max_local
// cannot overflow the intermediate int multiplication.
template <class numtyp, class acctyp>
double PairGPUAnsT::host_memory_usage() const {
  int atom_elements = 4;
  if (_charge)
    atom_elements += 1;
  if (_rot)
    atom_elements += 4;

  const int ans_elements = atom_elements + _ev_fields;

  return static_cast<double>(ans_elements) *
         static_cast<double>(_max_local) *
         static_cast<double>(sizeof(acctyp)) +
         static_cast<double>(sizeof(PairGPUAns<numtyp,acctyp>));
}
// Copies the results for the current _inum atoms from the device buffers
// into the host buffers and records which quantities were requested.
//
//   eflag   - energy fields are included in the energy/virial copy
//   vflag   - the 6 virial fields are included in the energy/virial copy
//   ef_atom - stored in _ef_atom; read later by energy_virial()
//   vf_atom - stored in _vf_atom; read later by energy_virial()
//
// The copies are bracketed by time_answer.start()/stop(). The trailing
// `true` passed to ucl_copy is forwarded unchanged from the original code
// (presumably requests an asynchronous copy - confirm against the UCL API).
template <class numtyp, class acctyp>
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
                               const bool ef_atom, const bool vf_atom) {
  time_answer.start();

  _eflag = eflag;
  _vflag = vflag;
  _ef_atom = ef_atom;
  _vf_atom = vf_atom;

  // Number of energy/virial fields to transfer: drop whatever was not asked for
  const int csize = _ev_fields - (eflag ? 0 : _e_fields) - (vflag ? 0 : 6);
  if (csize > 0)
    ucl_copy(host_engv, dev_engv, _inum * csize, true);

  // 4 answer values per atom, 8 when _rot is set
  const int values_per_atom = _rot ? 8 : 4;
  ucl_copy(host_ans, dev_ans, _inum * values_per_atom, true);

  time_answer.stop();
}
template
<
class
numtyp
,
class
acctyp
>
void
PairGPUAnsT
::
copy_answers
(
const
bool
eflag
,
const
bool
vflag
,
const
bool
ef_atom
,
const
bool
vf_atom
,
int
*
ilist
)
{
_ilist
=
ilist
;
copy_answers
(
eflag
,
vflag
,
ef_atom
,
vf_atom
);
}
// Accumulates the energy and virial values held in host_engv.
//
//   eatom  - per-atom energy array; half of each atom's energy value is
//            added when _eflag and _ef_atom are set
//   vatom  - per-atom virial array (6 components per atom); half of each
//            component is added when _vflag and _vf_atom are set
//   virial - 6-component total; half of the summed components is added
//
// Returns half of the summed energy values, or 0.0 immediately when neither
// _eflag nor _vflag is set.
//
// host_engv is read field by field with a stride of _inum: the value of
// field k for result i sits at offset i + k*_inum. When _eflag is set the
// first field is the energy, followed by the 6 virial fields when _vflag is
// set. Result i is written to index _ilist[i] of the per-atom arrays, or to
// index i when _ilist is NULL.
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
                                  double *virial) {
  if (!_eflag && !_vflag)
    return 0.0;

  double evdwl = 0.0;
  double virial_acc[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};

  for (int i = 0; i < _inum; i++) {
    const int ii = (_ilist == NULL) ? i : _ilist[i];
    acctyp *ap = host_engv.begin() + i;

    if (_eflag) {
      evdwl += *ap;
      if (_ef_atom)
        eatom[ii] += *ap * 0.5;
      ap += _inum;
    }

    if (_vflag) {
      for (int j = 0; j < 6; j++) {
        if (_vf_atom)
          vatom[ii][j] += *ap * 0.5;
        virial_acc[j] += *ap;
        ap += _inum;
      }
    }
  }

  for (int j = 0; j < 6; j++)
    virial[j] += virial_acc[j] * 0.5;

  return evdwl * 0.5;
}
// Accumulates the energy and virial values held in host_engv when a second
// (ecoul) energy field is present.
//
//   eatom  - per-atom energy array; half of both energy values of each atom
//            is added when _eflag and _ef_atom are set
//   vatom  - per-atom virial array (6 components per atom); half of each
//            component is added when _vflag and _vf_atom are set
//   virial - 6-component total; half of the summed components is added
//   ecoul  - half of the summed second energy field is added to it
//
// Returns half of the summed first energy field. Returns 0.0 immediately
// when neither _eflag nor _vflag is set, and defers to the three-argument
// overload (leaving ecoul untouched) when _charge is not set.
//
// host_engv is read field by field with a stride of _inum. When _eflag is
// set the first two fields are the two energies, followed by the 6 virial
// fields when _vflag is set. Result i is written to index _ilist[i] of the
// per-atom arrays, or to index i when _ilist is NULL.
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
                                  double *virial, double &ecoul) {
  if (!_eflag && !_vflag)
    return 0.0;

  if (!_charge)
    return energy_virial(eatom, vatom, virial);

  double evdwl = 0.0;
  double coul_sum = 0.0;
  double virial_acc[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};

  for (int i = 0; i < _inum; i++) {
    const int ii = (_ilist == NULL) ? i : _ilist[i];
    acctyp *ap = host_engv.begin() + i;

    if (_eflag) {
      // First energy field
      evdwl += *ap;
      if (_ef_atom)
        eatom[ii] += *ap * 0.5;
      ap += _inum;

      // Second energy field
      coul_sum += *ap;
      if (_ef_atom)
        eatom[ii] += *ap * 0.5;
      ap += _inum;
    }

    if (_vflag) {
      for (int j = 0; j < 6; j++) {
        if (_vf_atom)
          vatom[ii][j] += *ap * 0.5;
        virial_acc[j] += *ap;
        ap += _inum;
      }
    }
  }

  for (int j = 0; j < 6; j++)
    virial[j] += virial_acc[j] * 0.5;

  ecoul += coul_sum * 0.5;
  return evdwl * 0.5;
}
// Adds the values stored in host_ans to the caller's arrays.
//
//   f   - per-atom 3-component array; receives the first block of answers
//         (presumably forces - confirm against the caller)
//   tor - per-atom 3-component array; receives the second block of answers,
//         only read when _rot is set (presumably torques - confirm)
//
// host_ans holds 4 values per result, of which the first 3 are used and the
// 4th is skipped. The block of _inum records for `f` comes first; when _rot
// is set a second block of _inum records for `tor` follows directly after
// it. Result i is added to index _ilist[i], or to index i when _ilist is
// NULL.
template <class numtyp, class acctyp>
void PairGPUAnsT::get_answers(double **f, double **tor) {
  acctyp *ap = host_ans.begin();

  for (int i = 0; i < _inum; i++) {
    const int ii = (_ilist == NULL) ? i : _ilist[i];
    f[ii][0] += ap[0];
    f[ii][1] += ap[1];
    f[ii][2] += ap[2];
    ap += 4;
  }

  if (_rot) {
    // ap now points at the start of the second block
    for (int i = 0; i < _inum; i++) {
      const int ii = (_ilist == NULL) ? i : _ilist[i];
      tor[ii][0] += ap[0];
      tor[ii][1] += ap[1];
      tor[ii][2] += ap[2];
      ap += 4;
    }
  }
}
// Explicit instantiation of the class template for the PRECISION /
// ACC_PRECISION type pair, so the member definitions in this file are
// emitted in this translation unit. NOTE(review): PRECISION and
// ACC_PRECISION are not defined in this file; presumably they are macros
// supplied by an included header or the build - confirm.
template
class
PairGPUAns
<
PRECISION
,
ACC_PRECISION
>
;
Event Timeline
Log In to Comment