Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91466372
fix_intel.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 11, 10:01
Size
16 KB
Mime Type
text/x-c++
Expires
Wed, Nov 13, 10:01 (2 d)
Engine
blob
Format
Raw Data
Handle
22267128
Attached To
rLAMMPS lammps
fix_intel.h
View Options
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle
(
INTEL
,
FixIntel
)
#else
#ifndef LMP_FIX_INTEL_H
#define LMP_FIX_INTEL_H
#include "fix.h"
#include "intel_buffers.h"
#include "force.h"
#include "pair.h"
#include "error.h"
#include "update.h"
namespace
LAMMPS_NS
{
// Forward declarations: the full IntelBuffers template is defined in
// intel_buffers.h; only pointer/typedef uses appear in this header.
class IntelData;
template <class flt_t, class acc_t> class IntelBuffers;

// Fix that provides the shared infrastructure of the USER-INTEL package:
// precision-mode selection, per-thread force/energy result buffers and their
// reduction bookkeeping, timing, and (when compiled with
// _LMP_INTEL_OFFLOAD) load balancing and data partitioning for coprocessor
// offload.
class FixIntel : public Fix {
 public:
  FixIntel(class LAMMPS *, int, char **);
  virtual ~FixIntel();
  virtual int setmask();
  virtual void init();
  virtual void setup(int);

  // Consistency checks invoked by intel-suffix pair/bond/kspace styles.
  void pair_init_check(const bool cdmessage = false);
  void bond_init_check();
  void kspace_init_check();

  void pre_reverse(int eflag = 0, int vflag = 0);
  // Retrieve all forces and calculation results from the coprocessor
  void sync_coprocessor();
  double memory_usage();

  // Plain double-precision force triple.
  typedef struct { double x, y, z; } lmp_ft;

  // Precision modes selectable with the "package intel" command.
  enum { PREC_MODE_SINGLE, PREC_MODE_MIXED, PREC_MODE_DOUBLE };

  // Currently active precision mode (one of the enum values above).
  inline int precision() { return _precision_mode; }
  inline IntelBuffers<float, float> *get_single_buffers()
    { return _single_buffers; }
  inline IntelBuffers<float, double> *get_mixed_buffers()
    { return _mixed_buffers; }
  inline IntelBuffers<double, double> *get_double_buffers()
    { return _double_buffers; }

  // Width used when packing neighbor lists (getter/setter pair).
  inline int nbor_pack_width() const { return _nbor_pack_width; }
  inline void nbor_pack_width(const int w) { _nbor_pack_width = w; }

  // Non-zero when thread tid must zero its private force copy before
  // accumulating (only threads > 0 and only while no reduction is pending).
  inline int need_zero(const int tid) {
    if (_need_reduce == 0 && tid > 0) return 1;
    return 0;
  }
  // Mark that per-thread force copies must be reduced later.
  inline void set_reduce_flag() { _need_reduce = 1; }

  // LRT mode flag; only honored when the kspace style is pppm/intel.
  inline int lrt() {
    if (force->kspace_match("pppm/intel", 0)) return _lrt;
    else return 0;
  }

 protected:
  // One buffer object per supported precision mode.
  IntelBuffers<float, float> *_single_buffers;
  IntelBuffers<float, double> *_mixed_buffers;
  IntelBuffers<double, double> *_double_buffers;

  int _precision_mode, _nthreads, _nbor_pack_width;

 public:
  // Overflow flags written during neighbor builds (host / offload device).
  inline int *get_overflow_flag() { return _overflow_flag; }
  inline int *get_off_overflow_flag() { return _off_overflow_flag; }

  // Register force (f_in) and energy/virial (ev_in) result arrays produced
  // by an intel style, one overload per precision mode; see definitions
  // below the class for details.
  inline void add_result_array(IntelBuffers<double, double>::vec3_acc_t *f_in,
                               double *ev_in, const int offload,
                               const int eatom = 0, const int vatom = 0,
                               const int rflag = 0);
  inline void add_result_array(IntelBuffers<float, double>::vec3_acc_t *f_in,
                               double *ev_in, const int offload,
                               const int eatom = 0, const int vatom = 0,
                               const int rflag = 0);
  inline void add_result_array(IntelBuffers<float, float>::vec3_acc_t *f_in,
                               float *ev_in, const int offload,
                               const int eatom = 0, const int vatom = 0,
                               const int rflag = 0);

  // Atom counts/offset a style should use for the host or offload section.
  inline void get_buffern(const int offload, int &nlocal, int &nall,
                          int &minlocal);

#ifdef _LMP_INTEL_OFFLOAD
  void post_force(int vflag);
  // Index of the coprocessor assigned to this MPI task.
  inline int coprocessor_number() { return _cop; }
  inline int full_host_list() { return _full_host_list; }
  void set_offload_affinity();
  // Fraction of work offloaded (< 0 means dynamic load balancing).
  inline double offload_balance() { return _offload_balance; }
  inline int offload_end_neighbor();
  inline int offload_end_pair();
  // Host sections start at 0 in noghost mode, otherwise after the
  // offloaded range.
  inline int host_start_neighbor() {
    if (_offload_noghost) return 0;
    else return offload_end_neighbor();
  }
  inline int host_start_pair() {
    if (_offload_noghost) return 0;
    else return offload_end_pair();
  }
  // Accessors for the atom ranges recorded for the offload/host halves.
  inline int offload_nlocal() { return _offload_nlocal; }
  inline int offload_nall() { return _offload_nall; }
  inline int offload_min_ghost() { return _offload_min_ghost; }
  inline int host_min_local() { return _host_min_local; }
  inline int host_min_ghost() { return _host_min_ghost; }
  inline int host_used_local() { return _host_used_local; }
  inline int host_used_ghost() { return _host_used_ghost; }
  inline int host_nall() { return _host_nall; }
  inline int separate_buffers() { return _separate_buffers; }
  inline int offload_noghost() { return _offload_noghost; }
  // Only takes effect while the ghost policy has not been fixed
  // (_offload_ghost < 0).
  inline void set_offload_noghost(const int v) {
    if (_offload_ghost < 0) _offload_noghost = v;
  }
  inline void set_neighbor_host_sizes();

  // Timer helpers; _timers accumulates per-category elapsed time.
  inline void zero_timers()
    { memset(_timers, 0, sizeof(double) * NUM_ITIMERS); }
  inline void start_watch(const int which) { _stopwatch[which] = MPI_Wtime(); }
  inline double stop_watch(const int which);
  inline double *off_watch_pair() { return _stopwatch_offload_pair; }
  inline double *off_watch_neighbor() { return _stopwatch_offload_neighbor; }
  inline void balance_stamp();
  inline void acc_timers();
#else
  // No-op stand-ins when offload support is not compiled in.
  inline int offload_end_neighbor() { return 0; }
  inline int offload_end_pair() { return 0; }
  inline int host_start_neighbor() { return 0; }
  inline int host_start_pair() { return 0; }
  inline void zero_timers() {}
  inline void start_watch(const int which) {}
  inline double stop_watch(const int which) { return 0.0; }
  double *off_watch_pair() { return NULL; }
  double *off_watch_neighbor() { return NULL; }
  inline void balance_stamp() {}
  inline void acc_timers() {}
  inline int separate_buffers() { return 0; }
#endif

 protected:
  int _overflow_flag[5];
  // 64-byte aligned (cache line) copy used by the offload device.
  _alignvar(int _off_overflow_flag[5], 64);
  int _allow_separate_buffers, _offload_ghost, _lrt;

  // Host-side result arrays registered via add_result_array()
  // (suffix _s/_m/_d = single/mixed/double precision mode).
  IntelBuffers<float, float>::vec3_acc_t *_force_array_s;
  IntelBuffers<float, double>::vec3_acc_t *_force_array_m;
  IntelBuffers<double, double>::vec3_acc_t *_force_array_d;
  float *_ev_array_s;
  double *_ev_array_d;
  int _results_eatom, _results_vatom;
  // Non-zero while per-thread force copies still need reduction.
  int _need_reduce;

#ifdef _LMP_INTEL_OFFLOAD
  double _balance_pair_time, _balance_other_time;
  // Atom ranges assigned to the coprocessor half of the decomposition.
  int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost;
  // Atom ranges used by the host half.
  int _host_min_local, _host_min_ghost, _host_nall;
  int _host_used_local, _host_used_ghost, _sync_mode;
  int _separate_buffers, _offload_noghost, _separate_coi;
  bool _setup_time_cleared, _timers_allocated;
  void output_timing_data();
  FILE *_tscreen;

  // Coprocessor-side result arrays (same naming scheme as above).
  IntelBuffers<float, float>::vec3_acc_t *_off_force_array_s;
  IntelBuffers<float, double>::vec3_acc_t *_off_force_array_m;
  IntelBuffers<double, double>::vec3_acc_t *_off_force_array_d;
  float *_off_ev_array_s;
  double *_off_ev_array_d;
  int _off_results_eatom, _off_results_vatom;
  int _full_host_list, _cop, _ncops;

  int get_ppn(int &);
  int set_host_affinity(const int);
#endif
  void check_neighbor_intel();

  // Load-balance fractions; _offload_balance < 0 selects dynamic balancing.
  double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
  double _timers[NUM_ITIMERS];
  double _stopwatch[NUM_ITIMERS];
  _alignvar(double _stopwatch_offload_neighbor[1], 64);
  _alignvar(double _stopwatch_offload_pair[1], 64);

  // Reduce per-thread private force copies into f_in.
  template <class ft> void reduce_results(ft * _noalias const f_in);
  // Add style results (forces + global energy/virial) into LAMMPS arrays.
  template <class ft, class acc_t>
  inline void add_results(const ft * _noalias const f_in,
                          const acc_t * _noalias const ev_global,
                          const int eatom, const int vatom,
                          const int offload);
  // Variant with an explicit output offset and atom count.
  template <class ft, class acc_t>
  inline void add_oresults(const ft * _noalias const f_in,
                           const acc_t * _noalias const ev_global,
                           const int eatom, const int vatom,
                           const int out_offset, const int nall);
  int _offload_affinity_balanced, _offload_threads, _offload_tpc;
#ifdef _LMP_INTEL_OFFLOAD
  int _max_offload_threads, _offload_cores, _offload_affinity_set;
  int _im_real_space_task;
  MPI_Comm _real_space_comm;
  // Add results produced on the coprocessor into LAMMPS arrays.
  template <class ft, class acc_t>
  inline void add_off_results(const ft * _noalias const f_in,
                              const acc_t * _noalias const ev_global);
#endif
};
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Report the atom counts a style should use when reading/writing the
   package buffers: nlocal = local atom count, nall = local + ghost count,
   minlocal = index offset of the first local atom in the buffer.
------------------------------------------------------------------------- */
void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
                           int &minlocal)
{
#ifdef _LMP_INTEL_OFFLOAD
  // When host and coprocessor use separate buffers, each side gets the
  // ranges recorded for its half of the decomposition.
  if (_separate_buffers) {
    if (offload) {
      // Between neighbor rebuilds (ago != 0) reuse the counts captured for
      // the offload section; on a rebuild step use the current counts.
      if (neighbor->ago != 0) {
        nlocal = _offload_nlocal;
        nall = _offload_nall;
      } else {
        nlocal = atom->nlocal;
        nall = nlocal + atom->nghost;
      }
      minlocal = 0;
    } else {
      nlocal = atom->nlocal;
      nall = _host_nall;
      minlocal = _host_min_local;
    }
    return;
  }
  // Shared buffers: in noghost mode the offload section sees no ghosts.
  if (_offload_noghost && offload)
    nall = atom->nlocal;
  else
#endif
    nall = atom->nlocal + atom->nghost;
  nlocal = atom->nlocal;
  minlocal = 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Register double-precision force (f_in) and energy/virial (ev_in) result
   arrays so they can be added into LAMMPS arrays later; aborts the run if
   the neighbor-list buffer overflowed.
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<double, double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag)
{
#ifdef _LMP_INTEL_OFFLOAD
  // Results computed on the coprocessor are tracked separately.
  if (offload) {
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    _off_force_array_d = f_in;
    _off_ev_array_d = ev_in;
    return;
  }
#endif
  _force_array_d = f_in;
  _ev_array_d = ev_in;
  _results_eatom = eatom;
  _results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
  // With multiple host threads a reduction over the per-thread force
  // copies is still needed (rflag == 2 appears to mean the style already
  // reduced them -- TODO confirm against callers).
  if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
#endif
  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Register mixed-precision force (f_in) and double-precision
   energy/virial (ev_in) result arrays; aborts the run if the
   neighbor-list buffer overflowed.
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<float, double>::vec3_acc_t *f_in,
                                double *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag)
{
#ifdef _LMP_INTEL_OFFLOAD
  // Results computed on the coprocessor are tracked separately.
  if (offload) {
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    _off_force_array_m = f_in;
    _off_ev_array_d = ev_in;
    return;
  }
#endif
  _force_array_m = f_in;
  _ev_array_d = ev_in;
  _results_eatom = eatom;
  _results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
  // With multiple host threads a reduction over the per-thread force
  // copies is still needed (rflag == 2 appears to mean the style already
  // reduced them -- TODO confirm against callers).
  if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
#endif
  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Register single-precision force (f_in) and energy/virial (ev_in) result
   arrays; aborts the run if the neighbor-list buffer overflowed.
------------------------------------------------------------------------- */
void FixIntel::add_result_array(IntelBuffers<float, float>::vec3_acc_t *f_in,
                                float *ev_in, const int offload,
                                const int eatom, const int vatom,
                                const int rflag)
{
#ifdef _LMP_INTEL_OFFLOAD
  // Results computed on the coprocessor are tracked separately.
  if (offload) {
    _off_results_eatom = eatom;
    _off_results_vatom = vatom;
    _off_force_array_s = f_in;
    _off_ev_array_s = ev_in;
    return;
  }
#endif
  _force_array_s = f_in;
  _ev_array_s = ev_in;
  _results_eatom = eatom;
  _results_vatom = vatom;
#ifndef _LMP_INTEL_OFFLOAD
  // With multiple host threads a reduction over the per-thread force
  // copies is still needed (rflag == 2 appears to mean the style already
  // reduced them -- TODO confirm against callers).
  if (rflag != 2 && _nthreads > 1) _need_reduce = 1;
#endif
  if (_overflow_flag[LMP_OVERFLOW])
    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
}
/* ---------------------------------------------------------------------- */
#ifdef _LMP_INTEL_OFFLOAD
/* ---------------------------------------------------------------------- */
int
FixIntel
::
offload_end_neighbor
()
{
if
(
_offload_balance
<
0.0
)
{
if
(
atom
->
nlocal
<
2
)
error
->
one
(
FLERR
,
"Too few atoms for load balancing offload"
);
double
granularity
=
1.0
/
atom
->
nlocal
;
if
(
_balance_neighbor
<
granularity
)
_balance_neighbor
=
granularity
+
1e-10
;
else
if
(
_balance_neighbor
>
1.0
-
granularity
)
_balance_neighbor
=
1.0
-
granularity
+
1e-10
;
}
return
_balance_neighbor
*
atom
->
nlocal
;
}
/* ----------------------------------------------------------------------
   Return the number of local atoms whose pair computation is offloaded.
   On a neighbor-rebuild step (ago == 0) the neighbor-balance fraction is
   used instead of the pair fraction.
------------------------------------------------------------------------- */
int FixIntel::offload_end_pair()
{
  const double fraction =
    (neighbor->ago == 0) ? _balance_neighbor : _balance_pair;
  return fraction * atom->nlocal;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Stop the stopwatch started with start_watch(which), accumulate the
   elapsed wall time into _timers[which], and return it.
------------------------------------------------------------------------- */
double FixIntel::stop_watch(const int which)
{
  const double delta = MPI_Wtime() - _stopwatch[which];
  _timers[which] += delta;
  return delta;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Record timing stamps used by dynamic load balancing: the current wall
   time and the time spent since the host pair stopwatch was started.
   Only active when dynamic balancing is selected (_offload_balance < 0).
------------------------------------------------------------------------- */
void FixIntel::balance_stamp()
{
  if (_offload_balance >= 0.0) return;   // static balance: nothing to do

  const double now = MPI_Wtime();
  _balance_other_time = now;
  _balance_pair_time = now - _stopwatch[TIME_HOST_PAIR];
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Accumulate the offload stopwatch values into the timer totals.  The
   neighbor timer is only updated on rebuild steps (ago == 0); on the
   first such step the timers are zeroed once to drop setup time.
------------------------------------------------------------------------- */
void FixIntel::acc_timers()
{
  _timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair;

  if (neighbor->ago != 0) return;   // not a rebuild step

  _timers[TIME_OFFLOAD_NEIGHBOR] += *_stopwatch_offload_neighbor;
  if (!_setup_time_cleared) {
    zero_timers();
    _setup_time_cleared = true;
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Derive the host-side atom ranges from the min/max indices recorded in
   _overflow_flag during the neighbor build: first local/ghost index used,
   how many of each the host touched, and the resulting total count.
------------------------------------------------------------------------- */
void FixIntel::set_neighbor_host_sizes()
{
  const int nlocal = atom->nlocal;
  _host_min_local = _overflow_flag[LMP_LOCAL_MIN];
  _host_min_ghost = _overflow_flag[LMP_GHOST_MIN];
  _host_used_local = nlocal - _host_min_local;
  // Ghost span can come out negative when no ghosts were used; clamp to 0.
  const int ghost_span = _overflow_flag[LMP_GHOST_MAX] + 1 - _host_min_ghost;
  _host_used_ghost = (ghost_span < 0) ? 0 : ghost_span;
  _host_nall = nlocal + _host_used_ghost;
}
/* ---------------------------------------------------------------------- */
#endif
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
W: Could not set host affinity for offload tasks
When using offload to a coprocessor, the application will try to set affinity
for host MPI tasks and OpenMP threads and will generate a warning if unable
to do so successfully. In the unsuccessful case, you might wish to set
affinity outside of the application and performance might suffer if
hyperthreading is disabled on the CPU.
E: Neighbor list overflow, boost neigh_modify one
Increase the value for neigh_modify one to allow for larger allocations for
neighbor list builds. The value required can be different for the Intel
package in order to support offload to a coprocessor.
E: Bad matrix inversion in mldivide3
This error should not occur unless the matrix is badly formed.
E: Illegal package intel command
The format for the package intel command is incorrect. Please see the
documentation.
E: fix intel has to operate on group 'all'
Self-explanatory.
E: Illegal package intel mode requested
The format for the package intel command is incorrect. Please see the
documentation.
E: Currently, neighbor style BIN must be used with Intel package.
This is the only neighbor style that has been implemented for the Intel
package.
E: Currently, cannot use neigh_modify exclude with Intel package.
This is a current restriction of the Intel package.
W: Unknown Intel Compiler Version
The compiler version used to build LAMMPS has not been tested with
offload to a coprocessor.
W: Unsupported Intel Compiler
The compiler version used to build LAMMPS is not supported when using
offload to a coprocessor. There could be performance or correctness
issues. Please use 14.0.1.106 or 15.1.133 or later.
E: Currently, cannot use more than one intel style with hybrid.
Currently, hybrid pair styles can only use the intel suffix for one of the
pair styles.
E: Cannot yet use hybrid styles with Intel package.
The hybrid pair style configuration is not yet supported by the Intel
package. Support is limited to hybrid/overlay or a hybrid style that does
not require a skip list.
W: Leaving a core/node free can improve performance for offload
When each CPU is fully subscribed with MPI tasks and OpenMP threads,
context switching with threads used for offload can sometimes decrease
performance. If you see this warning, try using fewer MPI tasks/OpenMP threads
per node to leave a physical CPU core free on each node.
E: MPI tasks per node must be multiple of offload_cards
For offload to multiple coprocessors on a single node, the Intel package
requires that each coprocessor is used by the same number of MPI tasks.
W: More MPI tasks/OpenMP threads than available cores
Using more MPI tasks/OpenMP threads than available cores will typically
decrease performance.
E: USER-INTEL package requires same setting for newton bond and non-bond.
The newton setting must be the same for both pairwise and bonded forces.
E: Intel styles for bond/angle/dihedral/improper require intel pair style.
You cannot use the USER-INTEL package for bond calculations without a
USER-INTEL supported pair style.
E: Intel styles for kspace require intel pair style.
You cannot use the USER-INTEL package for kspace calculations without a
USER-INTEL supported pair style.
E: Cannot currently get per-atom virials with intel package.
The Intel package does not yet support per-atom virial calculation.
E: Too few atoms for load balancing offload.
When using offload to a coprocessor, each MPI task must have at least 2
atoms throughout the simulation.
*/
Event Timeline
Log In to Comment