Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91367546
finish.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 10, 10:03
Size
30 KB
Mime Type
text/x-c
Expires
Tue, Nov 12, 10:03 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22251353
Attached To
rLAMMPS lammps
finish.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "finish.h"
#include "timer.h"
#include "universe.h"
#include "accelerator_kokkos.h"
#include "accelerator_omp.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "update.h"
#include "min.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "output.h"
#include "memory.h"
#include "error.h"
#ifdef LMP_USER_OMP
#include "modify.h"
#include "fix_omp.h"
#include "thr_data.h"
#endif
using
namespace
LAMMPS_NS
;
// local function prototypes, code at end of file
static
void
mpi_timings
(
const
char
*
label
,
Timer
*
t
,
enum
Timer
::
ttype
tt
,
MPI_Comm
world
,
const
int
nprocs
,
const
int
nthreads
,
const
int
me
,
double
time_loop
,
FILE
*
scr
,
FILE
*
log
);
#ifdef LMP_USER_OMP
static
void
omp_times
(
FixOMP
*
fix
,
const
char
*
label
,
enum
Timer
::
ttype
which
,
const
int
nthreads
,
FILE
*
scr
,
FILE
*
log
);
#endif
/* ---------------------------------------------------------------------- */
Finish
::
Finish
(
LAMMPS
*
lmp
)
:
Pointers
(
lmp
)
{}
/* ---------------------------------------------------------------------- */
void
Finish
::
end
(
int
flag
)
{
int
i
,
m
,
nneigh
,
nneighfull
;
int
histo
[
10
];
int
minflag
,
prdflag
,
tadflag
,
timeflag
,
fftflag
,
histoflag
,
neighflag
;
double
time
,
tmp
,
ave
,
max
,
min
;
double
time_loop
,
time_other
,
cpu_loop
;
int
me
,
nprocs
;
MPI_Comm_rank
(
world
,
&
me
);
MPI_Comm_size
(
world
,
&
nprocs
);
const
int
nthreads
=
comm
->
nthreads
;
// recompute natoms in case atoms have been lost
bigint
nblocal
=
atom
->
nlocal
;
MPI_Allreduce
(
&
nblocal
,
&
atom
->
natoms
,
1
,
MPI_LMP_BIGINT
,
MPI_SUM
,
world
);
// choose flavors of statistical output
// flag determines caller
// flag = 0 = just loop summary
// flag = 1 = dynamics or minimization
// flag = 2 = PRD
// flag = 3 = TAD
// turn off neighflag for Kspace partition of verlet/split integrator
minflag
=
prdflag
=
tadflag
=
timeflag
=
fftflag
=
histoflag
=
neighflag
=
0
;
time_loop
=
cpu_loop
=
time_other
=
0.0
;
if
(
flag
==
1
)
{
if
(
update
->
whichflag
==
2
)
minflag
=
1
;
timeflag
=
histoflag
=
1
;
neighflag
=
1
;
if
(
update
->
whichflag
==
1
&&
strncmp
(
update
->
integrate_style
,
"verlet/split"
,
12
)
==
0
&&
universe
->
iworld
==
1
)
neighflag
=
0
;
if
(
force
->
kspace
&&
force
->
kspace_match
(
"pppm"
,
0
)
&&
force
->
kspace
->
fftbench
)
fftflag
=
1
;
}
if
(
flag
==
2
)
prdflag
=
timeflag
=
histoflag
=
neighflag
=
1
;
if
(
flag
==
3
)
tadflag
=
histoflag
=
neighflag
=
1
;
// loop stats
if
(
timer
->
has_loop
())
{
// overall loop time
time_loop
=
timer
->
get_wall
(
Timer
::
TOTAL
);
cpu_loop
=
timer
->
get_cpu
(
Timer
::
TOTAL
);
MPI_Allreduce
(
&
time_loop
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time_loop
=
tmp
/
nprocs
;
MPI_Allreduce
(
&
cpu_loop
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
cpu_loop
=
tmp
/
nprocs
;
if
(
time_loop
>
0.0
)
cpu_loop
=
cpu_loop
/
time_loop
*
100.0
;
if
(
me
==
0
)
{
int
ntasks
=
nprocs
*
nthreads
;
const
char
fmt1
[]
=
"Loop time of %g on %d procs "
"for %d steps with "
BIGINT_FORMAT
" atoms
\n\n
"
;
if
(
screen
)
fprintf
(
screen
,
fmt1
,
time_loop
,
ntasks
,
update
->
nsteps
,
atom
->
natoms
);
if
(
logfile
)
fprintf
(
logfile
,
fmt1
,
time_loop
,
ntasks
,
update
->
nsteps
,
atom
->
natoms
);
// Gromacs/NAMD-style performance metric for suitable unit settings
if
(
timeflag
&&
!
minflag
&&
!
prdflag
&&
!
tadflag
&&
(
update
->
nsteps
>
0
)
&&
(
update
->
dt
!=
0.0
)
&&
((
strcmp
(
update
->
unit_style
,
"lj"
)
==
0
)
||
(
strcmp
(
update
->
unit_style
,
"metal"
)
==
0
)
||
(
strcmp
(
update
->
unit_style
,
"micro"
)
==
0
)
||
(
strcmp
(
update
->
unit_style
,
"nano"
)
==
0
)
||
(
strcmp
(
update
->
unit_style
,
"electron"
)
==
0
)
||
(
strcmp
(
update
->
unit_style
,
"real"
)
==
0
))
)
{
double
one_fs
=
force
->
femtosecond
;
double
t_step
=
((
double
)
time_loop
)
/
((
double
)
update
->
nsteps
);
double
step_t
=
1.0
/
t_step
;
if
(
strcmp
(
update
->
unit_style
,
"lj"
)
==
0
)
{
double
tau_day
=
24.0
*
3600.0
/
t_step
*
update
->
dt
/
one_fs
;
const
char
perf
[]
=
"Performance: %.3f tau/day, %.3f timesteps/s
\n
"
;
if
(
screen
)
fprintf
(
screen
,
perf
,
tau_day
,
step_t
);
if
(
logfile
)
fprintf
(
logfile
,
perf
,
tau_day
,
step_t
);
}
else
{
double
hrs_ns
=
t_step
/
update
->
dt
*
1000000.0
*
one_fs
/
3600.0
;
double
ns_day
=
24.0
*
3600.0
/
t_step
*
update
->
dt
/
one_fs
/
1000000.0
;
const
char
perf
[]
=
"Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s
\n
"
;
if
(
screen
)
fprintf
(
screen
,
perf
,
ns_day
,
hrs_ns
,
step_t
);
if
(
logfile
)
fprintf
(
logfile
,
perf
,
ns_day
,
hrs_ns
,
step_t
);
}
}
// CPU use on MPI tasks and OpenMP threads
#ifdef LMP_USER_OMP
const
char
fmt2
[]
=
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads
\n
"
;
if
(
screen
)
fprintf
(
screen
,
fmt2
,
cpu_loop
,
nprocs
,
nthreads
);
if
(
logfile
)
fprintf
(
logfile
,
fmt2
,
cpu_loop
,
nprocs
,
nthreads
);
#else
if
(
lmp
->
kokkos
)
{
const
char
fmt2
[]
=
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads
\n
"
;
if
(
screen
)
fprintf
(
screen
,
fmt2
,
cpu_loop
,
nprocs
,
lmp
->
kokkos
->
num_threads
);
if
(
logfile
)
fprintf
(
logfile
,
fmt2
,
cpu_loop
,
nprocs
,
lmp
->
kokkos
->
num_threads
);
}
else
{
const
char
fmt2
[]
=
"%.1f%% CPU use with %d MPI tasks x no OpenMP threads
\n
"
;
if
(
screen
)
fprintf
(
screen
,
fmt2
,
cpu_loop
,
nprocs
);
if
(
logfile
)
fprintf
(
logfile
,
fmt2
,
cpu_loop
,
nprocs
);
}
#endif
}
}
// avoid division by zero for very short runs
if
(
time_loop
==
0.0
)
time_loop
=
1.0
;
if
(
cpu_loop
==
0.0
)
cpu_loop
=
100.0
;
// get "Other" wall time for later use
if
(
timer
->
has_normal
())
time_other
=
timer
->
get_wall
(
Timer
::
TOTAL
)
-
timer
->
get_wall
(
Timer
::
ALL
);
// minimization stats
if
(
minflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"Minimization stats:
\n
"
);
fprintf
(
screen
,
" Stopping criterion = %s
\n
"
,
update
->
minimize
->
stopstr
);
fprintf
(
screen
,
" Energy initial, next-to-last, final =
\n
"
" %18.12g %18.12g %18.12g
\n
"
,
update
->
minimize
->
einitial
,
update
->
minimize
->
eprevious
,
update
->
minimize
->
efinal
);
fprintf
(
screen
,
" Force two-norm initial, final = %g %g
\n
"
,
update
->
minimize
->
fnorm2_init
,
update
->
minimize
->
fnorm2_final
);
fprintf
(
screen
,
" Force max component initial, final = %g %g
\n
"
,
update
->
minimize
->
fnorminf_init
,
update
->
minimize
->
fnorminf_final
);
fprintf
(
screen
,
" Final line search alpha, max atom move = %g %g
\n
"
,
update
->
minimize
->
alpha_final
,
update
->
minimize
->
alpha_final
*
update
->
minimize
->
fnorminf_final
);
fprintf
(
screen
,
" Iterations, force evaluations = %d %d
\n
"
,
update
->
minimize
->
niter
,
update
->
minimize
->
neval
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"Minimization stats:
\n
"
);
fprintf
(
logfile
,
" Stopping criterion = %s
\n
"
,
update
->
minimize
->
stopstr
);
fprintf
(
logfile
,
" Energy initial, next-to-last, final =
\n
"
" %18.12g %18.12g %18.12g
\n
"
,
update
->
minimize
->
einitial
,
update
->
minimize
->
eprevious
,
update
->
minimize
->
efinal
);
fprintf
(
logfile
,
" Force two-norm initial, final = %g %g
\n
"
,
update
->
minimize
->
fnorm2_init
,
update
->
minimize
->
fnorm2_final
);
fprintf
(
logfile
,
" Force max component initial, final = %g %g
\n
"
,
update
->
minimize
->
fnorminf_init
,
update
->
minimize
->
fnorminf_final
);
fprintf
(
logfile
,
" Final line search alpha, max atom move = %g %g
\n
"
,
update
->
minimize
->
alpha_final
,
update
->
minimize
->
alpha_final
*
update
->
minimize
->
fnorminf_final
);
fprintf
(
logfile
,
" Iterations, force evaluations = %d %d
\n
"
,
update
->
minimize
->
niter
,
update
->
minimize
->
neval
);
}
}
}
// PRD stats using PAIR,BOND,KSPACE for dephase,dynamics,quench
if
(
prdflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
if
(
screen
)
fprintf
(
screen
,
"PRD stats:
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"PRD stats:
\n
"
);
time
=
timer
->
get_wall
(
Timer
::
DEPHASE
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Dephase time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Dephase time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
DYNAMICS
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Dynamics time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Dynamics time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
QUENCH
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Quench time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Quench time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
REPCOMM
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Comm time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Comm time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
REPOUT
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Output time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Output time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
time_other
;
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
// XXXX: replica comm, replica output
if
(
screen
)
fprintf
(
screen
,
" Other time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Other time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
}
// TAD stats using PAIR,BOND,KSPACE for neb,dynamics,quench
if
(
tadflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
if
(
screen
)
fprintf
(
screen
,
"TAD stats:
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"TAD stats:
\n
"
);
time
=
timer
->
get_wall
(
Timer
::
NEB
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" NEB time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" NEB time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
DYNAMICS
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Dynamics time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Dynamics time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
QUENCH
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Quench time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Quench time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
REPCOMM
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Comm time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Comm time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
timer
->
get_wall
(
Timer
::
REPOUT
);
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Output time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Output time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
time
=
time_other
;
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
" Other time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
" Other time (%%) = %g (%g)
\n
"
,
time
,
time
/
time_loop
*
100.0
);
}
}
if
(
timeflag
&&
timer
->
has_normal
())
{
if
(
timer
->
has_full
())
{
const
char
hdr
[]
=
"
\n
MPI task timing breakdown:
\n
"
"Section | min time | avg time | max time |%varavg| %CPU | %total
\n
"
"-----------------------------------------------------------------------
\n
"
;
if
(
me
==
0
)
{
if
(
screen
)
fputs
(
hdr
,
screen
);
if
(
logfile
)
fputs
(
hdr
,
logfile
);
}
}
else
{
const
char
hdr
[]
=
"
\n
MPI task timing breakdown:
\n
"
"Section | min time | avg time | max time |%varavg| %total
\n
"
"---------------------------------------------------------------
\n
"
;
if
(
me
==
0
)
{
if
(
screen
)
fputs
(
hdr
,
screen
);
if
(
logfile
)
fputs
(
hdr
,
logfile
);
}
}
mpi_timings
(
"Pair"
,
timer
,
Timer
::
PAIR
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
if
(
atom
->
molecular
)
mpi_timings
(
"Bond"
,
timer
,
Timer
::
BOND
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
if
(
force
->
kspace
)
mpi_timings
(
"Kspace"
,
timer
,
Timer
::
KSPACE
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
mpi_timings
(
"Neigh"
,
timer
,
Timer
::
NEIGH
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
mpi_timings
(
"Comm"
,
timer
,
Timer
::
COMM
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
mpi_timings
(
"Output"
,
timer
,
Timer
::
OUTPUT
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
mpi_timings
(
"Modify"
,
timer
,
Timer
::
MODIFY
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
if
(
timer
->
has_sync
())
mpi_timings
(
"Sync"
,
timer
,
Timer
::
SYNC
,
world
,
nprocs
,
nthreads
,
me
,
time_loop
,
screen
,
logfile
);
time
=
time_other
;
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
const
char
*
fmt
;
if
(
timer
->
has_full
())
fmt
=
"Other | |%- 12.4g| | | |%6.2f
\n
"
;
else
fmt
=
"Other | |%- 12.4g| | |%6.2f
\n
"
;
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
fmt
,
time
,
time
/
time_loop
*
100.0
);
if
(
logfile
)
fprintf
(
logfile
,
fmt
,
time
,
time
/
time_loop
*
100.0
);
}
}
#ifdef LMP_USER_OMP
const
char
thr_hdr_fmt
[]
=
"
\n
Thread timing breakdown (MPI rank %d):
\n
Total threaded time %.4g / %.1f%%
\n
"
;
const
char
thr_header
[]
=
"Section | min time | avg time | max time |%varavg| %total
\n
"
"---------------------------------------------------------------
\n
"
;
int
ifix
=
modify
->
find_fix
(
"package_omp"
);
// print thread breakdown only with full timer detail
if
((
ifix
>=
0
)
&&
timer
->
has_full
()
&&
me
==
0
)
{
double
thr_total
=
0.0
;
ThrData
*
td
;
FixOMP
*
fixomp
=
static_cast
<
FixOMP
*>
(
lmp
->
modify
->
fix
[
ifix
]);
for
(
i
=
0
;
i
<
nthreads
;
++
i
)
{
td
=
fixomp
->
get_thr
(
i
);
thr_total
+=
td
->
get_time
(
Timer
::
ALL
);
}
thr_total
/=
(
double
)
nthreads
;
if
(
thr_total
>
0.0
)
{
if
(
screen
)
{
fprintf
(
screen
,
thr_hdr_fmt
,
me
,
thr_total
,
thr_total
/
time_loop
*
100.0
);
fputs
(
thr_header
,
screen
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
thr_hdr_fmt
,
me
,
thr_total
,
thr_total
/
time_loop
*
100.0
);
fputs
(
thr_header
,
logfile
);
}
omp_times
(
fixomp
,
"Pair"
,
Timer
::
PAIR
,
nthreads
,
screen
,
logfile
);
if
(
atom
->
molecular
)
omp_times
(
fixomp
,
"Bond"
,
Timer
::
BOND
,
nthreads
,
screen
,
logfile
);
if
(
force
->
kspace
)
omp_times
(
fixomp
,
"Kspace"
,
Timer
::
KSPACE
,
nthreads
,
screen
,
logfile
);
omp_times
(
fixomp
,
"Neigh"
,
Timer
::
NEIGH
,
nthreads
,
screen
,
logfile
);
omp_times
(
fixomp
,
"Reduce"
,
Timer
::
COMM
,
nthreads
,
screen
,
logfile
);
}
}
#endif
if
(
lmp
->
kokkos
&&
lmp
->
kokkos
->
ngpu
>
0
)
if
(
const
char
*
env_clb
=
getenv
(
"CUDA_LAUNCH_BLOCKING"
))
if
(
!
(
strcmp
(
env_clb
,
"1"
)
==
0
))
{
error
->
warning
(
FLERR
,
"Timing breakdown may not be accurate since GPU/CPU overlap is enabled. "
"Using 'export CUDA_LAUNCH_BLOCKING=1' will give an accurate timing breakdown but will reduce performance"
);
}
// FFT timing statistics
// time3d,time1d = total time during run for 3d and 1d FFTs
// loop on timing() until nsample FFTs require at least 1.0 CPU sec
// time_kspace may be 0.0 if another partition is doing Kspace
if
(
fftflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
int
nsteps
=
update
->
nsteps
;
double
time3d
;
int
nsample
=
1
;
int
nfft
=
force
->
kspace
->
timing_3d
(
nsample
,
time3d
);
while
(
time3d
<
1.0
)
{
nsample
*=
2
;
nfft
=
force
->
kspace
->
timing_3d
(
nsample
,
time3d
);
}
time3d
=
nsteps
*
time3d
/
nsample
;
MPI_Allreduce
(
&
time3d
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time3d
=
tmp
/
nprocs
;
double
time1d
;
nsample
=
1
;
nfft
=
force
->
kspace
->
timing_1d
(
nsample
,
time1d
);
while
(
time1d
<
1.0
)
{
nsample
*=
2
;
nfft
=
force
->
kspace
->
timing_1d
(
nsample
,
time1d
);
}
time1d
=
nsteps
*
time1d
/
nsample
;
MPI_Allreduce
(
&
time1d
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time1d
=
tmp
/
nprocs
;
double
time_kspace
=
timer
->
get_wall
(
Timer
::
KSPACE
);
MPI_Allreduce
(
&
time_kspace
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time_kspace
=
tmp
/
nprocs
;
double
ntotal
=
1.0
*
force
->
kspace
->
nx_pppm
*
force
->
kspace
->
ny_pppm
*
force
->
kspace
->
nz_pppm
;
double
nflops
=
5.0
*
ntotal
*
log
(
ntotal
);
double
fraction
,
flop3
,
flop1
;
if
(
nsteps
)
{
if
(
time_kspace
)
fraction
=
time3d
/
time_kspace
*
100.0
;
else
fraction
=
0.0
;
flop3
=
nfft
*
nflops
/
1.0e9
/
(
time3d
/
nsteps
);
flop1
=
nfft
*
nflops
/
1.0e9
/
(
time1d
/
nsteps
);
}
else
fraction
=
flop3
=
flop1
=
0.0
;
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"FFT time (%% of Kspce) = %g (%g)
\n
"
,
time3d
,
fraction
);
fprintf
(
screen
,
"FFT Gflps 3d (1d only) = %g %g
\n
"
,
flop3
,
flop1
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"FFT time (%% of Kspce) = %g (%g)
\n
"
,
time3d
,
fraction
);
fprintf
(
logfile
,
"FFT Gflps 3d (1d only) = %g %g
\n
"
,
flop3
,
flop1
);
}
}
}
if
(
histoflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
tmp
=
atom
->
nlocal
;
stats
(
1
,
&
tmp
,
&
ave
,
&
max
,
&
min
,
10
,
histo
);
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"Nlocal: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
screen
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
screen
,
" %d"
,
histo
[
i
]);
fprintf
(
screen
,
"
\n
"
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"Nlocal: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
logfile
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
logfile
,
" %d"
,
histo
[
i
]);
fprintf
(
logfile
,
"
\n
"
);
}
}
tmp
=
atom
->
nghost
;
stats
(
1
,
&
tmp
,
&
ave
,
&
max
,
&
min
,
10
,
histo
);
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"Nghost: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
screen
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
screen
,
" %d"
,
histo
[
i
]);
fprintf
(
screen
,
"
\n
"
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"Nghost: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
logfile
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
logfile
,
" %d"
,
histo
[
i
]);
fprintf
(
logfile
,
"
\n
"
);
}
}
// find a non-skip neighbor list containing half pairwise interactions
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for
(
m
=
0
;
m
<
neighbor
->
old_nrequest
;
m
++
)
{
if
((
neighbor
->
old_requests
[
m
]
->
half
||
neighbor
->
old_requests
[
m
]
->
gran
||
neighbor
->
old_requests
[
m
]
->
respaouter
||
neighbor
->
old_requests
[
m
]
->
half_from_full
)
&&
neighbor
->
old_requests
[
m
]
->
skip
==
0
&&
neighbor
->
lists
[
m
]
&&
neighbor
->
lists
[
m
]
->
numneigh
)
{
if
(
!
neighbor
->
lists
[
m
]
&&
lmp
->
kokkos
&&
lmp
->
kokkos
->
neigh_list_kokkos
(
m
))
break
;
else
break
;
}
}
nneigh
=
0
;
if
(
m
<
neighbor
->
old_nrequest
)
{
if
(
neighbor
->
lists
[
m
])
{
int
inum
=
neighbor
->
lists
[
m
]
->
inum
;
int
*
ilist
=
neighbor
->
lists
[
m
]
->
ilist
;
int
*
numneigh
=
neighbor
->
lists
[
m
]
->
numneigh
;
for
(
i
=
0
;
i
<
inum
;
i
++
)
nneigh
+=
numneigh
[
ilist
[
i
]];
}
else
if
(
lmp
->
kokkos
)
nneigh
=
lmp
->
kokkos
->
neigh_count
(
m
);
}
tmp
=
nneigh
;
stats
(
1
,
&
tmp
,
&
ave
,
&
max
,
&
min
,
10
,
histo
);
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"Neighs: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
screen
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
screen
,
" %d"
,
histo
[
i
]);
fprintf
(
screen
,
"
\n
"
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"Neighs: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
logfile
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
logfile
,
" %d"
,
histo
[
i
]);
fprintf
(
logfile
,
"
\n
"
);
}
}
// find a non-skip neighbor list containing full pairwise interactions
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for
(
m
=
0
;
m
<
neighbor
->
old_nrequest
;
m
++
)
{
if
(
neighbor
->
old_requests
[
m
]
->
full
&&
neighbor
->
old_requests
[
m
]
->
skip
==
0
)
{
if
(
lmp
->
kokkos
&&
lmp
->
kokkos
->
neigh_list_kokkos
(
m
))
break
;
else
break
;
}
}
nneighfull
=
0
;
if
(
m
<
neighbor
->
old_nrequest
)
{
if
(
neighbor
->
lists
[
m
]
&&
neighbor
->
lists
[
m
]
->
numneigh
)
{
int
inum
=
neighbor
->
lists
[
m
]
->
inum
;
int
*
ilist
=
neighbor
->
lists
[
m
]
->
ilist
;
int
*
numneigh
=
neighbor
->
lists
[
m
]
->
numneigh
;
for
(
i
=
0
;
i
<
inum
;
i
++
)
nneighfull
+=
numneigh
[
ilist
[
i
]];
}
else
if
(
!
neighbor
->
lists
[
m
]
&&
lmp
->
kokkos
)
nneighfull
=
lmp
->
kokkos
->
neigh_count
(
m
);
tmp
=
nneighfull
;
stats
(
1
,
&
tmp
,
&
ave
,
&
max
,
&
min
,
10
,
histo
);
if
(
me
==
0
)
{
if
(
screen
)
{
fprintf
(
screen
,
"FullNghs: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
screen
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
screen
,
" %d"
,
histo
[
i
]);
fprintf
(
screen
,
"
\n
"
);
}
if
(
logfile
)
{
fprintf
(
logfile
,
"FullNghs: %g ave %g max %g min
\n
"
,
ave
,
max
,
min
);
fprintf
(
logfile
,
"Histogram:"
);
for
(
i
=
0
;
i
<
10
;
i
++
)
fprintf
(
logfile
,
" %d"
,
histo
[
i
]);
fprintf
(
logfile
,
"
\n
"
);
}
}
}
}
if
(
neighflag
)
{
if
(
me
==
0
)
{
if
(
screen
)
fprintf
(
screen
,
"
\n
"
);
if
(
logfile
)
fprintf
(
logfile
,
"
\n
"
);
}
tmp
=
MAX
(
nneigh
,
nneighfull
);
double
nall
;
MPI_Allreduce
(
&
tmp
,
&
nall
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
int
nspec
;
double
nspec_all
=
0
;
if
(
atom
->
molecular
==
1
)
{
int
**
nspecial
=
atom
->
nspecial
;
int
nlocal
=
atom
->
nlocal
;
nspec
=
0
;
for
(
i
=
0
;
i
<
nlocal
;
i
++
)
nspec
+=
nspecial
[
i
][
2
];
tmp
=
nspec
;
MPI_Allreduce
(
&
tmp
,
&
nspec_all
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
}
else
if
(
atom
->
molecular
==
2
)
{
Molecule
**
onemols
=
atom
->
avec
->
onemols
;
int
*
molindex
=
atom
->
molindex
;
int
*
molatom
=
atom
->
molatom
;
int
nlocal
=
atom
->
nlocal
;
int
imol
,
iatom
;
nspec
=
0
;
for
(
i
=
0
;
i
<
nlocal
;
i
++
)
{
if
(
molindex
[
i
]
<
0
)
continue
;
imol
=
molindex
[
i
];
iatom
=
molatom
[
i
];
nspec
+=
onemols
[
imol
]
->
nspecial
[
iatom
][
2
];
}
tmp
=
nspec
;
MPI_Allreduce
(
&
tmp
,
&
nspec_all
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
}
if
(
me
==
0
)
{
if
(
screen
)
{
if
(
nall
<
2.0e9
)
fprintf
(
screen
,
"Total # of neighbors = %d
\n
"
,
static_cast
<
int
>
(
nall
));
else
fprintf
(
screen
,
"Total # of neighbors = %g
\n
"
,
nall
);
if
(
atom
->
natoms
>
0
)
fprintf
(
screen
,
"Ave neighs/atom = %g
\n
"
,
nall
/
atom
->
natoms
);
if
(
atom
->
molecular
&&
atom
->
natoms
>
0
)
fprintf
(
screen
,
"Ave special neighs/atom = %g
\n
"
,
nspec_all
/
atom
->
natoms
);
fprintf
(
screen
,
"Neighbor list builds = "
BIGINT_FORMAT
"
\n
"
,
neighbor
->
ncalls
);
if
(
neighbor
->
dist_check
)
fprintf
(
screen
,
"Dangerous builds = "
BIGINT_FORMAT
"
\n
"
,
neighbor
->
ndanger
);
else
fprintf
(
screen
,
"Dangerous builds not checked
\n
"
);
}
if
(
logfile
)
{
if
(
nall
<
2.0e9
)
fprintf
(
logfile
,
"Total # of neighbors = %d
\n
"
,
static_cast
<
int
>
(
nall
));
else
fprintf
(
logfile
,
"Total # of neighbors = %g
\n
"
,
nall
);
if
(
atom
->
natoms
>
0
)
fprintf
(
logfile
,
"Ave neighs/atom = %g
\n
"
,
nall
/
atom
->
natoms
);
if
(
atom
->
molecular
&&
atom
->
natoms
>
0
)
fprintf
(
logfile
,
"Ave special neighs/atom = %g
\n
"
,
nspec_all
/
atom
->
natoms
);
fprintf
(
logfile
,
"Neighbor list builds = "
BIGINT_FORMAT
"
\n
"
,
neighbor
->
ncalls
);
if
(
neighbor
->
dist_check
)
fprintf
(
logfile
,
"Dangerous builds = "
BIGINT_FORMAT
"
\n
"
,
neighbor
->
ndanger
);
else
fprintf
(
logfile
,
"Dangerous builds not checked
\n
"
);
}
}
}
if
(
logfile
)
fflush
(
logfile
);
}
/* ---------------------------------------------------------------------- */
void
Finish
::
stats
(
int
n
,
double
*
data
,
double
*
pave
,
double
*
pmax
,
double
*
pmin
,
int
nhisto
,
int
*
histo
)
{
int
i
,
m
;
int
*
histotmp
;
double
min
=
1.0e20
;
double
max
=
-
1.0e20
;
double
ave
=
0.0
;
for
(
i
=
0
;
i
<
n
;
i
++
)
{
ave
+=
data
[
i
];
if
(
data
[
i
]
<
min
)
min
=
data
[
i
];
if
(
data
[
i
]
>
max
)
max
=
data
[
i
];
}
int
ntotal
;
MPI_Allreduce
(
&
n
,
&
ntotal
,
1
,
MPI_INT
,
MPI_SUM
,
world
);
double
tmp
;
MPI_Allreduce
(
&
ave
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
ave
=
tmp
/
ntotal
;
MPI_Allreduce
(
&
min
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_MIN
,
world
);
min
=
tmp
;
MPI_Allreduce
(
&
max
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_MAX
,
world
);
max
=
tmp
;
for
(
i
=
0
;
i
<
nhisto
;
i
++
)
histo
[
i
]
=
0
;
double
del
=
max
-
min
;
for
(
i
=
0
;
i
<
n
;
i
++
)
{
if
(
del
==
0.0
)
m
=
0
;
else
m
=
static_cast
<
int
>
((
data
[
i
]
-
min
)
/
del
*
nhisto
);
if
(
m
>
nhisto
-
1
)
m
=
nhisto
-
1
;
histo
[
m
]
++
;
}
memory
->
create
(
histotmp
,
nhisto
,
"finish:histotmp"
);
MPI_Allreduce
(
histo
,
histotmp
,
nhisto
,
MPI_INT
,
MPI_SUM
,
world
);
for
(
i
=
0
;
i
<
nhisto
;
i
++
)
histo
[
i
]
=
histotmp
[
i
];
memory
->
destroy
(
histotmp
);
*
pave
=
ave
;
*
pmax
=
max
;
*
pmin
=
min
;
}
/* ---------------------------------------------------------------------- */
void
mpi_timings
(
const
char
*
label
,
Timer
*
t
,
enum
Timer
::
ttype
tt
,
MPI_Comm
world
,
const
int
nprocs
,
const
int
nthreads
,
const
int
me
,
double
time_loop
,
FILE
*
scr
,
FILE
*
log
)
{
double
tmp
,
time_max
,
time_min
,
time_sq
;
double
time
=
t
->
get_wall
(
tt
);
double
time_cpu
=
t
->
get_cpu
(
tt
);
if
(
time
/
time_loop
<
0.001
)
// insufficient timer resolution!
time_cpu
=
1.0
;
else
time_cpu
=
time_cpu
/
time
;
if
(
time_cpu
>
nthreads
)
time_cpu
=
nthreads
;
MPI_Allreduce
(
&
time
,
&
time_min
,
1
,
MPI_DOUBLE
,
MPI_MIN
,
world
);
MPI_Allreduce
(
&
time
,
&
time_max
,
1
,
MPI_DOUBLE
,
MPI_MAX
,
world
);
time_sq
=
time
*
time
;
MPI_Allreduce
(
&
time
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time
=
tmp
/
nprocs
;
MPI_Allreduce
(
&
time_sq
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time_sq
=
tmp
/
nprocs
;
MPI_Allreduce
(
&
time_cpu
,
&
tmp
,
1
,
MPI_DOUBLE
,
MPI_SUM
,
world
);
time_cpu
=
tmp
/
nprocs
*
100.0
;
// % variance from the average as measure of load imbalance
if
(
time
>
1.0e-10
)
time_sq
=
sqrt
(
time_sq
/
time
-
time
)
*
100.0
;
else
time_sq
=
0.0
;
if
(
me
==
0
)
{
tmp
=
time
/
time_loop
*
100.0
;
if
(
t
->
has_full
())
{
const
char
fmt
[]
=
"%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f
\n
"
;
if
(
scr
)
fprintf
(
scr
,
fmt
,
label
,
time_min
,
time
,
time_max
,
time_sq
,
time_cpu
,
tmp
);
if
(
log
)
fprintf
(
log
,
fmt
,
label
,
time_min
,
time
,
time_max
,
time_sq
,
time_cpu
,
tmp
);
time_loop
=
100.0
/
time_loop
;
}
else
{
const
char
fmt
[]
=
"%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f
\n
"
;
if
(
scr
)
fprintf
(
scr
,
fmt
,
label
,
time_min
,
time
,
time_max
,
time_sq
,
tmp
);
if
(
log
)
fprintf
(
log
,
fmt
,
label
,
time_min
,
time
,
time_max
,
time_sq
,
tmp
);
}
}
}
/* ---------------------------------------------------------------------- */
#ifdef LMP_USER_OMP
void
omp_times
(
FixOMP
*
fix
,
const
char
*
label
,
enum
Timer
::
ttype
which
,
const
int
nthreads
,
FILE
*
scr
,
FILE
*
log
)
{
const
char
fmt
[]
=
"%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f
\n
"
;
double
time_min
,
time_max
,
time_avg
,
time_total
,
time_std
;
time_min
=
1.0e100
;
time_max
=
-
1.0e100
;
time_total
=
time_avg
=
time_std
=
0.0
;
for
(
int
i
=
0
;
i
<
nthreads
;
++
i
)
{
ThrData
*
thr
=
fix
->
get_thr
(
i
);
double
tmp
=
thr
->
get_time
(
which
);
time_min
=
MIN
(
time_min
,
tmp
);
time_max
=
MAX
(
time_max
,
tmp
);
time_avg
+=
tmp
;
time_std
+=
tmp
*
tmp
;
time_total
+=
thr
->
get_time
(
Timer
::
ALL
);
}
time_avg
/=
nthreads
;
time_std
/=
nthreads
;
time_total
/=
nthreads
;
if
(
time_avg
>
1.0e-10
)
time_std
=
sqrt
(
time_std
/
time_avg
-
time_avg
)
*
100.0
;
else
time_std
=
0.0
;
if
(
scr
)
fprintf
(
scr
,
fmt
,
label
,
time_min
,
time_avg
,
time_max
,
time_std
,
time_avg
/
time_total
*
100.0
);
if
(
log
)
fprintf
(
log
,
fmt
,
label
,
time_min
,
time_avg
,
time_max
,
time_std
,
time_avg
/
time_total
*
100.0
);
}
#endif
Event Timeline
Log In to Comment