Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F76846859
finish.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Aug 10, 18:04
Size
30 KB
Mime Type
text/x-c
Expires
Mon, Aug 12, 18:04 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
19779199
Attached To
rLAMMPS lammps
finish.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "finish.h"
#include "timer.h"
#include "universe.h"
#include "accelerator_kokkos.h"
#include "accelerator_omp.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "force.h"
#include "kspace.h"
#include "update.h"
#include "min.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "output.h"
#include "memory.h"
#include "error.h"
#ifdef LMP_USER_OMP
#include "modify.h"
#include "fix_omp.h"
#include "thr_data.h"
#endif
using namespace LAMMPS_NS;
// local function prototypes, code at end of file
static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
MPI_Comm world, const int nprocs, const int nthreads,
const int me, double time_loop, FILE *scr, FILE *log);
#ifdef LMP_USER_OMP
static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
const int nthreads,FILE *scr, FILE *log);
#endif
/* ---------------------------------------------------------------------- */
Finish::Finish(LAMMPS *lmp) : Pointers(lmp) {}
/* ---------------------------------------------------------------------- */
void Finish::end(int flag)
{
int i,m,nneigh,nneighfull;
int histo[10];
int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
double time,tmp,ave,max,min;
double time_loop,time_other,cpu_loop;
int me,nprocs;
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
const int nthreads = comm->nthreads;
// recompute natoms in case atoms have been lost
bigint nblocal = atom->nlocal;
MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
// choose flavors of statistical output
// flag determines caller
// flag = 0 = just loop summary
// flag = 1 = dynamics or minimization
// flag = 2 = PRD
// flag = 3 = TAD
// turn off neighflag for Kspace partition of verlet/split integrator
minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
time_loop = cpu_loop = time_other = 0.0;
if (flag == 1) {
if (update->whichflag == 2) minflag = 1;
timeflag = histoflag = 1;
neighflag = 1;
if (update->whichflag == 1 &&
strncmp(update->integrate_style,"verlet/split",12) == 0 &&
universe->iworld == 1) neighflag = 0;
if (force->kspace && force->kspace_match("pppm",0)
&& force->kspace->fftbench) fftflag = 1;
}
if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
if (flag == 3) tadflag = histoflag = neighflag = 1;
// loop stats
if (timer->has_loop()) {
// overall loop time
time_loop = timer->get_wall(Timer::TOTAL);
cpu_loop = timer->get_cpu(Timer::TOTAL);
MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_loop = tmp/nprocs;
MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
cpu_loop = tmp/nprocs;
if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;
if (me == 0) {
int ntasks = nprocs * nthreads;
const char fmt1[] = "Loop time of %g on %d procs "
"for %d steps with " BIGINT_FORMAT " atoms\n\n";
if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
atom->natoms);
if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
atom->natoms);
// Gromacs/NAMD-style performance metric for suitable unit settings
if ( timeflag && !minflag && !prdflag && !tadflag &&
(update->nsteps > 0) && (update->dt != 0.0) &&
((strcmp(update->unit_style,"lj") == 0) ||
(strcmp(update->unit_style,"metal") == 0) ||
(strcmp(update->unit_style,"micro") == 0) ||
(strcmp(update->unit_style,"nano") == 0) ||
(strcmp(update->unit_style,"electron") == 0) ||
(strcmp(update->unit_style,"real") == 0)) ) {
double one_fs = force->femtosecond;
double t_step = ((double) time_loop) / ((double) update->nsteps);
double step_t = 1.0/t_step;
if (strcmp(update->unit_style,"lj") == 0) {
double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
if (screen) fprintf(screen,perf,tau_day,step_t);
if (logfile) fprintf(logfile,perf,tau_day,step_t);
} else {
double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
const char perf[] =
"Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
}
}
// CPU use on MPI tasks and OpenMP threads
if (lmp->kokkos) {
const char fmt2[] =
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
} else {
#if defined(_OPENMP)
const char fmt2[] =
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
#else
const char fmt2[] =
"%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
#endif
}
}
}
// avoid division by zero for very short runs
if (time_loop == 0.0) time_loop = 1.0;
if (cpu_loop == 0.0) cpu_loop = 100.0;
// get "Other" wall time for later use
if (timer->has_normal())
time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);
// minimization stats
if (minflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
if (me == 0) {
if (screen) {
fprintf(screen,"Minimization stats:\n");
fprintf(screen," Stopping criterion = %s\n",
update->minimize->stopstr);
fprintf(screen," Energy initial, next-to-last, final = \n"
" %18.12g %18.12g %18.12g\n",
update->minimize->einitial,update->minimize->eprevious,
update->minimize->efinal);
fprintf(screen," Force two-norm initial, final = %g %g\n",
update->minimize->fnorm2_init,update->minimize->fnorm2_final);
fprintf(screen," Force max component initial, final = %g %g\n",
update->minimize->fnorminf_init,
update->minimize->fnorminf_final);
fprintf(screen," Final line search alpha, max atom move = %g %g\n",
update->minimize->alpha_final,
update->minimize->alpha_final*
update->minimize->fnorminf_final);
fprintf(screen," Iterations, force evaluations = %d %d\n",
update->minimize->niter,update->minimize->neval);
}
if (logfile) {
fprintf(logfile,"Minimization stats:\n");
fprintf(logfile," Stopping criterion = %s\n",
update->minimize->stopstr);
fprintf(logfile," Energy initial, next-to-last, final = \n"
" %18.12g %18.12g %18.12g\n",
update->minimize->einitial,update->minimize->eprevious,
update->minimize->efinal);
fprintf(logfile," Force two-norm initial, final = %g %g\n",
update->minimize->fnorm2_init,update->minimize->fnorm2_final);
fprintf(logfile," Force max component initial, final = %g %g\n",
update->minimize->fnorminf_init,
update->minimize->fnorminf_final);
fprintf(logfile," Final line search alpha, max atom move = %g %g\n",
update->minimize->alpha_final,
update->minimize->alpha_final*
update->minimize->fnorminf_final);
fprintf(logfile," Iterations, force evaluations = %d %d\n",
update->minimize->niter,update->minimize->neval);
}
}
}
// PRD stats using PAIR,BOND,KSPACE for dephase,dynamics,quench
if (prdflag) {
if (me == 0) {
if (screen) fprintf(screen,"\nPRD stats:\n");
if (logfile) fprintf(logfile,"\nPRD stats:\n");
}
time = timer->get_wall(Timer::DEPHASE);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Dephase time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Dephase time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::DYNAMICS);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Dynamics time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Dynamics time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::QUENCH);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Quench time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Quench time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::REPCOMM);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::REPOUT);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = time_other;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) { // XXXX: replica comm, replica output
if (screen)
fprintf(screen," Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
}
// TAD stats using PAIR,BOND,KSPACE for neb,dynamics,quench
if (tadflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
if (screen) fprintf(screen,"TAD stats:\n");
if (logfile) fprintf(logfile,"TAD stats:\n");
time = timer->get_wall(Timer::NEB);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," NEB time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," NEB time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::DYNAMICS);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Dynamics time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Dynamics time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::QUENCH);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Quench time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Quench time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::REPCOMM);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::REPOUT);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = time_other;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
}
if (timeflag && timer->has_normal()) {
if (timer->has_full()) {
const char hdr[] = "\nMPI task timing breakdown:\n"
"Section | min time | avg time | max time |%varavg| %CPU | %total\n"
"-----------------------------------------------------------------------\n";
if (me == 0) {
if (screen) fputs(hdr,screen);
if (logfile) fputs(hdr,logfile);
}
} else {
const char hdr[] = "\nMPI task timing breakdown:\n"
"Section | min time | avg time | max time |%varavg| %total\n"
"---------------------------------------------------------------\n";
if (me == 0) {
if (screen) fputs(hdr,screen);
if (logfile) fputs(hdr,logfile);
}
}
mpi_timings("Pair",timer,Timer::PAIR, world,nprocs,
nthreads,me,time_loop,screen,logfile);
if (atom->molecular)
mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
nthreads,me,time_loop,screen,logfile);
if (force->kspace)
mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
nthreads,me,time_loop,screen,logfile);
if (timer->has_sync())
mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
nthreads,me,time_loop,screen,logfile);
time = time_other;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
const char *fmt;
if (timer->has_full())
fmt = "Other | |%- 12.4g| | | |%6.2f\n";
else
fmt = "Other | |%- 12.4g| | |%6.2f\n";
if (me == 0) {
if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
}
}
#ifdef LMP_USER_OMP
const char thr_hdr_fmt[] =
"\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
const char thr_header[] =
"Section | min time | avg time | max time |%varavg| %total\n"
"---------------------------------------------------------------\n";
int ifix = modify->find_fix("package_omp");
// print thread breakdown only with full timer detail
if ((ifix >= 0) && timer->has_full() && me == 0) {
double thr_total = 0.0;
ThrData *td;
FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
for (i=0; i < nthreads; ++i) {
td = fixomp->get_thr(i);
thr_total += td->get_time(Timer::ALL);
}
thr_total /= (double) nthreads;
if (thr_total > 0.0) {
if (screen) {
fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
fputs(thr_header,screen);
}
if (logfile) {
fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
fputs(thr_header,logfile);
}
omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);
if (atom->molecular)
omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);
if (force->kspace)
omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);
omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
}
}
#endif
if (lmp->kokkos && lmp->kokkos->ngpu > 0)
if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING"))
if (!(strcmp(env_clb,"1") == 0)) {
error->warning(FLERR,"Timing breakdown may not be accurate since GPU/CPU overlap is enabled. "
"Using 'export CUDA_LAUNCH_BLOCKING=1' will give an accurate timing breakdown but will reduce performance");
}
// FFT timing statistics
// time3d,time1d = total time during run for 3d and 1d FFTs
// loop on timing() until nsample FFTs require at least 1.0 CPU sec
// time_kspace may be 0.0 if another partition is doing Kspace
if (fftflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
int nsteps = update->nsteps;
double time3d;
int nsample = 1;
int nfft = force->kspace->timing_3d(nsample,time3d);
while (time3d < 1.0) {
nsample *= 2;
nfft = force->kspace->timing_3d(nsample,time3d);
}
time3d = nsteps * time3d / nsample;
MPI_Allreduce(&time3d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time3d = tmp/nprocs;
double time1d;
nsample = 1;
nfft = force->kspace->timing_1d(nsample,time1d);
while (time1d < 1.0) {
nsample *= 2;
nfft = force->kspace->timing_1d(nsample,time1d);
}
time1d = nsteps * time1d / nsample;
MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time1d = tmp/nprocs;
double time_kspace = timer->get_wall(Timer::KSPACE);
MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_kspace = tmp/nprocs;
double ntotal = 1.0 * force->kspace->nx_pppm *
force->kspace->ny_pppm * force->kspace->nz_pppm;
double nflops = 5.0 * ntotal * log(ntotal);
double fraction,flop3,flop1;
if (nsteps) {
if (time_kspace) fraction = time3d/time_kspace*100.0;
else fraction = 0.0;
flop3 = nfft*nflops/1.0e9/(time3d/nsteps);
flop1 = nfft*nflops/1.0e9/(time1d/nsteps);
} else fraction = flop3 = flop1 = 0.0;
if (me == 0) {
if (screen) {
fprintf(screen,"FFT time (%% of Kspce) = %g (%g)\n",time3d,fraction);
fprintf(screen,"FFT Gflps 3d (1d only) = %g %g\n",flop3,flop1);
}
if (logfile) {
fprintf(logfile,"FFT time (%% of Kspce) = %g (%g)\n",time3d,fraction);
fprintf(logfile,"FFT Gflps 3d (1d only) = %g %g\n",flop3,flop1);
}
}
}
if (histoflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
tmp = atom->nlocal;
stats(1,&tmp,&ave,&max,&min,10,histo);
if (me == 0) {
if (screen) {
fprintf(screen,"Nlocal: %g ave %g max %g min\n",ave,max,min);
fprintf(screen,"Histogram:");
for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile,"Nlocal: %g ave %g max %g min\n",ave,max,min);
fprintf(logfile,"Histogram:");
for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
fprintf(logfile,"\n");
}
}
tmp = atom->nghost;
stats(1,&tmp,&ave,&max,&min,10,histo);
if (me == 0) {
if (screen) {
fprintf(screen,"Nghost: %g ave %g max %g min\n",ave,max,min);
fprintf(screen,"Histogram:");
for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile,"Nghost: %g ave %g max %g min\n",ave,max,min);
fprintf(logfile,"Histogram:");
for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
fprintf(logfile,"\n");
}
}
// find a non-skip neighbor list containing half pairwise interactions
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for (m = 0; m < neighbor->old_nrequest; m++) {
if ((neighbor->old_requests[m]->half ||
neighbor->old_requests[m]->gran ||
neighbor->old_requests[m]->respaouter ||
neighbor->old_requests[m]->half_from_full) &&
neighbor->old_requests[m]->skip == 0 &&
neighbor->lists[m] && neighbor->lists[m]->numneigh) {
if (!neighbor->lists[m] && lmp->kokkos &&
lmp->kokkos->neigh_list_kokkos(m)) break;
else break;
}
}
nneigh = 0;
if (m < neighbor->old_nrequest) {
if (neighbor->lists[m]) {
int inum = neighbor->lists[m]->inum;
int *ilist = neighbor->lists[m]->ilist;
int *numneigh = neighbor->lists[m]->numneigh;
for (i = 0; i < inum; i++)
nneigh += numneigh[ilist[i]];
} else if (lmp->kokkos) nneigh = lmp->kokkos->neigh_count(m);
}
tmp = nneigh;
stats(1,&tmp,&ave,&max,&min,10,histo);
if (me == 0) {
if (screen) {
fprintf(screen,"Neighs: %g ave %g max %g min\n",ave,max,min);
fprintf(screen,"Histogram:");
for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile,"Neighs: %g ave %g max %g min\n",ave,max,min);
fprintf(logfile,"Histogram:");
for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
fprintf(logfile,"\n");
}
}
// find a non-skip neighbor list containing full pairwise interactions
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for (m = 0; m < neighbor->old_nrequest; m++) {
if (neighbor->old_requests[m]->full &&
neighbor->old_requests[m]->skip == 0) {
if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
else break;
}
}
nneighfull = 0;
if (m < neighbor->old_nrequest) {
if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
int inum = neighbor->lists[m]->inum;
int *ilist = neighbor->lists[m]->ilist;
int *numneigh = neighbor->lists[m]->numneigh;
for (i = 0; i < inum; i++)
nneighfull += numneigh[ilist[i]];
} else if (!neighbor->lists[m] && lmp->kokkos)
nneighfull = lmp->kokkos->neigh_count(m);
tmp = nneighfull;
stats(1,&tmp,&ave,&max,&min,10,histo);
if (me == 0) {
if (screen) {
fprintf(screen,"FullNghs: %g ave %g max %g min\n",ave,max,min);
fprintf(screen,"Histogram:");
for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile,"FullNghs: %g ave %g max %g min\n",ave,max,min);
fprintf(logfile,"Histogram:");
for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
fprintf(logfile,"\n");
}
}
}
}
if (neighflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
tmp = MAX(nneigh,nneighfull);
double nall;
MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);
int nspec;
double nspec_all = 0;
if (atom->molecular == 1) {
int **nspecial = atom->nspecial;
int nlocal = atom->nlocal;
nspec = 0;
for (i = 0; i < nlocal; i++) nspec += nspecial[i][2];
tmp = nspec;
MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
} else if (atom->molecular == 2) {
Molecule **onemols = atom->avec->onemols;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
int nlocal = atom->nlocal;
int imol,iatom;
nspec = 0;
for (i = 0; i < nlocal; i++) {
if (molindex[i] < 0) continue;
imol = molindex[i];
iatom = molatom[i];
nspec += onemols[imol]->nspecial[iatom][2];
}
tmp = nspec;
MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
}
if (me == 0) {
if (screen) {
if (nall < 2.0e9)
fprintf(screen,
"Total # of neighbors = %d\n",static_cast<int> (nall));
else fprintf(screen,"Total # of neighbors = %g\n",nall);
if (atom->natoms > 0)
fprintf(screen,"Ave neighs/atom = %g\n",nall/atom->natoms);
if (atom->molecular && atom->natoms > 0)
fprintf(screen,"Ave special neighs/atom = %g\n",
nspec_all/atom->natoms);
fprintf(screen,"Neighbor list builds = " BIGINT_FORMAT "\n",
neighbor->ncalls);
if (neighbor->dist_check)
fprintf(screen,"Dangerous builds = " BIGINT_FORMAT "\n",
neighbor->ndanger);
else fprintf(screen,"Dangerous builds not checked\n");
}
if (logfile) {
if (nall < 2.0e9)
fprintf(logfile,
"Total # of neighbors = %d\n",static_cast<int> (nall));
else fprintf(logfile,"Total # of neighbors = %g\n",nall);
if (atom->natoms > 0)
fprintf(logfile,"Ave neighs/atom = %g\n",nall/atom->natoms);
if (atom->molecular && atom->natoms > 0)
fprintf(logfile,"Ave special neighs/atom = %g\n",
nspec_all/atom->natoms);
fprintf(logfile,"Neighbor list builds = " BIGINT_FORMAT "\n",
neighbor->ncalls);
if (neighbor->dist_check)
fprintf(logfile,"Dangerous builds = " BIGINT_FORMAT "\n",
neighbor->ndanger);
else fprintf(logfile,"Dangerous builds not checked\n");
}
}
}
if (logfile) fflush(logfile);
}
/* ---------------------------------------------------------------------- */
void Finish::stats(int n, double *data,
double *pave, double *pmax, double *pmin,
int nhisto, int *histo)
{
int i,m;
int *histotmp;
double min = 1.0e20;
double max = -1.0e20;
double ave = 0.0;
for (i = 0; i < n; i++) {
ave += data[i];
if (data[i] < min) min = data[i];
if (data[i] > max) max = data[i];
}
int ntotal;
MPI_Allreduce(&n,&ntotal,1,MPI_INT,MPI_SUM,world);
double tmp;
MPI_Allreduce(&ave,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
ave = tmp/ntotal;
MPI_Allreduce(&min,&tmp,1,MPI_DOUBLE,MPI_MIN,world);
min = tmp;
MPI_Allreduce(&max,&tmp,1,MPI_DOUBLE,MPI_MAX,world);
max = tmp;
for (i = 0; i < nhisto; i++) histo[i] = 0;
double del = max - min;
for (i = 0; i < n; i++) {
if (del == 0.0) m = 0;
else m = static_cast<int> ((data[i]-min)/del * nhisto);
if (m > nhisto-1) m = nhisto-1;
histo[m]++;
}
memory->create(histotmp,nhisto,"finish:histotmp");
MPI_Allreduce(histo,histotmp,nhisto,MPI_INT,MPI_SUM,world);
for (i = 0; i < nhisto; i++) histo[i] = histotmp[i];
memory->destroy(histotmp);
*pave = ave;
*pmax = max;
*pmin = min;
}
/* ---------------------------------------------------------------------- */
void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
MPI_Comm world, const int nprocs, const int nthreads,
const int me, double time_loop, FILE *scr, FILE *log)
{
double tmp, time_max, time_min, time_sq;
double time = t->get_wall(tt);
double time_cpu = t->get_cpu(tt);
if (time/time_loop < 0.001) // insufficient timer resolution!
time_cpu = 1.0;
else
time_cpu = time_cpu / time;
if (time_cpu > nthreads) time_cpu = nthreads;
MPI_Allreduce(&time,&time_min,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&time,&time_max,1,MPI_DOUBLE,MPI_MAX,world);
time_sq = time*time;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
MPI_Allreduce(&time_sq,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_sq = tmp/nprocs;
MPI_Allreduce(&time_cpu,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_cpu = tmp/nprocs*100.0;
// % variance from the average as measure of load imbalance
if ((time_sq/time - time) > 1.0e-10)
time_sq = sqrt(time_sq/time - time)*100.0;
else
time_sq = 0.0;
if (me == 0) {
tmp = time/time_loop*100.0;
if (t->has_full()) {
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f\n";
if (scr)
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
if (log)
fprintf(log,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
time_loop = 100.0/time_loop;
} else {
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
if (scr)
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,tmp);
if (log)
fprintf(log,fmt,label,time_min,time,time_max,time_sq,tmp);
}
}
}
/* ---------------------------------------------------------------------- */
#ifdef LMP_USER_OMP
void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
const int nthreads,FILE *scr, FILE *log)
{
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
double time_min, time_max, time_avg, time_total, time_std;
time_min = 1.0e100;
time_max = -1.0e100;
time_total = time_avg = time_std = 0.0;
for (int i=0; i < nthreads; ++i) {
ThrData *thr = fix->get_thr(i);
double tmp=thr->get_time(which);
time_min = MIN(time_min,tmp);
time_max = MAX(time_max,tmp);
time_avg += tmp;
time_std += tmp*tmp;
time_total += thr->get_time(Timer::ALL);
}
time_avg /= nthreads;
time_std /= nthreads;
time_total /= nthreads;
if ((time_std/time_avg -time_avg) > 1.0e-10)
time_std = sqrt(time_std/time_avg - time_avg)*100.0;
else
time_std = 0.0;
if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std,
time_avg/time_total*100.0);
if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std,
time_avg/time_total*100.0);
}
#endif
Event Timeline
Log In to Comment