/***************************************************************************
                                 balance.h
                             -------------------
                           W. Michael Brown (ORNL)

  Class for host-device load balancing

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov
 ***************************************************************************/
#ifndef LAL_BALANCE_H
#define LAL_BALANCE_H
#include "device.h"
#include <math.h>
#define _HD_BALANCE_EVERY 25
#define _HD_BALANCE_WEIGHT 0.5
#define _HD_BALANCE_GAP 1.10
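// Note: _HD_BALANCE_EVERY is the rebalancing period in timesteps, and
// _HD_BALANCE_GAP pads the measured split by 10% in favor of the device;
// _HD_BALANCE_WEIGHT is defined here but not referenced in this header.
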
/// Host/device load balancer
template <class numtyp, class acctyp>
class Balance {
 public:
  inline Balance() : _init_done(false), _measure_this_step(false) {}
  inline ~Balance() { clear(); }

  /// Clear any old data and setup for new LAMMPS run
  inline void init(Device<numtyp,acctyp> *gpu, const bool gpu_nbor,
                   const double split);
  /// Clear all host and device data
  inline void clear() {
    if (_init_done) {
      _device_time.clear();
      _measure_this_step = false;
      _init_done = false;
    }
  }

  /// Return the timestep since initialization
  inline int timestep() { return _timestep; }
  /// Get a count of the number of particles host will handle for initial alloc
  inline int first_host_count(const int nlocal, const double gpu_split,
                              const bool gpu_nbor) const {
    int host_nlocal = 0;
    if (gpu_nbor && gpu_split != 1.0) {
      if (gpu_split > 0)
        host_nlocal = static_cast<int>(ceil((1.0 - gpu_split) * nlocal));
      else
        host_nlocal = static_cast<int>(ceil(0.05 * nlocal));
    }
    return host_nlocal;
  }
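  // Example with hypothetical numbers: for nlocal = 1000 and a fixed
  // gpu_split = 0.9, the host is sized for ceil(0.1 * 1000) = 100
  // particles; with dynamic balancing (gpu_split <= 0), a 5% reserve,
  // ceil(0.05 * 1000) = 50, is used instead.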
  /// Return the number of particles the device will handle this timestep
  inline int get_gpu_count(const int ago, const int inum_full);
  /// Return the average fraction of particles handled by device on all procs
  inline double all_avg_split() {
    if (_load_balance) {
      double _all_avg_split = 0.0;
      MPI_Reduce(&_avg_split, &_all_avg_split, 1, MPI_DOUBLE, MPI_SUM, 0,
                 _device->replica());
      _all_avg_split /= _device->replica_size();
      return _all_avg_split / _avg_count;
    } else
      return _actual_split;
  }
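  // Note: the MPI_Reduce above targets rank 0 of the replica communicator,
  // so only that rank returns the true all-process average; on other ranks
  // _all_avg_split is left at zero.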
  /// If CPU neighboring, allow the device fraction to increase on 2nd timestep
  inline int ago_first(int ago) const {
    if (_avg_count == 1 && _actual_split < _desired_split)
      ago = 0;
    return ago;
  }
  /// Start the timer for asynchronous device execution
  inline void start_timer() {
    if (_measure_this_step) {
      _device->gpu->sync();
      _device->gpu_barrier();
      _device->start_host_timer();
      _device_time.start();
      _device->gpu->sync();
      _device->gpu_barrier();
    }
  }
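  // The sync/barrier pairs bracketing the timer start drain the device
  // queue and align all processes, presumably so the measured interval
  // covers only the work queued after this call.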
  /// Stop the timer for asynchronous device execution
  inline void stop_timer() {
    if (_measure_this_step) {
      _device_time.stop();
    }
  }
  /// Calculate the new host/device split based on the cpu and device times
  /** \note Only does calculation every _HD_BALANCE_EVERY timesteps
            (and first 10) **/
  inline void balance(const double cpu_time);

  /// Calls balance() and then get_gpu_count()
  inline int balance(const int ago, const int inum_full,
                     const double cpu_time) {
    balance(cpu_time);
    return get_gpu_count(ago, inum_full);
  }
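  // A per-timestep usage sketch (hypothetical caller code, not part of
  // this header):
  //   int inum = balancer.balance(ago, inum_full, cpu_time);
  //   balancer.start_timer();
  //   // ...launch device kernels on the first inum particles...
  //   balancer.stop_timer();
  //   // ...host computes the remaining inum_full - inum particles...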
 private:
  Device<numtyp,acctyp> *_device;
  UCL_Timer _device_time;
  bool _init_done, _gpu_nbor;
  bool _load_balance;
  double _actual_split, _avg_split, _desired_split, _max_split;
  int _avg_count;
  bool _measure_this_step;
  int _inum, _inum_full, _timestep;
};
#define BalanceT Balance<numtyp,acctyp>
template <class numtyp, class acctyp>
void BalanceT::init(Device<numtyp,acctyp> *gpu, const bool gpu_nbor,
                    const double split) {
  clear();
  _gpu_nbor = gpu_nbor;
  _init_done = true;
  _device = gpu;
  _device_time.init(*gpu->gpu);

  if (split < 0.0) {
    _load_balance = true;
    _desired_split = 0.90;
  } else {
    _load_balance = false;
    _desired_split = split;
  }
  _actual_split = _desired_split;
  _avg_split = 0.0;
  _avg_count = 0;
  _timestep = 0;
}
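// Note on init(): a negative split requests dynamic load balancing with an
// initial guess of 90% of the particles on the device; a non-negative
// split fixes the device fraction for the entire run.
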
template <class numtyp, class acctyp>
int BalanceT::get_gpu_count(const int ago, const int inum_full) {
  _measure_this_step = false;
  if (_load_balance) {
    if (_avg_count < 11 || _timestep % _HD_BALANCE_EVERY == 0) {
      _measure_this_step = true;
      _inum_full = inum_full;
    }
    if (ago == 0) {
      _actual_split = _desired_split;
      _max_split = _desired_split;
    }
  }
  _inum = static_cast<int>(floor(_actual_split * inum_full));
  if (_inum == 0)
    _inum++;
  _timestep++;
  return _inum;
}
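// Note on get_gpu_count(): timing is enabled for roughly the first ten
// rebalancing steps and every _HD_BALANCE_EVERY timesteps thereafter;
// _inum is kept at least 1, presumably so device kernels always have at
// least one particle to launch on.
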
template <class numtyp, class acctyp>
void BalanceT::balance(const double cpu_time) {
  if (_measure_this_step) {
    _measure_this_step = false;
    double gpu_time = _device_time.seconds();

    double max_gpu_time;
    MPI_Allreduce(&gpu_time, &max_gpu_time, 1, MPI_DOUBLE, MPI_MAX,
                  _device->gpu_comm());

    if (_inum_full == _inum) {
      _desired_split = 1.0;
      return;
    }
    double cpu_time_per_atom = cpu_time / (_inum_full - _inum);
    double cpu_other_time = _device->host_time() - cpu_time;
    int host_inum = static_cast<int>((max_gpu_time - cpu_other_time) /
                                     cpu_time_per_atom);
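    // That is, host_inum is chosen so that the host-side cost,
    // host_inum * cpu_time_per_atom + cpu_other_time, matches the slowest
    // device time max_gpu_time, letting host and device finish together.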
    double split = static_cast<double>(_inum_full - host_inum) / _inum_full;
    _desired_split = split * _HD_BALANCE_GAP;
    if (_desired_split > 1.0)
      _desired_split = 1.0;
    if (_desired_split < 0.0)
      _desired_split = 0.0;

    if (!_gpu_nbor) {
      if (_desired_split < _max_split)
        _actual_split = _desired_split;
      else
        _actual_split = _max_split;
    }
    //std::cout << gpu_time << " " << max_gpu_time << " " << cpu_other_time
    //          << " " << cpu_time_per_atom << " " << cpu_time << " "
    //          << _desired_split << " " << host_inum << std::endl;
  }
  _avg_split += _desired_split;
  _avg_count++;
}
#endif