Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93932501
Explicit.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Dec 2, 14:56
Size
15 KB
Mime Type
text/x-c
Expires
Wed, Dec 4, 14:56 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
22639305
Attached To
rLAMMPS lammps
Explicit.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef EXPLICIT_DRIVER_HPP
#define EXPLICIT_DRIVER_HPP
#include <sys/time.h>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <ExplicitFunctors.hpp>
//----------------------------------------------------------------------------
namespace
Explicit
{
struct
PerformanceData
{
double
mesh_time
;
double
init_time
;
double
internal_force_time
;
double
central_diff
;
double
comm_time
;
size_t
number_of_steps
;
PerformanceData
()
:
mesh_time
(
0
)
,
init_time
(
0
)
,
internal_force_time
(
0
)
,
central_diff
(
0
)
,
comm_time
(
0
)
,
number_of_steps
(
0
)
{}
void
best
(
const
PerformanceData
&
rhs
)
{
if
(
rhs
.
mesh_time
<
mesh_time
)
mesh_time
=
rhs
.
mesh_time
;
if
(
rhs
.
init_time
<
init_time
)
init_time
=
rhs
.
init_time
;
if
(
rhs
.
internal_force_time
<
internal_force_time
)
internal_force_time
=
rhs
.
internal_force_time
;
if
(
rhs
.
central_diff
<
central_diff
)
central_diff
=
rhs
.
central_diff
;
if
(
rhs
.
comm_time
<
comm_time
)
comm_time
=
rhs
.
comm_time
;
}
};
template
<
typename
Scalar
,
class
FixtureType
>
PerformanceData
run
(
const
typename
FixtureType
::
FEMeshType
&
mesh
,
const
int
global_max_x
,
const
int
global_max_y
,
const
int
global_max_z
,
const
int
steps
,
const
int
print_sample
)
{
typedef
Scalar
scalar_type
;
typedef
FixtureType
fixture_type
;
typedef
typename
fixture_type
::
execution_space
execution_space
;
//typedef typename fixture_type::FEMeshType mesh_type ; // unused
enum
{
ElementNodeCount
=
fixture_type
::
element_node_count
};
const
int
NumStates
=
2
;
const
int
total_num_steps
=
steps
;
const
Scalar
user_dt
=
5.0e-6
;
//const Scalar end_time = 0.0050;
// element block parameters
const
Scalar
lin_bulk_visc
=
0.0
;
const
Scalar
quad_bulk_visc
=
0.0
;
// const Scalar lin_bulk_visc = 0.06;
// const Scalar quad_bulk_visc = 1.2;
// const Scalar hg_stiffness = 0.0;
// const Scalar hg_viscosity = 0.0;
// const Scalar hg_stiffness = 0.03;
// const Scalar hg_viscosity = 0.001;
// material properties
const
Scalar
youngs_modulus
=
1.0e6
;
const
Scalar
poissons_ratio
=
0.0
;
const
Scalar
density
=
8.0e-4
;
const
comm
::
Machine
machine
=
mesh
.
parallel_data_map
.
machine
;
PerformanceData
perf_data
;
Kokkos
::
Timer
wall_clock
;
//------------------------------------
// Generate fields
typedef
Fields
<
scalar_type
,
execution_space
>
fields_type
;
fields_type
mesh_fields
(
mesh
,
lin_bulk_visc
,
quad_bulk_visc
,
youngs_modulus
,
poissons_ratio
,
density
);
typename
fields_type
::
node_coords_type
::
HostMirror
model_coords_h
=
Kokkos
::
create_mirror
(
mesh_fields
.
model_coords
);
typename
fields_type
::
geom_state_array_type
::
HostMirror
displacement_h
=
Kokkos
::
create_mirror
(
mesh_fields
.
displacement
);
typename
fields_type
::
geom_state_array_type
::
HostMirror
velocity_h
=
Kokkos
::
create_mirror
(
mesh_fields
.
velocity
);
Kokkos
::
deep_copy
(
model_coords_h
,
mesh_fields
.
model_coords
);
//------------------------------------
// Initialization
initialize_element
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
);
initialize_node
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
);
const
Scalar
x_bc
=
global_max_x
;
// Initial condition on velocity to initiate a pulse along the X axis
{
const
unsigned
X
=
0
;
for
(
int
inode
=
0
;
inode
<
mesh_fields
.
num_nodes
;
++
inode
)
{
if
(
model_coords_h
(
inode
,
X
)
==
0
)
{
velocity_h
(
inode
,
X
,
0
)
=
1.0e3
;
velocity_h
(
inode
,
X
,
1
)
=
1.0e3
;
}
}
}
Kokkos
::
deep_copy
(
mesh_fields
.
velocity
,
velocity_h
);
//--------------------------------------------------------------------------
// We will call a sequence of functions. These functions have been
// grouped into several functors to balance the number of global memory
// accesses versus requiring too many registers or too much L1 cache.
// Global memory accees have read/write cost and memory subsystem contention cost.
//--------------------------------------------------------------------------
perf_data
.
init_time
=
comm
::
max
(
machine
,
wall_clock
.
seconds
()
);
// Parameters required for the internal force computations.
int
current_state
=
0
;
int
previous_state
=
0
;
int
next_state
=
0
;
perf_data
.
number_of_steps
=
total_num_steps
;
#if defined( KOKKOS_HAVE_MPI )
typedef
typename
fields_type
::
geom_state_array_type
::
value_type
comm_value_type
;
const
unsigned
comm_value_count
=
6
;
Kokkos
::
AsyncExchange
<
comm_value_type
,
execution_space
,
Kokkos
::
ParallelDataMap
>
comm_exchange
(
mesh
.
parallel_data_map
,
comm_value_count
);
#endif
for
(
int
step
=
0
;
step
<
total_num_steps
;
++
step
)
{
wall_clock
.
reset
();
//------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
{
// Communicate "send" nodes' displacement and velocity next_state
// to the ghosted nodes.
// buffer packages: { { dx , dy , dz , vx , vy , vz }_node }
pack_state
<
Scalar
,
execution_space
>
::
apply
(
comm_exchange
.
buffer
()
,
mesh
.
parallel_data_map
.
count_interior
,
mesh
.
parallel_data_map
.
count_send
,
mesh_fields
,
next_state
);
comm_exchange
.
setup
();
comm_exchange
.
send_receive
();
unpack_state
<
Scalar
,
execution_space
>
::
apply
(
mesh_fields
,
next_state
,
comm_exchange
.
buffer
()
,
mesh
.
parallel_data_map
.
count_owned
,
mesh
.
parallel_data_map
.
count_receive
);
execution_space
::
fence
();
}
#endif
perf_data
.
comm_time
+=
comm
::
max
(
machine
,
wall_clock
.
seconds
()
);
//------------------------------------------------------------------------
// rotate the states
previous_state
=
current_state
;
current_state
=
next_state
;
++
next_state
;
next_state
%=
NumStates
;
wall_clock
.
reset
();
// First kernel 'grad_hgop' combines two functions:
// gradient, velocity gradient
grad
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
,
current_state
,
previous_state
);
// Combine tensor decomposition and rotation functions.
decomp_rotate
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
,
current_state
,
previous_state
);
internal_force
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
,
user_dt
,
current_state
);
execution_space
::
fence
();
perf_data
.
internal_force_time
+=
comm
::
max
(
machine
,
wall_clock
.
seconds
()
);
wall_clock
.
reset
();
// Assembly of elements' contributions to nodal force into
// a nodal force vector. Update the accelerations, velocities,
// displacements.
// The same pattern can be used for matrix-free residual computations.
nodal_step
<
Scalar
,
execution_space
>::
apply
(
mesh_fields
,
x_bc
,
current_state
,
next_state
);
execution_space
::
fence
();
perf_data
.
central_diff
+=
comm
::
max
(
machine
,
wall_clock
.
seconds
()
);
if
(
print_sample
&&
0
==
step
%
100
)
{
Kokkos
::
deep_copy
(
displacement_h
,
mesh_fields
.
displacement
);
Kokkos
::
deep_copy
(
velocity_h
,
mesh_fields
.
velocity
);
if
(
1
==
print_sample
)
{
std
::
cout
<<
"step "
<<
step
<<
" : displacement(*,0,0) ="
;
for
(
int
i
=
0
;
i
<
mesh_fields
.
num_nodes_owned
;
++
i
)
{
if
(
model_coords_h
(
i
,
1
)
==
0
&&
model_coords_h
(
i
,
2
)
==
0
)
{
std
::
cout
<<
" "
<<
displacement_h
(
i
,
0
,
next_state
);
}
}
std
::
cout
<<
std
::
endl
;
const
float
tol
=
1.0e-6
;
const
int
yb
=
global_max_y
;
const
int
zb
=
global_max_z
;
std
::
cout
<<
"step "
<<
step
<<
" : displacement(*,"
<<
yb
<<
","
<<
zb
<<
") ="
;
for
(
int
i
=
0
;
i
<
mesh_fields
.
num_nodes_owned
;
++
i
)
{
if
(
fabs
(
model_coords_h
(
i
,
1
)
-
yb
)
<
tol
&&
fabs
(
model_coords_h
(
i
,
2
)
-
zb
)
<
tol
)
{
std
::
cout
<<
" "
<<
displacement_h
(
i
,
0
,
next_state
);
}
}
std
::
cout
<<
std
::
endl
;
}
else
if
(
2
==
print_sample
)
{
const
float
tol
=
1.0e-6
;
const
int
xb
=
global_max_x
/
2
;
const
int
yb
=
global_max_y
/
2
;
const
int
zb
=
global_max_z
/
2
;
for
(
int
i
=
0
;
i
<
mesh_fields
.
num_nodes_owned
;
++
i
)
{
if
(
fabs
(
model_coords_h
(
i
,
0
)
-
xb
)
<
tol
&&
fabs
(
model_coords_h
(
i
,
1
)
-
yb
)
<
tol
&&
fabs
(
model_coords_h
(
i
,
2
)
-
zb
)
<
tol
)
{
std
::
cout
<<
"step "
<<
step
<<
" : displacement("
<<
xb
<<
","
<<
yb
<<
","
<<
zb
<<
") = {"
<<
std
::
setprecision
(
6
)
<<
" "
<<
displacement_h
(
i
,
0
,
next_state
)
<<
std
::
setprecision
(
2
)
<<
" "
<<
displacement_h
(
i
,
1
,
next_state
)
<<
std
::
setprecision
(
2
)
<<
" "
<<
displacement_h
(
i
,
2
,
next_state
)
<<
" }"
<<
std
::
endl
;
}
}
}
}
}
return
perf_data
;
}
template
<
typename
Scalar
,
typename
Device
>
static
void
driver
(
const
char
*
const
label
,
comm
::
Machine
machine
,
const
int
gang_count
,
const
int
elem_count_beg
,
const
int
elem_count_end
,
const
int
runs
)
{
typedef
Scalar
scalar_type
;
typedef
Device
execution_space
;
typedef
double
coordinate_scalar_type
;
typedef
FixtureElementHex8
fixture_element_type
;
typedef
BoxMeshFixture
<
coordinate_scalar_type
,
execution_space
,
fixture_element_type
>
fixture_type
;
typedef
typename
fixture_type
::
FEMeshType
mesh_type
;
const
size_t
proc_count
=
comm
::
size
(
machine
);
const
size_t
proc_rank
=
comm
::
rank
(
machine
);
const
int
space
=
15
;
const
int
steps
=
1000
;
const
int
print_sample
=
0
;
if
(
comm
::
rank
(
machine
)
==
0
)
{
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"
\"
MiniExplicitDynamics with Kokkos "
<<
label
<<
" time_steps("
<<
steps
<<
")"
<<
"
\"
"
<<
std
::
endl
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
Element
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
Node
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
Initialize
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
ElemForce
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
NodeUpdate
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
NodeComm
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
Time/Elem
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
Time/Node
\"
"
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
count
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
count
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
, "
;
std
::
cout
<<
std
::
left
<<
std
::
setw
(
space
)
<<
"
\"
microsec
\"
"
;
std
::
cout
<<
std
::
endl
;
}
for
(
int
i
=
elem_count_beg
;
i
<
elem_count_end
;
i
*=
2
)
{
const
int
iz
=
std
::
max
(
1
,
(
int
)
cbrt
(
((
double
)
i
)
/
2.0
)
);
const
int
iy
=
iz
+
1
;
const
int
ix
=
2
*
iy
;
const
int
nelem
=
ix
*
iy
*
iz
;
const
int
nnode
=
(
ix
+
1
)
*
(
iy
+
1
)
*
(
iz
+
1
);
mesh_type
mesh
=
fixture_type
::
create
(
proc_count
,
proc_rank
,
gang_count
,
ix
,
iy
,
iz
);
mesh
.
parallel_data_map
.
machine
=
machine
;
PerformanceData
perf
,
best
;
for
(
int
j
=
0
;
j
<
runs
;
j
++
){
perf
=
run
<
scalar_type
,
fixture_type
>
(
mesh
,
ix
,
iy
,
iz
,
steps
,
print_sample
);
if
(
j
==
0
)
{
best
=
perf
;
}
else
{
best
.
best
(
perf
);
}
}
if
(
comm
::
rank
(
machine
)
==
0
)
{
double
time_per_element
=
(
best
.
internal_force_time
)
/
(
nelem
*
perf
.
number_of_steps
);
double
time_per_node
=
(
best
.
comm_time
+
best
.
central_diff
)
/
(
nnode
*
perf
.
number_of_steps
);
std
::
cout
<<
std
::
setw
(
space
-
3
)
<<
nelem
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
nnode
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
best
.
number_of_steps
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
best
.
init_time
*
1000000
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
(
best
.
internal_force_time
*
1000000
)
/
best
.
number_of_steps
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
(
best
.
central_diff
*
1000000
)
/
best
.
number_of_steps
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
(
best
.
comm_time
*
1000000
)
/
best
.
number_of_steps
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
time_per_element
*
1000000
<<
" , "
<<
std
::
setw
(
space
-
3
)
<<
time_per_node
*
1000000
<<
std
::
endl
;
}
}
}
}
// namespace Explicit
#endif
/* #ifndef EXPLICIT_DRIVER_HPP */
Event Timeline
Log In to Comment