Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93932501
Explicit.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Dec 2, 14:56
Size
15 KB
Mime Type
text/x-c
Expires
Wed, Dec 4, 14:56 (2 d)
Engine
blob
Format
Raw Data
Handle
22639305
Attached To
rLAMMPS lammps
Explicit.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef EXPLICIT_DRIVER_HPP
#define EXPLICIT_DRIVER_HPP
#include <sys/time.h>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <ExplicitFunctors.hpp>
//----------------------------------------------------------------------------
namespace Explicit {
struct PerformanceData {
double mesh_time ;
double init_time ;
double internal_force_time ;
double central_diff ;
double comm_time ;
size_t number_of_steps ;
PerformanceData()
: mesh_time(0)
, init_time(0)
, internal_force_time(0)
, central_diff(0)
, comm_time(0)
, number_of_steps(0)
{}
void best( const PerformanceData & rhs )
{
if ( rhs.mesh_time < mesh_time ) mesh_time = rhs.mesh_time ;
if ( rhs.init_time < init_time ) init_time = rhs.init_time ;
if ( rhs.internal_force_time < internal_force_time ) internal_force_time = rhs.internal_force_time ;
if ( rhs.central_diff < central_diff ) central_diff = rhs.central_diff ;
if ( rhs.comm_time < comm_time ) comm_time = rhs.comm_time ;
}
};
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
const int global_max_x ,
const int global_max_y ,
const int global_max_z ,
const int steps ,
const int print_sample )
{
typedef Scalar scalar_type ;
typedef FixtureType fixture_type ;
typedef typename fixture_type::execution_space execution_space ;
//typedef typename fixture_type::FEMeshType mesh_type ; // unused
enum { ElementNodeCount = fixture_type::element_node_count };
const int NumStates = 2;
const int total_num_steps = steps ;
const Scalar user_dt = 5.0e-6;
//const Scalar end_time = 0.0050;
// element block parameters
const Scalar lin_bulk_visc = 0.0;
const Scalar quad_bulk_visc = 0.0;
// const Scalar lin_bulk_visc = 0.06;
// const Scalar quad_bulk_visc = 1.2;
// const Scalar hg_stiffness = 0.0;
// const Scalar hg_viscosity = 0.0;
// const Scalar hg_stiffness = 0.03;
// const Scalar hg_viscosity = 0.001;
// material properties
const Scalar youngs_modulus=1.0e6;
const Scalar poissons_ratio=0.0;
const Scalar density = 8.0e-4;
const comm::Machine machine = mesh.parallel_data_map.machine ;
PerformanceData perf_data ;
Kokkos::Timer wall_clock ;
//------------------------------------
// Generate fields
typedef Fields< scalar_type , execution_space > fields_type ;
fields_type mesh_fields( mesh ,
lin_bulk_visc ,
quad_bulk_visc ,
youngs_modulus ,
poissons_ratio ,
density );
typename fields_type::node_coords_type::HostMirror
model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );
typename fields_type::geom_state_array_type::HostMirror
displacement_h = Kokkos::create_mirror( mesh_fields.displacement );
typename fields_type::geom_state_array_type::HostMirror
velocity_h = Kokkos::create_mirror( mesh_fields.velocity );
Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );
//------------------------------------
// Initialization
initialize_element<Scalar,execution_space>::apply( mesh_fields );
initialize_node< Scalar,execution_space>::apply( mesh_fields );
const Scalar x_bc = global_max_x ;
// Initial condition on velocity to initiate a pulse along the X axis
{
const unsigned X = 0;
for (int inode = 0; inode< mesh_fields.num_nodes; ++inode) {
if ( model_coords_h(inode,X) == 0) {
velocity_h(inode,X,0) = 1.0e3;
velocity_h(inode,X,1) = 1.0e3;
}
}
}
Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
//--------------------------------------------------------------------------
// We will call a sequence of functions. These functions have been
// grouped into several functors to balance the number of global memory
// accesses versus requiring too many registers or too much L1 cache.
// Global memory accees have read/write cost and memory subsystem contention cost.
//--------------------------------------------------------------------------
perf_data.init_time = comm::max( machine , wall_clock.seconds() );
// Parameters required for the internal force computations.
int current_state = 0;
int previous_state = 0;
int next_state = 0;
perf_data.number_of_steps = total_num_steps ;
#if defined( KOKKOS_HAVE_MPI )
typedef typename
fields_type::geom_state_array_type::value_type comm_value_type ;
const unsigned comm_value_count = 6 ;
Kokkos::AsyncExchange< comm_value_type , execution_space ,
Kokkos::ParallelDataMap >
comm_exchange( mesh.parallel_data_map , comm_value_count );
#endif
for (int step = 0; step < total_num_steps; ++step) {
wall_clock.reset();
//------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
{
// Communicate "send" nodes' displacement and velocity next_state
// to the ghosted nodes.
// buffer packages: { { dx , dy , dz , vx , vy , vz }_node }
pack_state< Scalar , execution_space >
::apply( comm_exchange.buffer() ,
mesh.parallel_data_map.count_interior ,
mesh.parallel_data_map.count_send ,
mesh_fields , next_state );
comm_exchange.setup();
comm_exchange.send_receive();
unpack_state< Scalar , execution_space >
::apply( mesh_fields , next_state ,
comm_exchange.buffer() ,
mesh.parallel_data_map.count_owned ,
mesh.parallel_data_map.count_receive );
execution_space::fence();
}
#endif
perf_data.comm_time += comm::max( machine , wall_clock.seconds() );
//------------------------------------------------------------------------
// rotate the states
previous_state = current_state;
current_state = next_state;
++next_state;
next_state %= NumStates;
wall_clock.reset();
// First kernel 'grad_hgop' combines two functions:
// gradient, velocity gradient
grad< Scalar , execution_space >::apply( mesh_fields ,
current_state ,
previous_state );
// Combine tensor decomposition and rotation functions.
decomp_rotate< Scalar , execution_space >::apply( mesh_fields ,
current_state ,
previous_state );
internal_force< Scalar , execution_space >::apply( mesh_fields ,
user_dt ,
current_state );
execution_space::fence();
perf_data.internal_force_time +=
comm::max( machine , wall_clock.seconds() );
wall_clock.reset();
// Assembly of elements' contributions to nodal force into
// a nodal force vector. Update the accelerations, velocities,
// displacements.
// The same pattern can be used for matrix-free residual computations.
nodal_step< Scalar , execution_space >::apply( mesh_fields ,
x_bc ,
current_state,
next_state );
execution_space::fence();
perf_data.central_diff +=
comm::max( machine , wall_clock.seconds() );
if ( print_sample && 0 == step % 100 ) {
Kokkos::deep_copy( displacement_h , mesh_fields.displacement );
Kokkos::deep_copy( velocity_h , mesh_fields.velocity );
if ( 1 == print_sample ) {
std::cout << "step " << step
<< " : displacement(*,0,0) =" ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
std::cout << " " << displacement_h(i,0,next_state);
}
}
std::cout << std::endl ;
const float tol = 1.0e-6 ;
const int yb = global_max_y ;
const int zb = global_max_z ;
std::cout << "step " << step
<< " : displacement(*," << yb << "," << zb << ") =" ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( fabs( model_coords_h(i,1) - yb ) < tol &&
fabs( model_coords_h(i,2) - zb ) < tol ) {
std::cout << " " << displacement_h(i,0,next_state);
}
}
std::cout << std::endl ;
}
else if ( 2 == print_sample ) {
const float tol = 1.0e-6 ;
const int xb = global_max_x / 2 ;
const int yb = global_max_y / 2 ;
const int zb = global_max_z / 2 ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( fabs( model_coords_h(i,0) - xb ) < tol &&
fabs( model_coords_h(i,1) - yb ) < tol &&
fabs( model_coords_h(i,2) - zb ) < tol ) {
std::cout << "step " << step
<< " : displacement("
<< xb << "," << yb << "," << zb << ") = {"
<< std::setprecision(6)
<< " " << displacement_h(i,0,next_state)
<< std::setprecision(2)
<< " " << displacement_h(i,1,next_state)
<< std::setprecision(2)
<< " " << displacement_h(i,2,next_state)
<< " }" << std::endl ;
}
}
}
}
}
return perf_data ;
}
template <typename Scalar, typename Device>
static void driver( const char * const label ,
comm::Machine machine ,
const int gang_count ,
const int elem_count_beg ,
const int elem_count_end ,
const int runs )
{
typedef Scalar scalar_type ;
typedef Device execution_space ;
typedef double coordinate_scalar_type ;
typedef FixtureElementHex8 fixture_element_type ;
typedef BoxMeshFixture< coordinate_scalar_type ,
execution_space ,
fixture_element_type > fixture_type ;
typedef typename fixture_type::FEMeshType mesh_type ;
const size_t proc_count = comm::size( machine );
const size_t proc_rank = comm::rank( machine );
const int space = 15 ;
const int steps = 1000 ;
const int print_sample = 0 ;
if ( comm::rank( machine ) == 0 ) {
std::cout << std::endl ;
std::cout << "\"MiniExplicitDynamics with Kokkos " << label
<< " time_steps(" << steps << ")"
<< "\"" << std::endl;
std::cout << std::left << std::setw(space) << "\"Element\" , ";
std::cout << std::left << std::setw(space) << "\"Node\" , ";
std::cout << std::left << std::setw(space) << "\"Initialize\" , ";
std::cout << std::left << std::setw(space) << "\"ElemForce\" , ";
std::cout << std::left << std::setw(space) << "\"NodeUpdate\" , ";
std::cout << std::left << std::setw(space) << "\"NodeComm\" , ";
std::cout << std::left << std::setw(space) << "\"Time/Elem\" , ";
std::cout << std::left << std::setw(space) << "\"Time/Node\"";
std::cout << std::endl;
std::cout << std::left << std::setw(space) << "\"count\" , ";
std::cout << std::left << std::setw(space) << "\"count\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\"";
std::cout << std::endl;
}
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
{
const int iz = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
const int iy = iz + 1 ;
const int ix = 2 * iy ;
const int nelem = ix * iy * iz ;
const int nnode = ( ix + 1 ) * ( iy + 1 ) * ( iz + 1 );
mesh_type mesh =
fixture_type::create( proc_count , proc_rank , gang_count ,
ix , iy , iz );
mesh.parallel_data_map.machine = machine ;
PerformanceData perf , best ;
for(int j = 0; j < runs; j++){
perf = run<scalar_type,fixture_type>(mesh,ix,iy,iz,steps,print_sample);
if( j == 0 ) {
best = perf ;
}
else {
best.best( perf );
}
}
if ( comm::rank( machine ) == 0 ) {
double time_per_element =
( best.internal_force_time ) / ( nelem * perf.number_of_steps );
double time_per_node =
( best.comm_time + best.central_diff ) / ( nnode * perf.number_of_steps );
std::cout << std::setw(space-3) << nelem << " , "
<< std::setw(space-3) << nnode << " , "
<< std::setw(space-3) << best.number_of_steps << " , "
<< std::setw(space-3) << best.init_time * 1000000 << " , "
<< std::setw(space-3)
<< ( best.internal_force_time * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3)
<< ( best.central_diff * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3)
<< ( best.comm_time * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3) << time_per_element * 1000000 << " , "
<< std::setw(space-3) << time_per_node * 1000000
<< std::endl ;
}
}
}
} // namespace Explicit
#endif /* #ifndef EXPLICIT_DRIVER_HPP */
Event Timeline
Log In to Comment