ExplicitFunctors.hpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Mon, Nov 4, 08:48

ExplicitFunctors.hpp
View Options

	/*
	//@HEADER
	// ************************************************************************
	//
	// Kokkos v. 2.0
	// Copyright (2014) Sandia Corporation
	//
	// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
	// the U.S. Government retains certain rights in this software.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// 1. Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	//
	// 2. Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// 3. Neither the name of the Corporation nor the names of the
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
	// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
	// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	//
	// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
	//
	// ************************************************************************
	//@HEADER
	*/

	#ifndef KOKKOS_EXPLICITFUNCTORS_HPP
	#define KOKKOS_EXPLICITFUNCTORS_HPP

	#include <cmath>
	#include <Kokkos_Core.hpp>
	#include <FEMesh.hpp>

	namespace Explicit {

	template<typename Scalar , class Device >
	struct Fields {

	static const int NumStates = 2 ;
	static const int SpatialDim = 3 ;
	static const int ElemNodeCount = 8 ;

	// Indices for full 3x3 tensor:

	static const int K_F_XX = 0 ;
	static const int K_F_YY = 1 ;
	static const int K_F_ZZ = 2 ;
	static const int K_F_XY = 3 ;
	static const int K_F_YZ = 4 ;
	static const int K_F_ZX = 5 ;
	static const int K_F_YX = 6 ;
	static const int K_F_ZY = 7 ;
	static const int K_F_XZ = 8 ;

	// Indexes into a 3 by 3 symmetric tensor stored as a length 6 vector

	static const int K_S_XX = 0 ;
	static const int K_S_YY = 1 ;
	static const int K_S_ZZ = 2 ;
	static const int K_S_XY = 3 ;
	static const int K_S_YZ = 4 ;
	static const int K_S_ZX = 5 ;
	static const int K_S_YX = 3 ;
	static const int K_S_ZY = 4 ;
	static const int K_S_XZ = 5 ;

	// Indexes into a 3 by 3 skew symmetric tensor stored as a length 3 vector

	static const int K_V_XY = 0 ;
	static const int K_V_YZ = 1 ;
	static const int K_V_ZX = 2 ;


	typedef Device execution_space ;
	typedef typename execution_space::size_type size_type ;

	typedef HybridFEM::FEMesh<double,ElemNodeCount,execution_space> FEMesh ;

	typedef typename FEMesh::node_coords_type node_coords_type ;
	typedef typename FEMesh::elem_node_ids_type elem_node_ids_type ;
	typedef typename FEMesh::node_elem_ids_type node_elem_ids_type ;
	typedef typename Kokkos::ParallelDataMap parallel_data_map ;

	typedef Kokkos::View< double[][ SpatialDim ][ NumStates ] , execution_space > geom_state_array_type ;
	typedef Kokkos::View< Scalar[][ SpatialDim ] , execution_space > geom_array_type ;
	typedef Kokkos::View< Scalar[] , execution_space > array_type ;
	typedef Kokkos::View< Scalar , execution_space > scalar_type ;

	typedef Kokkos::View< Scalar[][ 6 ] , execution_space > elem_sym_tensor_type ;
	typedef Kokkos::View< Scalar[][ 9 ] , execution_space > elem_tensor_type ;
	typedef Kokkos::View< Scalar[][ 9 ][ NumStates ] , execution_space > elem_tensor_state_type ;
	typedef Kokkos::View< Scalar[][ SpatialDim ][ ElemNodeCount ] , execution_space > elem_node_geom_type ;

	// Parameters:
	const int num_nodes ;
	const int num_nodes_owned ;
	const int num_elements ;

	const Scalar lin_bulk_visc;
	const Scalar quad_bulk_visc;
	const Scalar two_mu;
	const Scalar bulk_modulus;
	const Scalar density;

	// Mesh:
	const elem_node_ids_type elem_node_connectivity ;
	const node_elem_ids_type node_elem_connectivity ;
	const node_coords_type model_coords ;

	// Compute:
	const scalar_type dt ;
	const scalar_type prev_dt ;
	const geom_state_array_type displacement ;
	const geom_state_array_type velocity ;
	const geom_array_type acceleration ;
	const geom_array_type internal_force ;
	const array_type nodal_mass ;
	const array_type elem_mass ;
	const array_type internal_energy ;
	const elem_sym_tensor_type stress_new ;
	const elem_tensor_state_type rotation ;
	const elem_node_geom_type element_force ;
	const elem_tensor_type vel_grad ;
	const elem_sym_tensor_type stretch ;
	const elem_sym_tensor_type rot_stretch ;

	Fields(
	const FEMesh & mesh,
	Scalar arg_lin_bulk_visc,
	Scalar arg_quad_bulk_visc,
	Scalar youngs_modulus,
	Scalar poissons_ratio,
	Scalar arg_density )
	: num_nodes( mesh.parallel_data_map.count_owned +
	mesh.parallel_data_map.count_receive )
	, num_nodes_owned( mesh.parallel_data_map.count_owned )
	, num_elements( mesh.elem_node_ids.dimension_0() )
	, lin_bulk_visc( arg_lin_bulk_visc )
	, quad_bulk_visc( arg_quad_bulk_visc )
	, two_mu(youngs_modulus/(1.0+poissons_ratio))
	, bulk_modulus(youngs_modulus/(3(1.0-2.0poissons_ratio)))
	, density(arg_density)

	// mesh

	, elem_node_connectivity( mesh.elem_node_ids ) // ( num_elements , ElemNodeCount )
	, node_elem_connectivity( mesh.node_elem_ids ) // ( num_nodes , ... )
	, model_coords( mesh.node_coords ) // ( num_nodes , 3 )

	// compute with input/output

	, dt( "dt" )
	, prev_dt( "prev_dt" )
	, displacement( "displacement" , num_nodes )
	, velocity( "velocity" , num_nodes )
	, acceleration( "acceleration" , num_nodes_owned )
	, internal_force( "internal_force" , num_nodes_owned )
	, nodal_mass( "nodal_mass" , num_nodes_owned )
	, elem_mass( "elem_mass" , num_elements )
	, internal_energy( "internal_energy" , num_elements )
	, stress_new( "stress_new" , num_elements )

	// temporary arrays

	, rotation( "rotation" , num_elements )
	, element_force( "element_force" , num_elements )
	, vel_grad( "vel_grad" , num_elements )
	, stretch( "stretch" , num_elements )
	, rot_stretch( "rot_stretch" , num_elements )
	{ }
	};


	//----------------------------------------------------------------------------

	/// Inner product of two arrays of exactly eight entries.
	///
	/// \param a  First operand; entries 0..7 are read.
	/// \param b  Second operand; entries 0..7 are read.
	/// \return   a[0]*b[0] + a[1]*b[1] + ... + a[7]*b[7], summed left to right.
	template< typename Scalar , class DeviceType >
	KOKKOS_INLINE_FUNCTION
	Scalar dot8( const Scalar * a , const Scalar * b )
	{
	  // Seed with the first product (rather than zero) so the additions happen
	  // in the same order as a fully written-out eight-term sum.
	  Scalar total = a[0] * b[0] ;
	  for ( int k = 1 ; k < 8 ; ++k ) {
	    total = total + a[k] * b[k] ;
	  }
	  return total ;
	}

	/// Helper for comp_grad: fills one eight-entry gradient array.
	///
	/// Twelve pairwise differences of `d` (and six sums of those differences)
	/// are formed first; each output entry is then a signed sum of six products
	/// of an entry of `m` with one of those differences or sums.
	///
	/// \param d     Eight values whose pairwise differences are taken.
	/// \param m     Eight values multiplied against those differences.
	/// \param grad  Eight outputs, overwritten.
	template< typename Scalar , class DeviceType >
	KOKKOS_INLINE_FUNCTION
	void comp_grad_component( const Scalar * const d ,
	                          const Scalar * const m ,
	                          Scalar * const grad )
	{
	  // Difference vectors, named R<i><j> = d[i-1] - d[j-1] (one-based node ids).

	  const Scalar R42=(d[3] - d[1]);
	  const Scalar R52=(d[4] - d[1]);
	  const Scalar R54=(d[4] - d[3]);

	  const Scalar R63=(d[5] - d[2]);
	  const Scalar R83=(d[7] - d[2]);
	  const Scalar R86=(d[7] - d[5]);

	  const Scalar R31=(d[2] - d[0]);
	  const Scalar R61=(d[5] - d[0]);
	  const Scalar R74=(d[6] - d[3]);

	  const Scalar R72=(d[6] - d[1]);
	  const Scalar R75=(d[6] - d[4]);
	  const Scalar R81=(d[7] - d[0]);

	  const Scalar t1=(R63 + R54);
	  const Scalar t2=(R61 + R74);
	  const Scalar t3=(R72 + R81);

	  const Scalar t4 =(R86 + R42);
	  const Scalar t5 =(R83 + R52);
	  const Scalar t6 =(R75 + R31);

	  grad[0] = (m[1] * t1) - (m[2] * R42) - (m[3] * t5) + (m[4] * t4) + (m[5] * R52) - (m[7] * R54);
	  grad[1] = (m[2] * t2) + (m[3] * R31) - (m[0] * t1) - (m[5] * t6) + (m[6] * R63) - (m[4] * R61);
	  grad[2] = (m[3] * t3) + (m[0] * R42) - (m[1] * t2) - (m[6] * t4) + (m[7] * R74) - (m[5] * R72);
	  grad[3] = (m[0] * t5) - (m[1] * R31) - (m[2] * t3) + (m[7] * t6) + (m[4] * R81) - (m[6] * R83);
	  grad[4] = (m[5] * t3) + (m[6] * R86) - (m[7] * t2) - (m[0] * t4) - (m[3] * R81) + (m[1] * R61);
	  grad[5] = (m[6] * t5) - (m[4] * t3) - (m[7] * R75) + (m[1] * t6) - (m[0] * R52) + (m[2] * R72);
	  grad[6] = (m[7] * t1) - (m[5] * t5) - (m[4] * R86) + (m[2] * t4) - (m[1] * R63) + (m[3] * R83);
	  grad[7] = (m[4] * t2) - (m[6] * t1) + (m[5] * R75) - (m[3] * t6) - (m[2] * R74) + (m[0] * R54);
	}

	/// Computes the three eight-entry gradient arrays of an 8-node element
	/// from its nodal coordinates.
	///
	/// \param x,y,z                  Nodal coordinates, eight entries each.
	/// \param grad_x,grad_y,grad_z   Outputs, eight entries each, overwritten.
	///
	/// The outputs are written in the order grad_y, grad_x, grad_z.
	template< typename Scalar , class DeviceType >
	KOKKOS_INLINE_FUNCTION
	void comp_grad( const Scalar * const x ,
	                const Scalar * const y ,
	                const Scalar * const z,
	                Scalar * const grad_x ,
	                Scalar * const grad_y ,
	                Scalar * const grad_z )
	{
	  // Y gradient: differences of X, multiplied by Z.
	  comp_grad_component<Scalar,DeviceType>( x , z , grad_y );

	  // X gradient: differences of Z, multiplied by Y.
	  comp_grad_component<Scalar,DeviceType>( z , y , grad_x );

	  // Z gradient: differences of Y, multiplied by X.
	  comp_grad_component<Scalar,DeviceType>( y , x , grad_z );
	}

	//----------------------------------------------------------------------------

	/// Per-element initialization functor.
	///
	/// For each element it sets the diagonal entries of `stretch` and of both
	/// states of `rotation` to one, and stores
	/// (1/12) * density * dot8( x , grad_x ) in `elem_mass`, where x and grad_x
	/// come from the element's model coordinates.
	template< typename Scalar , class DeviceType >
	struct initialize_element
	{
	  typedef DeviceType execution_space ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  // Shallow view copies taken from the Fields object.
	  typename Fields::elem_node_ids_type elem_node_connectivity ;
	  typename Fields::node_coords_type model_coords ;
	  typename Fields::elem_sym_tensor_type stretch ;
	  typename Fields::elem_tensor_state_type rotation ;
	  typename Fields::array_type elem_mass ;

	  const Scalar density ;

	  /// Copies the views and the density out of `mesh_fields`.
	  initialize_element( const Fields & mesh_fields )
	    : elem_node_connectivity( mesh_fields.elem_node_connectivity )
	    , model_coords( mesh_fields.model_coords )
	    , stretch( mesh_fields.stretch )
	    , rotation( mesh_fields.rotation )
	    , elem_mass( mesh_fields.elem_mass )
	    , density( mesh_fields.density )
	  {}

	  /// Initializes element `ielem`.
	  KOKKOS_INLINE_FUNCTION
	  void operator()( int ielem )const
	  {
	    const Scalar one_twelfth = 1.0 / 12.0 ;

	    // Gather the element's nodal model coordinates into local arrays.
	    Scalar cx[ Fields::ElemNodeCount ];
	    Scalar cy[ Fields::ElemNodeCount ];
	    Scalar cz[ Fields::ElemNodeCount ];

	    for ( int k = 0 ; k < Fields::ElemNodeCount ; ++k ) {
	      const int node = elem_node_connectivity( ielem , k );

	      cx[k] = model_coords( node , 0 );
	      cy[k] = model_coords( node , 1 );
	      cz[k] = model_coords( node , 2 );
	    }

	    Scalar gx[ Fields::ElemNodeCount ];
	    Scalar gy[ Fields::ElemNodeCount ];
	    Scalar gz[ Fields::ElemNodeCount ];

	    comp_grad<Scalar,execution_space>( cx, cy, cz, gx, gy, gz);

	    // Slots 0, 1, 2 are the XX, YY, ZZ entries of both tensor layouts.
	    for ( int diag = 0 ; diag < 3 ; ++diag ) {
	      stretch( ielem , diag ) = 1 ;
	      rotation( ielem , diag , 0 ) = 1 ;
	      rotation( ielem , diag , 1 ) = 1 ;
	    }

	    elem_mass(ielem) = one_twelfth * density *
	                       dot8<Scalar,execution_space>( cx , gx );
	  }

	  /// Runs the functor over every element of `mesh_fields`.
	  static void apply( const Fields & mesh_fields )
	  {
	    initialize_element functor( mesh_fields );
	    Kokkos::parallel_for( mesh_fields.num_elements , functor );
	  }
	};


	/// Per-node initialization functor.
	///
	/// For each owned node it sums `elem_mass` over the elements listed in the
	/// node's row of `node_elem_connectivity` and stores that sum divided by
	/// ElemNodeCount in `nodal_mass`.
	template<typename Scalar , class DeviceType >
	struct initialize_node
	{
	  typedef DeviceType execution_space ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  // Shallow view copies taken from the Fields object.
	  typename Fields::node_elem_ids_type node_elem_connectivity ;
	  typename Fields::array_type nodal_mass ;
	  typename Fields::array_type elem_mass ;

	  static const int ElemNodeCount = Fields::ElemNodeCount ;

	  /// Copies the views out of `mesh_fields`.
	  initialize_node( const Fields & mesh_fields )
	    : node_elem_connectivity( mesh_fields.node_elem_connectivity )
	    , nodal_mass( mesh_fields.nodal_mass )
	    , elem_mass( mesh_fields.elem_mass )
	  {}


	  /// Computes the mass of node `inode`.
	  KOKKOS_INLINE_FUNCTION
	  void operator()( int inode )const
	  {
	    // Row `inode` spans entries [ row_map[inode] , row_map[inode+1] ).
	    const int row_end = node_elem_connectivity.row_map[inode+1];
	    int entry = node_elem_connectivity.row_map[inode];

	    Scalar mass_sum = 0;

	    while ( entry != row_end ) {
	      // Column 0 of an entry holds the element id.
	      const int element = node_elem_connectivity.entries( entry , 0 );
	      mass_sum += elem_mass(element);
	      ++entry ;
	    }

	    nodal_mass(inode) = mass_sum / ElemNodeCount ;
	  }

	  /// Runs the functor over the owned nodes of `mesh_fields`.
	  static void apply( const Fields & mesh_fields )
	  {
	    initialize_node functor( mesh_fields );
	    Kokkos::parallel_for( mesh_fields.num_nodes_owned , functor );
	  }
	};

	//----------------------------------------------------------------------------


	/// Functor that fills `vel_grad` with the nine velocity-gradient entries
	/// of every element.
	///
	/// Nodal positions are evaluated as
	///   model_coords + displacement(current_state) - 0.5 * dt * velocity(current_state)
	/// and each entry is dot8( velocity component , gradient array ) divided by
	/// dot8( x , grad_x ).
	template<typename Scalar, class DeviceType >
	struct grad
	{
	  typedef DeviceType execution_space ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  static const int ElemNodeCount = Fields::ElemNodeCount ;

	  static const int K_F_XX = Fields::K_F_XX ;
	  static const int K_F_YY = Fields::K_F_YY ;
	  static const int K_F_ZZ = Fields::K_F_ZZ ;
	  static const int K_F_XY = Fields::K_F_XY ;
	  static const int K_F_YZ = Fields::K_F_YZ ;
	  static const int K_F_ZX = Fields::K_F_ZX ;
	  static const int K_F_YX = Fields::K_F_YX ;
	  static const int K_F_ZY = Fields::K_F_ZY ;
	  static const int K_F_XZ = Fields::K_F_XZ ;

	  // Views used by this functor (shallow copies of the Fields views).

	  const typename Fields::elem_node_ids_type elem_node_connectivity ;
	  const typename Fields::node_coords_type model_coords ;
	  const typename Fields::geom_state_array_type displacement ;
	  const typename Fields::geom_state_array_type velocity ;
	  const typename Fields::elem_tensor_type vel_grad ;
	  const typename Fields::scalar_type dt ;

	  const int current_state;
	  const int previous_state;  // stored but not read by this functor

	  /// Host-side constructor; every view copy is shallow.
	  ///
	  /// \param fields              Source of the views.
	  /// \param arg_current_state   State index read from displacement/velocity.
	  /// \param arg_previous_state  Stored in previous_state.
	  grad( const Fields & fields,
	        const int arg_current_state,
	        const int arg_previous_state)
	    : elem_node_connectivity( fields.elem_node_connectivity)
	    , model_coords( fields.model_coords)
	    , displacement( fields.displacement)
	    , velocity( fields.velocity)
	    , vel_grad( fields.vel_grad)
	    , dt( fields.dt)
	    , current_state(arg_current_state)
	    , previous_state(arg_previous_state)
	  { }

	  //--------------------------------------------------------------------------

	  /// Writes the nine velocity-gradient entries of element `ielem`.
	  ///
	  /// \param vx,vy,vz               Nodal velocity components, eight each.
	  /// \param grad_x,grad_y,grad_z   Gradient arrays from comp_grad.
	  /// \param inv_vol                Scale factor applied to every entry.
	  KOKKOS_INLINE_FUNCTION
	  void v_grad( int ielem,
	               Scalar * vx, Scalar * vy, Scalar * vz,
	               Scalar * grad_x, Scalar * grad_y, Scalar * grad_z,
	               Scalar inv_vol) const
	  {
	    // Function-local copies of the slot indices.
	    // NOTE(review): presumably kept local so the static members are never
	    // bound by reference when passed to the view - confirm before removing.
	    const int K_F_XX = Fields::K_F_XX ;
	    const int K_F_XY = Fields::K_F_XY ;
	    const int K_F_XZ = Fields::K_F_XZ ;
	    const int K_F_YX = Fields::K_F_YX ;
	    const int K_F_YY = Fields::K_F_YY ;
	    const int K_F_YZ = Fields::K_F_YZ ;
	    const int K_F_ZX = Fields::K_F_ZX ;
	    const int K_F_ZY = Fields::K_F_ZY ;
	    const int K_F_ZZ = Fields::K_F_ZZ ;

	    // Entries built from the X velocity component.
	    vel_grad(ielem, K_F_XX) = inv_vol * dot8<Scalar,execution_space>( vx , grad_x );
	    vel_grad(ielem, K_F_XY) = inv_vol * dot8<Scalar,execution_space>( vx , grad_y );
	    vel_grad(ielem, K_F_XZ) = inv_vol * dot8<Scalar,execution_space>( vx , grad_z );

	    // Entries built from the Y velocity component.
	    vel_grad(ielem, K_F_YX) = inv_vol * dot8<Scalar,execution_space>( vy , grad_x );
	    vel_grad(ielem, K_F_YY) = inv_vol * dot8<Scalar,execution_space>( vy , grad_y );
	    vel_grad(ielem, K_F_YZ) = inv_vol * dot8<Scalar,execution_space>( vy , grad_z );

	    // Entries built from the Z velocity component.
	    vel_grad(ielem, K_F_ZX) = inv_vol * dot8<Scalar,execution_space>( vz , grad_x );
	    vel_grad(ielem, K_F_ZY) = inv_vol * dot8<Scalar,execution_space>( vz , grad_y );
	    vel_grad(ielem, K_F_ZZ) = inv_vol * dot8<Scalar,execution_space>( vz , grad_z );
	  }

	  //--------------------------------------------------------------------------
	  // Functor operator(): gathers nodal data, calls comp_grad, then v_grad.

	  KOKKOS_INLINE_FUNCTION
	  void operator()( int ielem )const
	  {
	    const int X = 0 ;
	    const int Y = 1 ;
	    const int Z = 2 ;

	    // Coordinates are moved back by half a time step along the velocity.
	    const Scalar half_step_back = -0.5 * *dt;

	    // Local copies of the element's nodal data so each global value is
	    // read once and then reused.
	    Scalar x[ElemNodeCount], y[ElemNodeCount], z[ElemNodeCount];
	    Scalar vx[ElemNodeCount], vy[ElemNodeCount], vz[ElemNodeCount];
	    Scalar grad_x[ElemNodeCount], grad_y[ElemNodeCount], grad_z[ElemNodeCount];

	    for ( int k = 0 ; k < ElemNodeCount ; ++k ) {
	      const int node = elem_node_connectivity( ielem , k );

	      vx[k] = velocity( node , X , current_state );
	      vy[k] = velocity( node , Y , current_state );
	      vz[k] = velocity( node , Z , current_state );

	      x[k] = model_coords( node , X ) +
	             displacement( node , X , current_state ) +
	             half_step_back * vx[k];

	      y[k] = model_coords( node , Y ) +
	             displacement( node , Y , current_state ) +
	             half_step_back * vy[k];

	      z[k] = model_coords( node , Z ) +
	             displacement( node , Z , current_state ) +
	             half_step_back * vz[k];
	    }

	    comp_grad<Scalar,execution_space>( x, y, z, grad_x, grad_y, grad_z);

	    // Reciprocal of the volume measure obtained from the X coordinates and
	    // the X gradient array.
	    const Scalar inv_vol = 1.0 / dot8<Scalar,execution_space>( x , grad_x );

	    v_grad(ielem, vx, vy, vz, grad_x, grad_y, grad_z, inv_vol);
	  }

	  /// Runs the functor over every element of `fields`.
	  static void apply( const Fields & fields ,
	                     const int arg_current_state ,
	                     const int arg_previous_state )
	  {
	    grad functor( fields, arg_current_state , arg_previous_state );
	    Kokkos::parallel_for( fields.num_elements , functor );
	  }
	};

	//----------------------------------------------------------------------------

	/// Per-element functor that
	///   1. splits the velocity gradient into its symmetric part (additive_decomp),
	///   2. advances the rotation tensor and the stretch by one time step
	///      (polar_decomp), and
	///   3. stores R^T * D * R in `rot_stretch` and the updated stretch in
	///      `stretch` (rotate_tensor), where D is the symmetric part from step 1
	///      and R is the new rotation.
	template<typename Scalar, class DeviceType >
	struct decomp_rotate
	{
	  typedef DeviceType execution_space ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  static const int ElemNodeCount = Fields::ElemNodeCount ;

	  static const int K_F_XX = Fields::K_F_XX ;
	  static const int K_F_YY = Fields::K_F_YY ;
	  static const int K_F_ZZ = Fields::K_F_ZZ ;
	  static const int K_F_XY = Fields::K_F_XY ;
	  static const int K_F_YZ = Fields::K_F_YZ ;
	  static const int K_F_ZX = Fields::K_F_ZX ;
	  static const int K_F_YX = Fields::K_F_YX ;
	  static const int K_F_ZY = Fields::K_F_ZY ;
	  static const int K_F_XZ = Fields::K_F_XZ ;

	  static const int K_S_XX = Fields::K_S_XX ;
	  static const int K_S_YY = Fields::K_S_YY ;
	  static const int K_S_ZZ = Fields::K_S_ZZ ;
	  static const int K_S_XY = Fields::K_S_XY ;
	  static const int K_S_YZ = Fields::K_S_YZ ;
	  static const int K_S_ZX = Fields::K_S_ZX ;
	  static const int K_S_YX = Fields::K_S_YX ;
	  static const int K_S_ZY = Fields::K_S_ZY ;
	  static const int K_S_XZ = Fields::K_S_XZ ;

	  static const int K_V_XY = Fields::K_V_XY ;
	  static const int K_V_YZ = Fields::K_V_YZ ;
	  static const int K_V_ZX = Fields::K_V_ZX ;

	  // Global arrays used by this functor.

	  const typename Fields::elem_tensor_state_type rotation ;
	  const typename Fields::elem_tensor_type vel_grad ;
	  const typename Fields::elem_sym_tensor_type stretch ;
	  const typename Fields::elem_sym_tensor_type rot_stretch ;
	  const typename Fields::scalar_type dt_value ;

	  const int current_state;
	  const int previous_state;

	  /// Host-side constructor; every view copy is shallow.
	  ///
	  /// \param mesh_fields         Source of the views.
	  /// \param arg_current_state   State index the new rotation is written to.
	  /// \param arg_previous_state  State index the old rotation is read from.
	  decomp_rotate( const Fields & mesh_fields ,
	                 const int arg_current_state,
	                 const int arg_previous_state)
	    : rotation( mesh_fields.rotation )
	    , vel_grad( mesh_fields.vel_grad )
	    , stretch( mesh_fields.stretch )
	    , rot_stretch( mesh_fields.rot_stretch )
	    , dt_value( mesh_fields.dt)
	    , current_state( arg_current_state)
	    , previous_state(arg_previous_state)
	  {}

	  /// Runs the functor over every element of `mesh_fields`.
	  static void apply( const Fields & mesh_fields ,
	                     const int arg_current_state ,
	                     const int arg_previous_state )
	  {
	    decomp_rotate op( mesh_fields , arg_current_state , arg_previous_state );
	    Kokkos::parallel_for( mesh_fields.num_elements , op );
	  }


	  /// Loads the nine velocity-gradient entries of element `ielem` into
	  /// `v_gr` and writes their symmetric part into `str_ten` (six entries).
	  KOKKOS_INLINE_FUNCTION
	  void additive_decomp(int ielem, Scalar * v_gr, Scalar * str_ten) const
	  {
	    // In addition to calculating stretching_tensor,
	    // use this as an opportunity to load global
	    // variables into a local space

	    for ( int i = 0 ; i < 9 ; ++i ) {
	      v_gr[i] = vel_grad( ielem , i );
	    }

	    //
	    // Symmetric part
	    //
	    str_ten[K_S_XX] = v_gr[K_F_XX];
	    str_ten[K_S_YY] = v_gr[K_F_YY];
	    str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
	    str_ten[K_S_XY] = 0.5*(v_gr[K_F_XY] + v_gr[K_F_YX]);
	    str_ten[K_S_YZ] = 0.5*(v_gr[K_F_YZ] + v_gr[K_F_ZY]);
	    str_ten[K_S_ZX] = 0.5*(v_gr[K_F_ZX] + v_gr[K_F_XZ]);
	  }

	  /// Advances the rotation and the stretch of element `ielem` by one step.
	  ///
	  /// \param v_gr     In:  velocity gradient (9 entries) from additive_decomp.
	  /// \param str_ten  In:  symmetric part of v_gr (6 entries).
	  /// \param str      Out: stretch loaded from `stretch` and updated in place;
	  ///                      not written back to the view here.
	  /// \param vort     Out: skew-symmetric part of v_gr (3 entries).
	  /// \param rot_old  Out: rotation at previous_state (9 entries).
	  /// \param rot_new  Out: updated rotation (9 entries); also written to
	  ///                      `rotation` at current_state.
	  KOKKOS_INLINE_FUNCTION
	  void polar_decomp(int ielem, Scalar * v_gr, Scalar * str_ten, Scalar * str, Scalar * vort, Scalar * rot_old, Scalar * rot_new)const
	  {
	    const Scalar dt = *dt_value;
	    const Scalar dt_half = 0.5 * dt;

	    // Skew Symmetric part
	    vort[K_V_XY] = 0.5*(v_gr[K_F_XY] - v_gr[K_F_YX]);
	    vort[K_V_YZ] = 0.5*(v_gr[K_F_YZ] - v_gr[K_F_ZY]);
	    vort[K_V_ZX] = 0.5*(v_gr[K_F_ZX] - v_gr[K_F_XZ]);

	    // calculate the rates of rotation via gauss elimination.
	    for ( int i = 0 ; i < 6 ; ++i ) {
	      str[i] = stretch(ielem, i);
	    }

	    Scalar z1 = str_ten[K_S_XY] * str[K_S_ZX] -
	                str_ten[K_S_ZX] * str[K_S_XY] +
	                str_ten[K_S_YY] * str[K_S_YZ] -
	                str_ten[K_S_YZ] * str[K_S_YY] +
	                str_ten[K_S_YZ] * str[K_S_ZZ] -
	                str_ten[K_S_ZZ] * str[K_S_YZ];

	    Scalar z2 = str_ten[K_S_ZX] * str[K_S_XX] -
	                str_ten[K_S_XX] * str[K_S_ZX] +
	                str_ten[K_S_YZ] * str[K_S_XY] -
	                str_ten[K_S_XY] * str[K_S_YZ] +
	                str_ten[K_S_ZZ] * str[K_S_ZX] -
	                str_ten[K_S_ZX] * str[K_S_ZZ];

	    Scalar z3 = str_ten[K_S_XX] * str[K_S_XY] -
	                str_ten[K_S_XY] * str[K_S_XX] +
	                str_ten[K_S_XY] * str[K_S_YY] -
	                str_ten[K_S_YY] * str[K_S_XY] +
	                str_ten[K_S_ZX] * str[K_S_YZ] -
	                str_ten[K_S_YZ] * str[K_S_ZX];

	    // forward elimination
	    const Scalar a1inv = 1.0 / (str[K_S_YY] + str[K_S_ZZ]);

	    const Scalar a4BYa1 = -1 * str[K_S_XY] * a1inv;

	    const Scalar a2inv = 1.0 / (str[K_S_ZZ] + str[K_S_XX] + str[K_S_XY] * a4BYa1);

	    const Scalar a5 = -str[K_S_YZ] + str[K_S_ZX] * a4BYa1;

	    z2 -= z1 * a4BYa1;
	    Scalar a6BYa1 = -1 * str[K_S_ZX] * a1inv;
	    const Scalar a5BYa2 = a5 * a2inv;
	    z3 -= z1 * a6BYa1 - z2 * a5BYa2;

	    // backward substitution -
	    z3 /= (str[K_S_XX] + str[K_S_YY] + str[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
	    z2 = (z2 - a5 * z3) * a2inv;
	    z1 = (z1 * a1inv - a6BYa1 * z3 - a4BYa1 * z2);

	    // calculate rotation rates - recall that spin_rate is an asymmetric tensor,
	    // so compute spin rate vector as dual of spin rate tensor,
	    // i.e w_i = e_ijk * spin_rate_jk
	    z1 += vort[K_V_YZ];
	    z2 += vort[K_V_ZX];
	    z3 += vort[K_V_XY];

	    // update rotation tensor:
	    // 1) premultiply old rotation tensor to get right-hand side.

	    for ( int i = 0 ; i < 9 ; ++i ) {
	      rot_old[i] = rotation(ielem, i, previous_state);
	    }

	    Scalar r_XX = rot_old[K_F_XX] + dt_half * ( z3 * rot_old[K_F_YX] - z2 * rot_old[K_F_ZX] );
	    Scalar r_YX = rot_old[K_F_YX] + dt_half * ( z1 * rot_old[K_F_ZX] - z3 * rot_old[K_F_XX] );
	    Scalar r_ZX = rot_old[K_F_ZX] + dt_half * ( z2 * rot_old[K_F_XX] - z1 * rot_old[K_F_YX] );
	    Scalar r_XY = rot_old[K_F_XY] + dt_half * ( z3 * rot_old[K_F_YY] - z2 * rot_old[K_F_ZY] );
	    Scalar r_YY = rot_old[K_F_YY] + dt_half * ( z1 * rot_old[K_F_ZY] - z3 * rot_old[K_F_XY] );
	    Scalar r_ZY = rot_old[K_F_ZY] + dt_half * ( z2 * rot_old[K_F_XY] - z1 * rot_old[K_F_YY] );
	    Scalar r_XZ = rot_old[K_F_XZ] + dt_half * ( z3 * rot_old[K_F_YZ] - z2 * rot_old[K_F_ZZ] );
	    Scalar r_YZ = rot_old[K_F_YZ] + dt_half * ( z1 * rot_old[K_F_ZZ] - z3 * rot_old[K_F_XZ] );
	    Scalar r_ZZ = rot_old[K_F_ZZ] + dt_half * ( z2 * rot_old[K_F_XZ] - z1 * rot_old[K_F_YZ] );


	    // 2) solve for new rotation tensor via gauss elimination.
	    // forward elimination -
	    Scalar a12 = - dt_half * z3;
	    Scalar a13 = dt_half * z2;
	    Scalar b32 = - dt_half * z1;
	    Scalar a22inv = 1.0 / (1.0 + a12 * a12);

	    Scalar a13a12 = a13*a12;
	    Scalar a23 = b32 + a13a12;
	    r_YX += r_XX * a12;
	    r_YY += r_XY * a12;
	    r_YZ += r_XZ * a12;


	    b32 = (b32 - a13a12) * a22inv;
	    r_ZX += r_XX * a13 + r_YX * b32;
	    r_ZY += r_XY * a13 + r_YY * b32;
	    r_ZZ += r_XZ * a13 + r_YZ * b32;


	    // backward substitution -
	    const Scalar a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);

	    rot_new[K_F_ZX] = r_ZX * a33inv;
	    rot_new[K_F_ZY] = r_ZY * a33inv;
	    rot_new[K_F_ZZ] = r_ZZ * a33inv;
	    rot_new[K_F_YX] = ( r_YX - rot_new[K_F_ZX] * a23 ) * a22inv;
	    rot_new[K_F_YY] = ( r_YY - rot_new[K_F_ZY] * a23 ) * a22inv;
	    rot_new[K_F_YZ] = ( r_YZ - rot_new[K_F_ZZ] * a23 ) * a22inv;
	    rot_new[K_F_XX] = r_XX - rot_new[K_F_ZX] * a13 - rot_new[K_F_YX] * a12;
	    rot_new[K_F_XY] = r_XY - rot_new[K_F_ZY] * a13 - rot_new[K_F_YY] * a12;
	    rot_new[K_F_XZ] = r_XZ - rot_new[K_F_ZZ] * a13 - rot_new[K_F_YZ] * a12;

	    for ( int i = 0 ; i < 9 ; ++i ) {
	      rotation(ielem, i, current_state) = rot_new[i] ;
	    }

	    // update stretch tensor in the new configuration -
	    const Scalar a1 = str_ten[K_S_XY] + vort[K_V_XY];
	    const Scalar a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
	    const Scalar a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
	    const Scalar b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
	    const Scalar b2 = str_ten[K_S_XY] - vort[K_V_XY];
	    const Scalar b3 = str_ten[K_S_YZ] - vort[K_V_YZ];

	    // Snapshot the old stretch so every update below uses pre-update values.
	    const Scalar s_XX = str[K_S_XX];
	    const Scalar s_YY = str[K_S_YY];
	    const Scalar s_ZZ = str[K_S_ZZ];
	    const Scalar s_XY = str[K_S_XY];
	    const Scalar s_YZ = str[K_S_YZ];
	    const Scalar s_ZX = str[K_S_ZX];

	    str[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
	    str[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
	    str[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
	    str[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
	    str[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
	    str[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);

	  }


	  /// Stores R^T * D * R (six symmetric entries) in `rot_stretch` for element
	  /// `ielem`, with D = `str_ten` and R = `rot_new`, and writes the updated
	  /// stretch `str` back to `stretch`.
	  KOKKOS_INLINE_FUNCTION
	  void rotate_tensor(int ielem, Scalar * str_ten, Scalar * str, Scalar * rot_new)const {

	    Scalar t[9];       // D * R, stored column by column
	    Scalar rot_str[6]; // Rotated stretch

	    t[0] = str_ten[K_S_XX]*rot_new[K_F_XX] +
	           str_ten[K_S_XY]*rot_new[K_F_YX] +
	           str_ten[K_S_XZ]*rot_new[K_F_ZX];

	    t[1] = str_ten[K_S_YX]*rot_new[K_F_XX] +
	           str_ten[K_S_YY]*rot_new[K_F_YX] +
	           str_ten[K_S_YZ]*rot_new[K_F_ZX];

	    t[2] = str_ten[K_S_ZX]*rot_new[K_F_XX] +
	           str_ten[K_S_ZY]*rot_new[K_F_YX] +
	           str_ten[K_S_ZZ]*rot_new[K_F_ZX];

	    t[3] = str_ten[K_S_XX]*rot_new[K_F_XY] +
	           str_ten[K_S_XY]*rot_new[K_F_YY] +
	           str_ten[K_S_XZ]*rot_new[K_F_ZY];

	    t[4] = str_ten[K_S_YX]*rot_new[K_F_XY] +
	           str_ten[K_S_YY]*rot_new[K_F_YY] +
	           str_ten[K_S_YZ]*rot_new[K_F_ZY];

	    t[5] = str_ten[K_S_ZX]*rot_new[K_F_XY] +
	           str_ten[K_S_ZY]*rot_new[K_F_YY] +
	           str_ten[K_S_ZZ]*rot_new[K_F_ZY];

	    t[6] = str_ten[K_S_XX]*rot_new[K_F_XZ] +
	           str_ten[K_S_XY]*rot_new[K_F_YZ] +
	           str_ten[K_S_XZ]*rot_new[K_F_ZZ];

	    t[7] = str_ten[K_S_YX]*rot_new[K_F_XZ] +
	           str_ten[K_S_YY]*rot_new[K_F_YZ] +
	           str_ten[K_S_YZ]*rot_new[K_F_ZZ];

	    t[8] = str_ten[K_S_ZX]*rot_new[K_F_XZ] +
	           str_ten[K_S_ZY]*rot_new[K_F_YZ] +
	           str_ten[K_S_ZZ]*rot_new[K_F_ZZ];


	    rot_str[ K_S_XX ] = rot_new[K_F_XX] * t[0] +
	                        rot_new[K_F_YX] * t[1] +
	                        rot_new[K_F_ZX] * t[2];
	    rot_str[ K_S_YY ] = rot_new[K_F_XY] * t[3] +
	                        rot_new[K_F_YY] * t[4] +
	                        rot_new[K_F_ZY] * t[5];
	    rot_str[ K_S_ZZ ] = rot_new[K_F_XZ] * t[6] +
	                        rot_new[K_F_YZ] * t[7] +
	                        rot_new[K_F_ZZ] * t[8];

	    rot_str[ K_S_XY ] = rot_new[K_F_XX] * t[3] +
	                        rot_new[K_F_YX] * t[4] +
	                        rot_new[K_F_ZX] * t[5];
	    rot_str[ K_S_YZ ] = rot_new[K_F_XY] * t[6] +
	                        rot_new[K_F_YY] * t[7] +
	                        rot_new[K_F_ZY] * t[8];
	    rot_str[ K_S_ZX ] = rot_new[K_F_XZ] * t[0] +
	                        rot_new[K_F_YZ] * t[1] +
	                        rot_new[K_F_ZZ] * t[2];

	    for ( int i = 0 ; i < 6 ; ++i ) {
	      rot_stretch(ielem, i) = rot_str[i] ;
	    }

	    for ( int i = 0 ; i < 6 ; ++i ) {
	      stretch(ielem, i) = str[i] ;
	    }
	  }

	  /// Processes element `ielem`: additive_decomp, polar_decomp, rotate_tensor.
	  KOKKOS_INLINE_FUNCTION
	  void operator()( int ielem )const {

	    // Local scratch space to avoid multiple
	    // accesses to global memory.
	    Scalar str_ten[6]; // Stretching tensor
	    Scalar str[6]; // Stretch
	    Scalar rot_old[9]; // Rotation old
	    Scalar rot_new[9]; // Rotation new
	    Scalar vort[3]; // Vorticity
	    Scalar v_gr[9]; // Velocity gradient

	    additive_decomp(ielem, v_gr, str_ten);

	    polar_decomp(ielem, v_gr, str_ten, str, vort, rot_old, rot_new);

	    rotate_tensor(ielem, str_ten, str, rot_new);
	  }
	};

	//----------------------------------------------------------------------------

	template<typename Scalar, class DeviceType >
	struct internal_force
	{
	typedef DeviceType execution_space ;

	typedef Explicit::Fields< Scalar , execution_space > Fields ;

	static const int ElemNodeCount = Fields::ElemNodeCount ;

	static const int K_F_XX = Fields::K_F_XX ;
	static const int K_F_YY = Fields::K_F_YY ;
	static const int K_F_ZZ = Fields::K_F_ZZ ;
	static const int K_F_XY = Fields::K_F_XY ;
	static const int K_F_YZ = Fields::K_F_YZ ;
	static const int K_F_ZX = Fields::K_F_ZX ;
	static const int K_F_YX = Fields::K_F_YX ;
	static const int K_F_ZY = Fields::K_F_ZY ;
	static const int K_F_XZ = Fields::K_F_XZ ;

	static const int K_S_XX = Fields::K_S_XX ;
	static const int K_S_YY = Fields::K_S_YY ;
	static const int K_S_ZZ = Fields::K_S_ZZ ;
	static const int K_S_XY = Fields::K_S_XY ;
	static const int K_S_YZ = Fields::K_S_YZ ;
	static const int K_S_ZX = Fields::K_S_ZX ;
	static const int K_S_YX = Fields::K_S_YX ;
	static const int K_S_ZY = Fields::K_S_ZY ;
	static const int K_S_XZ = Fields::K_S_XZ ;

	//--------------------------------------------------------------------------
	// Reduction:

	typedef Scalar value_type;

	/// Reduction hook: sets `update` to the starting value 1.0e32. `join`
	/// keeps the smaller of two values, so this seed is replaced by any
	/// smaller value joined in.
	KOKKOS_INLINE_FUNCTION
	static void init(value_type &update) {
	  const double reduction_seed = 1.0e32 ;
	  update = reduction_seed ;
	}

	// Reduction hook: combines two partial results by keeping the smaller one.
	// `update` receives the result; `source` is only read.
	// When `update < source` is false (including when the values are equal),
	// `source` is the value stored.
	KOKKOS_INLINE_FUNCTION
	static void join( volatile value_type & update,
	const volatile value_type & source )
	{
	update = update < source ? update : source;
	}

	// Final serial processing of reduction value:
	// copy the scalar currently held by `dt` into `prev_dt`, then store the
	// reduction result in `dt`. Both members are rank-0 views, so they are
	// dereferenced to reach the stored value; assigning the view objects
	// themselves is not possible on const members inside a const method.
	KOKKOS_INLINE_FUNCTION
	void final( value_type & result ) const
	{
	  *prev_dt = *dt ;
	  *dt = result ;
	}

	//--------------------------------------------------------------------------

	// Global arrays used by this functor.
	// Each view is initialized by the constructor from the Fields member of
	// the same name.

	const typename Fields::elem_node_ids_type elem_node_connectivity ;
	const typename Fields::node_coords_type model_coords ;
	// Rank-0 views holding the current and previous time step; both are
	// written by final().
	const typename Fields::scalar_type dt ;
	const typename Fields::scalar_type prev_dt ;
	const typename Fields::geom_state_array_type displacement ;
	const typename Fields::geom_state_array_type velocity ;
	const typename Fields::array_type elem_mass ;
	const typename Fields::array_type internal_energy ;
	const typename Fields::elem_sym_tensor_type stress_new ;
	const typename Fields::elem_node_geom_type element_force ;
	const typename Fields::elem_tensor_state_type rotation ;
	const typename Fields::elem_sym_tensor_type rot_stretch ;

	// Material parameters copied from Fields, plus the caller-supplied time
	// step and the state index.
	const Scalar two_mu;
	const Scalar bulk_modulus;
	const Scalar lin_bulk_visc;
	const Scalar quad_bulk_visc;
	const Scalar user_dt;
	const int current_state;

	// Construct the functor by copying every field member it uses, and the
	// material constants, out of mesh_fields.
	//   arg_user_dt       : stored as user_dt
	//   arg_current_state : stored as current_state
	internal_force( const Fields & mesh_fields,
	                const Scalar arg_user_dt,
	                const int arg_current_state )
	  : elem_node_connectivity( mesh_fields.elem_node_connectivity ),
	    model_coords( mesh_fields.model_coords ),
	    dt( mesh_fields.dt ),
	    prev_dt( mesh_fields.prev_dt ),
	    displacement( mesh_fields.displacement ),
	    velocity( mesh_fields.velocity ),
	    elem_mass( mesh_fields.elem_mass ),
	    internal_energy( mesh_fields.internal_energy ),
	    stress_new( mesh_fields.stress_new ),
	    element_force( mesh_fields.element_force ),
	    rotation( mesh_fields.rotation ),
	    rot_stretch( mesh_fields.rot_stretch ),
	    two_mu( mesh_fields.two_mu ),
	    bulk_modulus( mesh_fields.bulk_modulus ),
	    lin_bulk_visc( mesh_fields.lin_bulk_visc ),
	    quad_bulk_visc( mesh_fields.quad_bulk_visc ),
	    user_dt( arg_user_dt ),
	    current_state( arg_current_state )
	{
	}

	static void apply( const Fields & mesh_fields ,
	const Scalar arg_user_dt,
	const int arg_current_state )
	{
	internal_force op_force( mesh_fields , arg_user_dt , arg_current_state );

	Kokkos::parallel_reduce( mesh_fields.num_elements, op_force );
	}

	//--------------------------------------------------------------------------

	// Rotate the stress of element ielem with that element's rotation tensor.
	//
	//   ielem      : element index into the rotation field
	//   s_n        : input stress components, indexed with K_S_*
	//   rot_stress : output; the entries K_S_XX, K_S_YY, K_S_ZZ, K_S_XY,
	//                K_S_YZ and K_S_ZX are written
	//
	// With R(a,b) = r_n[K_F_ab] and S(a,b) = s_n[K_S_ab] the result is
	// R * S * R^T, evaluated in two steps through the temporary t.
	KOKKOS_INLINE_FUNCTION
	void rotate_tensor_backward( int ielem ,
	                             const Scalar * const s_n ,
	                             Scalar * const rot_stress ) const
	{
	  const int rot_state = current_state ; // 1 ;

	  // t   : temporary variables
	  // s_n : stress_new in local memory space
	  // r_n : rotation_new in local memory space
	  Scalar t[9], r_n[9];

	  // Load the nine rotation entries of this element for the current state.
	  for ( int k = 0 ; k < 9 ; ++k ) {
	    r_n[k] = rotation( ielem , k , rot_state );
	  }

	  // t = S * R^T ; entry (row,col) is stored at t[ 3*col + row ].
	  t[0] = s_n[K_S_XX]*r_n[K_F_XX] + s_n[K_S_XY]*r_n[K_F_XY] + s_n[K_S_XZ]*r_n[K_F_XZ];
	  t[1] = s_n[K_S_YX]*r_n[K_F_XX] + s_n[K_S_YY]*r_n[K_F_XY] + s_n[K_S_YZ]*r_n[K_F_XZ];
	  t[2] = s_n[K_S_ZX]*r_n[K_F_XX] + s_n[K_S_ZY]*r_n[K_F_XY] + s_n[K_S_ZZ]*r_n[K_F_XZ];
	  t[3] = s_n[K_S_XX]*r_n[K_F_YX] + s_n[K_S_XY]*r_n[K_F_YY] + s_n[K_S_XZ]*r_n[K_F_YZ];
	  t[4] = s_n[K_S_YX]*r_n[K_F_YX] + s_n[K_S_YY]*r_n[K_F_YY] + s_n[K_S_YZ]*r_n[K_F_YZ];
	  t[5] = s_n[K_S_ZX]*r_n[K_F_YX] + s_n[K_S_ZY]*r_n[K_F_YY] + s_n[K_S_ZZ]*r_n[K_F_YZ];
	  t[6] = s_n[K_S_XX]*r_n[K_F_ZX] + s_n[K_S_XY]*r_n[K_F_ZY] + s_n[K_S_XZ]*r_n[K_F_ZZ];
	  t[7] = s_n[K_S_YX]*r_n[K_F_ZX] + s_n[K_S_YY]*r_n[K_F_ZY] + s_n[K_S_YZ]*r_n[K_F_ZZ];
	  t[8] = s_n[K_S_ZX]*r_n[K_F_ZX] + s_n[K_S_ZY]*r_n[K_F_ZY] + s_n[K_S_ZZ]*r_n[K_F_ZZ];

	  // rot_stress = R * t : diagonal entries ...
	  rot_stress[ K_S_XX ] = r_n[K_F_XX]*t[0] + r_n[K_F_XY]*t[1] + r_n[K_F_XZ]*t[2];
	  rot_stress[ K_S_YY ] = r_n[K_F_YX]*t[3] + r_n[K_F_YY]*t[4] + r_n[K_F_YZ]*t[5];
	  rot_stress[ K_S_ZZ ] = r_n[K_F_ZX]*t[6] + r_n[K_F_ZY]*t[7] + r_n[K_F_ZZ]*t[8];

	  // ... and the three off-diagonal entries that are stored.
	  rot_stress[ K_S_XY ] = r_n[K_F_XX]*t[3] + r_n[K_F_XY]*t[4] + r_n[K_F_XZ]*t[5];
	  rot_stress[ K_S_YZ ] = r_n[K_F_YX]*t[6] + r_n[K_F_YY]*t[7] + r_n[K_F_YZ]*t[8];
	  rot_stress[ K_S_ZX ] = r_n[K_F_ZX]*t[0] + r_n[K_F_ZY]*t[1] + r_n[K_F_ZZ]*t[2];
	}

	//--------------------------------------------------------------------------

	// For each of the eight nodes of element ielem, form the force vector
	// from total_stress12th and the gradient arrays, store it in
	// element_force(ielem, component, node), and accumulate force . velocity.
	// The accumulated sum is stored in internal_energy(ielem).
	KOKKOS_INLINE_FUNCTION
	void comp_force( int ielem ,
	                 const Scalar * const vx ,
	                 const Scalar * const vy ,
	                 const Scalar * const vz ,
	                 const Scalar * const grad_x ,
	                 const Scalar * const grad_y ,
	                 const Scalar * const grad_z ,
	                 Scalar * total_stress12th ) const
	{
	  Scalar energy_sum = 0 ;

	  for ( int node = 0 ; node < 8 ; ++node ) {

	    // x component
	    const Scalar force_x =
	      total_stress12th[K_S_XX] * grad_x[node] +
	      total_stress12th[K_S_XY] * grad_y[node] +
	      total_stress12th[K_S_XZ] * grad_z[node] ;
	    element_force( ielem , 0 , node ) = force_x ;

	    // y component
	    const Scalar force_y =
	      total_stress12th[K_S_YX] * grad_x[node] +
	      total_stress12th[K_S_YY] * grad_y[node] +
	      total_stress12th[K_S_YZ] * grad_z[node] ;
	    element_force( ielem , 1 , node ) = force_y ;

	    // z component
	    const Scalar force_z =
	      total_stress12th[K_S_ZX] * grad_x[node] +
	      total_stress12th[K_S_ZY] * grad_y[node] +
	      total_stress12th[K_S_ZZ] * grad_z[node] ;
	    element_force( ielem , 2 , node ) = force_z ;

	    energy_sum +=
	      force_x * vx[node] +
	      force_y * vy[node] +
	      force_z * vz[node] ;
	  }

	  internal_energy( ielem ) = energy_sum ;
	}

	//----------------------------------------------------------------------------

	// Advance the stored stress of element ielem by one time step (*dt) and
	// return the six updated components in s_n.
	//
	//   ielem : element index into stress_new and rot_stretch
	//   s_n   : output array of at least six entries (xx, yy, zz, xy, yz, zx)
	//
	// stress_new is updated in place: the normal components receive
	//   *dt * ( two_mu * (stretch - e) + 3 * bulk_modulus * e )
	// with e the mean of the three normal stretch components, and the shear
	// components receive  *dt * two_mu * stretch.
	KOKKOS_INLINE_FUNCTION
	void get_stress( int ielem , Scalar * const s_n ) const
	{
	  const int kxx = 0;
	  const int kyy = 1;
	  const int kzz = 2;
	  const int kxy = 3;
	  const int kyz = 4;
	  const int kzx = 5;

	  const Scalar e = (rot_stretch(ielem,kxx)+rot_stretch(ielem,kyy)+rot_stretch(ielem,kzz))/3.0;

	  s_n[kxx] = stress_new(ielem,kxx) += *dt * (two_mu * (rot_stretch(ielem,kxx)-e)+3*bulk_modulus*e);
	  s_n[kyy] = stress_new(ielem,kyy) += *dt * (two_mu * (rot_stretch(ielem,kyy)-e)+3*bulk_modulus*e);
	  s_n[kzz] = stress_new(ielem,kzz) += *dt * (two_mu * (rot_stretch(ielem,kzz)-e)+3*bulk_modulus*e);

	  s_n[kxy] = stress_new(ielem,kxy) += *dt * two_mu * rot_stretch(ielem,kxy);
	  s_n[kyz] = stress_new(ielem,kyz) += *dt * two_mu * rot_stretch(ielem,kyz);
	  s_n[kzx] = stress_new(ielem,kzx) += *dt * two_mu * rot_stretch(ielem,kzx);
	}

	//----------------------------------------------------------------------------


	// Per-element body of the reduction.
	//
	// Gathers the current positions and velocities of the element's nodes,
	// evaluates the gradient operator, derives a candidate time step that is
	// min-reduced into update, then updates the element stress and writes the
	// element's nodal forces through comp_force().
	KOKKOS_INLINE_FUNCTION
	void operator()( int ielem, value_type & update ) const
	{
	  const Scalar ONE12TH = 1.0 / 12.0 ;

	  Scalar x[8], y[8], z[8] ;
	  Scalar vx[8], vy[8], vz[8];
	  Scalar grad_x[8], grad_y[8], grad_z[8];

	  // Gather position (model coordinate + displacement) and velocity
	  // of every node of this element for the current state.
	  for ( int k = 0 ; k < ElemNodeCount ; ++k ) {
	    const int node = elem_node_connectivity( ielem , k );

	    x[k] = model_coords(node, 0) + displacement(node, 0, current_state) ;
	    y[k] = model_coords(node, 1) + displacement(node, 1, current_state) ;
	    z[k] = model_coords(node, 2) + displacement(node, 2, current_state) ;

	    vx[k] = velocity(node, 0, current_state);
	    vy[k] = velocity(node, 1, current_state);
	    vz[k] = velocity(node, 2, current_state);
	  }

	  // Gradient operator and the quantities derived from it.
	  comp_grad<Scalar,execution_space>( x , y , z , grad_x , grad_y , grad_z );

	  const Scalar mid_vol = dot8<Scalar,execution_space>( x , grad_x );

	  const Scalar shr = two_mu ;
	  const Scalar dil = bulk_modulus + ((2.0*shr)/3.0);

	  const Scalar aspect = 6.0 * mid_vol /
	    ( dot8<Scalar,execution_space>( grad_x , grad_x ) +
	      dot8<Scalar,execution_space>( grad_y , grad_y ) +
	      dot8<Scalar,execution_space>( grad_z , grad_z ) );

	  const Scalar dtrial = std::sqrt(elem_mass(ielem) * aspect / dil);
	  const Scalar traced = (rot_stretch(ielem, 0) + rot_stretch(ielem, 1) + rot_stretch(ielem, 2));

	  // The quadratic viscosity term only contributes when traced is negative.
	  Scalar eps = lin_bulk_visc ;
	  if ( traced < 0 ) {
	    eps = (lin_bulk_visc - quad_bulk_visc * traced * dtrial) ;
	  }

	  const Scalar bulkq = eps * dil * dtrial * traced;

	  Scalar cur_time_step = dtrial * ( std::sqrt( 1.0 + eps * eps) - eps);

	  // A positive user_dt overrides the computed step.
	  if ( user_dt > 0 ) {
	    cur_time_step = user_dt ;
	  }

	  // Min-reduce this element's step into the running result.
	  if ( ! ( update < cur_time_step ) ) {
	    update = cur_time_step ;
	  }

	  // Updated element stress, then its rotated form.
	  Scalar s_n[ 6 ];
	  get_stress( ielem, s_n );

	  Scalar total_stress12th[6];
	  rotate_tensor_backward( ielem , s_n , total_stress12th );

	  // Scale by 1/12; bulkq is added to the first three entries only.
	  for ( int k = 0 ; k < 3 ; ++k ) {
	    total_stress12th[k] = ONE12TH*( total_stress12th[ k ] + bulkq );
	  }
	  for ( int k = 3 ; k < 6 ; ++k ) {
	    total_stress12th[k] = ONE12TH*( total_stress12th[ k ] );
	  }

	  comp_force( ielem , vx , vy , vz ,
	              grad_x , grad_y , grad_z , total_stress12th );
	}
	};

	//----------------------------------------------------------------------------

	// Functor applied once per owned node: gathers the element forces acting
	// on the node, derives its acceleration, and advances velocity and
	// displacement from current_state to next_state.
	template<typename Scalar, class DeviceType >
	struct nodal_step
	{
	  typedef DeviceType execution_space ;
	  typedef typename execution_space::size_type size_type;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  // Time-step scalars; their values are read through operator*.
	  const typename Fields::scalar_type dt ;
	  const typename Fields::scalar_type prev_dt ;
	  // CSR-like node -> (element, local node index) map (row_map / entries).
	  const typename Fields::node_elem_ids_type node_elem_connectivity ;
	  const typename Fields::node_coords_type model_coords ;
	  const typename Fields::array_type nodal_mass ;
	  const typename Fields::geom_state_array_type displacement ;
	  const typename Fields::geom_state_array_type velocity ;
	  const typename Fields::geom_array_type acceleration ;
	  const typename Fields::geom_array_type internal_force ;
	  const typename Fields::elem_node_geom_type element_force ;

	  // Nodes whose model x coordinate is within a tolerance of x_bc are held fixed.
	  const Scalar x_bc;
	  const int current_state;
	  const int next_state;

	  // Copy the fields used by this functor out of mesh_fields and store the
	  // boundary coordinate and the two state indices.
	  nodal_step( const Fields & mesh_fields ,
	              const Scalar arg_x_bc,
	              const int arg_current_state,
	              const int arg_next_state)
	    : dt( mesh_fields.dt )
	    , prev_dt( mesh_fields.prev_dt )
	    , node_elem_connectivity( mesh_fields.node_elem_connectivity )
	    , model_coords( mesh_fields.model_coords )
	    , nodal_mass( mesh_fields.nodal_mass )
	    , displacement( mesh_fields.displacement )
	    , velocity( mesh_fields.velocity )
	    , acceleration( mesh_fields.acceleration )
	    , internal_force( mesh_fields.internal_force )
	    , element_force( mesh_fields.element_force )
	    , x_bc( arg_x_bc )
	    , current_state( arg_current_state )
	    , next_state( arg_next_state )
	  {
	  }

	  // Construct the functor and run it over mesh_fields.num_nodes_owned nodes.
	  static void apply( const Fields & mesh_fields ,
	                     const Scalar arg_x_bc ,
	                     const int arg_current_state ,
	                     const int arg_next_state )
	  {
	    nodal_step op( mesh_fields, arg_x_bc, arg_current_state, arg_next_state );

	    // Only update the owned nodes:

	    Kokkos::parallel_for( mesh_fields.num_nodes_owned , op );
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()(int inode) const
	  {
	    // 'CSR-like' data structure: row_map[inode] .. row_map[inode+1]
	    // delimits the entries that belong to this node.
	    const int begin = node_elem_connectivity.row_map[inode];
	    const int end = node_elem_connectivity.row_map[inode+1];

	    double local_force[] = {0.0, 0.0, 0.0};

	    // Gather-sum internal force from
	    // each element that a node is attached to.

	    for ( int i = begin; i < end ; ++i ){

	      // Column 0 of an entry is the id of an attached element.
	      const int nelem = node_elem_connectivity.entries( i, 0);

	      // Column 1 is the index under which that element stores
	      // its force contribution for inode.
	      const int elem_node_index = node_elem_connectivity.entries( i, 1);

	      local_force[0] += element_force(nelem, 0, elem_node_index);
	      local_force[1] += element_force(nelem, 1, elem_node_index);
	      local_force[2] += element_force(nelem, 2, elem_node_index);
	    }

	    internal_force(inode, 0) = local_force[0];
	    internal_force(inode, 1) = local_force[1];
	    internal_force(inode, 2) = local_force[2];

	    // Acceleration:

	    Scalar v_new[3];
	    Scalar a_current[3];

	    const Scalar tol = 1.0e-7;

	    // If not on the boundary then: a = -F / m
	    if ( tol < fabs(model_coords(inode,0)-x_bc) ) {

	      const Scalar m = nodal_mass( inode );

	      acceleration(inode,0) = a_current[0] = -local_force[0] / m ;
	      acceleration(inode,1) = a_current[1] = -local_force[1] / m ;
	      acceleration(inode,2) = a_current[2] = -local_force[2] / m ;
	    }
	    else { //enforce fixed BC
	      acceleration(inode,0) = a_current[0] = 0;
	      acceleration(inode,1) = a_current[1] = 0;
	      acceleration(inode,2) = a_current[2] = 0;
	    }

	    // Central difference time integration:
	    // velocity advances with the mean of the two time-step values,
	    // displacement advances with the value held in dt.

	    const Scalar dt_disp = *dt ;
	    const Scalar dt_vel = ( *dt + *prev_dt ) / 2.0 ;

	    velocity(inode,0,next_state) = v_new[0] =
	      velocity(inode,0,current_state) + dt_vel * a_current[0];

	    velocity(inode,1,next_state) = v_new[1] =
	      velocity(inode,1,current_state) + dt_vel * a_current[1];

	    velocity(inode,2,next_state) = v_new[2] =
	      velocity(inode,2,current_state) + dt_vel * a_current[2];

	    displacement(inode,0,next_state) =
	      displacement(inode,0,current_state) + dt_disp * v_new[0];

	    displacement(inode,1,next_state) =
	      displacement(inode,1,current_state) + dt_disp * v_new[1];

	    displacement(inode,2,next_state) =
	      displacement(inode,2,current_state) + dt_disp * v_new[2];
	  }
	};

	//----------------------------------------------------------------------------

	// Functor that copies displacement and velocity of a contiguous range of
	// nodes, for one state index, into a flat buffer: six values per node,
	// the three displacement components followed by the three velocity
	// components.
	template< typename Scalar , class DeviceType >
	struct pack_state
	{
	  typedef DeviceType execution_space ;
	  typedef typename execution_space::size_type size_type ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  typedef typename Fields::geom_state_array_type::value_type value_type ;
	  typedef Kokkos::View< value_type* , execution_space > buffer_type ;

	  // Number of buffer slots used per node.
	  static const unsigned value_count = 6 ;

	  const typename Fields::geom_state_array_type displacement ;
	  const typename Fields::geom_state_array_type velocity ;
	  const buffer_type output ;
	  const size_type inode_base ;   // first node of the packed range
	  const size_type state_next ;   // state index that is read

	  pack_state( const buffer_type & arg_output ,
	              const Fields & mesh_fields ,
	              const size_type arg_begin ,
	              const size_type arg_state )
	    : displacement( mesh_fields.displacement ),
	      velocity( mesh_fields.velocity ),
	      output( arg_output ),
	      inode_base( arg_begin ),
	      state_next( arg_state )
	  {
	  }

	  // Pack arg_count nodes starting at node arg_begin into arg_output.
	  static void apply( const buffer_type & arg_output ,
	                     const size_type arg_begin ,
	                     const size_type arg_count ,
	                     const Fields & mesh_fields ,
	                     const size_type arg_state )
	  {
	    pack_state packer( arg_output , mesh_fields , arg_begin , arg_state );

	    Kokkos::parallel_for( arg_count , packer );
	  }

	  // Work item i handles node inode_base + i and fills buffer slots
	  // [ i * value_count , i * value_count + 6 ).
	  KOKKOS_INLINE_FUNCTION
	  void operator()( const size_type i ) const
	  {
	    const size_type inode = inode_base + i ;

	    size_type slot = i * value_count ;

	    for ( int d = 0 ; d < 3 ; ++d ) {
	      output[slot++] = displacement( inode , d , state_next );
	    }
	    for ( int d = 0 ; d < 3 ; ++d ) {
	      output[slot++] = velocity( inode , d , state_next );
	    }
	  }
	};

	// Functor that copies values from a flat buffer into displacement and
	// velocity of a contiguous range of nodes, for one state index. Each node
	// consumes six buffer values: three displacement components followed by
	// three velocity components.
	template< typename Scalar , class DeviceType >
	struct unpack_state
	{
	  typedef DeviceType execution_space ;
	  typedef typename execution_space::size_type size_type ;

	  typedef Explicit::Fields< Scalar , execution_space > Fields ;

	  typedef typename Fields::geom_state_array_type::value_type value_type ;
	  typedef Kokkos::View< value_type* , execution_space > buffer_type ;

	  // Number of buffer slots consumed per node.
	  static const unsigned value_count = 6 ;

	  const typename Fields::geom_state_array_type displacement ;
	  const typename Fields::geom_state_array_type velocity ;
	  const buffer_type input ;
	  const size_type inode_base ;   // first node of the unpacked range
	  const size_type state_next ;   // state index that is written

	  unpack_state( const buffer_type & arg_input ,
	                const Fields & mesh_fields ,
	                const size_type arg_begin ,
	                const size_type arg_state )
	    : displacement( mesh_fields.displacement ),
	      velocity( mesh_fields.velocity ),
	      input( arg_input ),
	      inode_base( arg_begin ),
	      state_next( arg_state )
	  {
	  }

	  // Unpack arg_count nodes starting at node arg_begin from arg_input.
	  static void apply( const Fields & mesh_fields ,
	                     const size_type arg_state ,
	                     const buffer_type & arg_input ,
	                     const size_type arg_begin ,
	                     const size_type arg_count )
	  {
	    unpack_state unpacker( arg_input , mesh_fields , arg_begin , arg_state );

	    Kokkos::parallel_for( arg_count , unpacker );
	  }

	  // Work item i handles node inode_base + i and reads buffer slots
	  // [ i * value_count , i * value_count + 6 ).
	  KOKKOS_INLINE_FUNCTION
	  void operator()( const size_type i ) const
	  {
	    const size_type inode = inode_base + i ;

	    size_type slot = i * value_count ;

	    for ( int d = 0 ; d < 3 ; ++d ) {
	      displacement( inode , d , state_next ) = input[slot++] ;
	    }
	    for ( int d = 0 ; d < 3 ; ++d ) {
	      velocity( inode , d , state_next ) = input[slot++] ;
	    }
	  }
	};

	} /* namespace Explicit */

	#endif /* #ifndef KOKKOS_EXPLICITFUNCTORS_HPP */

ExplicitFunctors.hppNo OneTemporaryActions

File Metadata

ExplicitFunctors.hppView Options

Event Timeline

ExplicitFunctors.hpp
No OneTemporary
Actions

ExplicitFunctors.hpp
View Options