Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F83616875
TestReduce.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Sep 18, 03:23
Size
59 KB
Mime Type
text/x-c++
Expires
Fri, Sep 20, 03:23 (2 d)
Engine
blob
Format
Raw Data
Handle
20867989
Attached To
rLAMMPS lammps
TestReduce.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <limits>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
// Statically sized reduction functor used by the reduce unit tests.
//
// For every work index `iwork` it accumulates three values into the
// reduction variable: a count (+1), `iwork + 1`, and `nwork - iwork`.
// No `init` member is supplied; the one below is deliberately left
// commented out.
template< typename ScalarType , class DeviceType >
class ReduceFunctor
{
public:

  using execution_space = DeviceType ;
  using size_type       = typename execution_space::size_type ;

  // Reduction value: three independent accumulators.
  struct value_type {
    ScalarType value[3] ;
  };

  // Total number of work items; read by operator().
  const size_type nwork ;

  ReduceFunctor( const size_type & arg_nwork )
    : nwork( arg_nwork )
  {}

  ReduceFunctor( const ReduceFunctor & rhs )
    : nwork( rhs.nwork )
  {}

/*
  KOKKOS_INLINE_FUNCTION
  void init( value_type & dst ) const
  {
    dst.value[0] = 0 ;
    dst.value[1] = 0 ;
    dst.value[2] = 0 ;
  }
*/

  // Merge a partial result into `dst`, slot by slot.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst ,
             const volatile value_type & src ) const
  {
    for ( int slot = 0 ; slot < 3 ; ++slot ) {
      dst.value[slot] += src.value[slot] ;
    }
  }

  // Contribution of a single work item.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type iwork , value_type & dst ) const
  {
    dst.value[0] += 1 ;              // item count
    dst.value[1] += iwork + 1 ;      // ascending term
    dst.value[2] += nwork - iwork ;  // descending term
  }
};
// ReduceFunctor<long, DeviceType> extended with a `final` member that
// negates each of the three accumulated values.
template< class DeviceType >
class ReduceFunctorFinal : public ReduceFunctor< long , DeviceType > {
public:

  using value_type = typename ReduceFunctor< long , DeviceType >::value_type ;

  ReduceFunctorFinal( const size_t n )
    : ReduceFunctor< long , DeviceType >( n )
  {}

  // Flip the sign of every accumulator in place.
  KOKKOS_INLINE_FUNCTION
  void final( value_type & dst ) const
  {
    for ( int slot = 0 ; slot < 3 ; ++slot ) {
      dst.value[slot] = - dst.value[slot] ;
    }
  }
};
// Reduction functor whose value is an array of `value_count` scalars, with
// the length chosen at run time.
//
// Entry k of the array accumulates, per work item, one of
//   1, iwork + 1, nwork - iwork
// selected by k % 3.
template< typename ScalarType , class DeviceType >
class RuntimeReduceFunctor
{
public:

  // Required for functor:
  using execution_space = DeviceType ;
  using value_type      = ScalarType[] ;
  const unsigned value_count ;

  // Unit test details:
  using size_type = typename execution_space::size_type ;
  const size_type nwork ;

  RuntimeReduceFunctor( const size_type arg_nwork ,
                        const size_type arg_count )
    : value_count( arg_count )
    , nwork( arg_nwork )
  {}

  // Zero every entry of the reduction array.
  KOKKOS_INLINE_FUNCTION
  void init( ScalarType dst[] ) const
  {
    for ( unsigned k = 0 ; k < value_count ; ++k ) {
      dst[k] = 0 ;
    }
  }

  // Entry-wise sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join( volatile ScalarType dst[] ,
             const volatile ScalarType src[] ) const
  {
    for ( unsigned k = 0 ; k < value_count ; ++k ) {
      dst[k] += src[k] ;
    }
  }

  // Contribution of a single work item.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type iwork , ScalarType dst[] ) const
  {
    // The three per-item terms; entry k picks term k % 3.
    const size_type term[3] = { 1 , iwork + 1 , nwork - iwork };

    for ( size_type k = 0 ; k < value_count ; ++k ) {
      dst[k] += term[ k % 3 ];
    }
  }
};
// Runtime-sized min/max reduction functor used by the reduce unit tests.
//
// Even-indexed entries of the reduction array hold a running maximum of
// `iwork + 1`; odd-indexed entries hold a running minimum of
// `nwork - iwork`.
template< typename ScalarType , class DeviceType >
class RuntimeReduceMinMax
{
public:
  // Required for functor:
  typedef DeviceType execution_space ;
  typedef ScalarType value_type[] ;
  const unsigned value_count ;

  // Unit test details:
  typedef typename execution_space::size_type size_type ;

  const size_type nwork ;
  const ScalarType amin ;  // starting value for the max entries
  const ScalarType amax ;  // starting value for the min entries

  RuntimeReduceMinMax( const size_type arg_nwork ,
                       const size_type arg_count )
    : value_count( arg_count )
    , nwork( arg_nwork )
    // lowest(), not min(): for floating-point types min() is the smallest
    // positive normalized value, which is not a valid starting point for a
    // maximum. For integer types lowest() and min() are the same value.
    , amin( std::numeric_limits<ScalarType>::lowest() )
    , amax( std::numeric_limits<ScalarType>::max() )
  {}

  // Seed each entry with the identity of its operation:
  // odd entries (min) start at amax, even entries (max) start at amin.
  KOKKOS_INLINE_FUNCTION
  void init( ScalarType dst[] ) const
  {
    for ( unsigned i = 0 ; i < value_count ; ++i ) {
      dst[i] = i % 2 ? amax : amin ;
    }
  }

  // Combine two partial results entry by entry.
  KOKKOS_INLINE_FUNCTION
  void join( volatile ScalarType dst[] ,
             const volatile ScalarType src[] ) const
  {
    for ( unsigned i = 0 ; i < value_count ; ++i ) {
      dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] )  // min
                     : ( dst[i] > src[i] ? dst[i] : src[i] ); // max
    }
  }

  // Fold one work item into the running min/max values.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type iwork , ScalarType dst[] ) const
  {
    // tmp[0] feeds the max entries, tmp[1] feeds the min entries.
    const ScalarType tmp[2] = { ScalarType(iwork + 1)
                              , ScalarType(nwork - iwork) };

    for ( size_type i = 0 ; i < value_count ; ++i ) {
      dst[i] = i % 2 ? ( dst[i] < tmp[i%2] ? dst[i] : tmp[i%2] )
                     : ( dst[i] > tmp[i%2] ? dst[i] : tmp[i%2] );
    }
  }
};
// RuntimeReduceFunctor<long, DeviceType> extended with a `final` member that
// negates every entry of the reduction array.
template< class DeviceType >
class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long , DeviceType > {
public:

  using base_type   = RuntimeReduceFunctor< long , DeviceType > ;
  using value_type  = typename base_type::value_type ;
  using scalar_type = long ;

  RuntimeReduceFunctorFinal( const size_t theNwork , const size_t count )
    : base_type( theNwork , count )
  {}

  // Flip the sign of each of the value_count entries in place.
  KOKKOS_INLINE_FUNCTION
  void final( value_type dst ) const
  {
    for ( unsigned k = 0 ; k < base_type::value_count ; ++k ) {
      dst[k] = - dst[k] ;
    }
  }
};
} // namespace Test
namespace {
template< typename ScalarType , class DeviceType >
class TestReduce
{
public:
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
//------------------------------------
TestReduce( const size_type & nwork )
{
run_test(nwork);
run_test_final(nwork);
}
void run_test( const size_type & nwork )
{
typedef Test::ReduceFunctor< ScalarType , execution_space > functor_type ;
typedef typename functor_type::value_type value_type ;
enum { Count = 3 };
enum { Repeat = 100 };
value_type result[ Repeat ];
const unsigned long nw = nwork ;
const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 )
: (nw/2) * ( nw + 1 );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] );
}
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
for ( unsigned j = 0 ; j < Count ; ++j ) {
const unsigned long correct = 0 == j % 3 ? nw : nsum ;
ASSERT_EQ( (ScalarType) correct , result[i].value[j] );
}
}
}
void run_test_final( const size_type & nwork )
{
typedef Test::ReduceFunctorFinal< execution_space > functor_type ;
typedef typename functor_type::value_type value_type ;
enum { Count = 3 };
enum { Repeat = 100 };
value_type result[ Repeat ];
const unsigned long nw = nwork ;
const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 )
: (nw/2) * ( nw + 1 );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
if(i%2==0)
Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] );
else
Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork) , result[i] );
}
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
for ( unsigned j = 0 ; j < Count ; ++j ) {
const unsigned long correct = 0 == j % 3 ? nw : nsum ;
ASSERT_EQ( (ScalarType) correct , - result[i].value[j] );
}
}
}
};
// Runs the runtime-sized reduction functors (sum, min/max, sum with final)
// through Kokkos::parallel_reduce, writing into plain host arrays, and
// verifies the reduced values. Even repetitions use the unlabelled overload,
// odd repetitions the labelled one.
template< typename ScalarType , class DeviceType >
class TestReduceDynamic
{
public:

  using execution_space = DeviceType ;
  using size_type       = typename execution_space::size_type ;

  //------------------------------------

  // All three test variants run as part of construction.
  TestReduceDynamic( const size_type nwork )
  {
    run_test_dynamic(nwork);
    run_test_dynamic_minmax(nwork);
    run_test_dynamic_final(nwork);
  }

  // Sum functor with three entries: expects { nw, nsum, nsum }.
  void run_test_dynamic( const size_type nwork )
  {
    using functor_type = Test::RuntimeReduceFunctor< ScalarType , execution_space > ;

    enum { Count = 3 };
    enum { Repeat = 100 };

    ScalarType result[ Repeat ][ Count ] ;

    const unsigned long nw = nwork ;
    // Sum of 1..nw; the even factor is halved before multiplying.
    const bool nw_is_odd = ( nw % 2 ) != 0 ;
    const unsigned long nsum = nw_is_odd ? nw * ( ( nw + 1 ) / 2 )
                                         : ( nw / 2 ) * ( nw + 1 ) ;

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      const bool with_label = ( rep % 2 ) != 0 ;
      if ( with_label ) {
        Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[rep] );
      }
      else {
        Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[rep] );
      }
    }

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      for ( unsigned k = 0 ; k < Count ; ++k ) {
        const unsigned long correct = ( k % 3 == 0 ) ? nw : nsum ;
        ASSERT_EQ( static_cast<ScalarType>( correct ) , result[rep][k] );
      }
    }
  }

  // Min/max functor with two entries: entry 0 (max) expects nwork,
  // entry 1 (min) expects 1.
  void run_test_dynamic_minmax( const size_type nwork )
  {
    using functor_type = Test::RuntimeReduceMinMax< ScalarType , execution_space > ;

    enum { Count = 2 };
    enum { Repeat = 100 };

    ScalarType result[ Repeat ][ Count ] ;

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      const bool with_label = ( rep % 2 ) != 0 ;
      if ( with_label ) {
        Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[rep] );
      }
      else {
        Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[rep] );
      }
    }

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      for ( unsigned k = 0 ; k < Count ; ++k ) {
        const unsigned long correct = ( k % 2 != 0 ) ? 1 : nwork ;
        ASSERT_EQ( static_cast<ScalarType>( correct ) , result[rep][k] );
      }
    }
  }

  // Sum functor with `final`: results come back negated, so the check
  // negates them again.
  void run_test_dynamic_final( const size_type nwork )
  {
    using functor_type = Test::RuntimeReduceFunctorFinal< execution_space > ;

    enum { Count = 3 };
    enum { Repeat = 100 };

    typename functor_type::scalar_type result[ Repeat ][ Count ] ;

    const unsigned long nw = nwork ;
    const bool nw_is_odd = ( nw % 2 ) != 0 ;
    const unsigned long nsum = nw_is_odd ? nw * ( ( nw + 1 ) / 2 )
                                         : ( nw / 2 ) * ( nw + 1 ) ;

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      const bool with_label = ( rep % 2 ) != 0 ;
      if ( with_label ) {
        Kokkos::parallel_reduce( "TestKernelReduce" , nwork , functor_type(nwork,Count) , result[rep] );
      }
      else {
        Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[rep] );
      }
    }

    for ( unsigned rep = 0 ; rep < Repeat ; ++rep ) {
      for ( unsigned k = 0 ; k < Count ; ++k ) {
        const unsigned long correct = ( k % 3 == 0 ) ? nw : nsum ;
        ASSERT_EQ( static_cast<ScalarType>( correct ) , - result[rep][k] );
      }
    }
  }
};
template< typename ScalarType , class DeviceType >
class TestReduceDynamicView
{
public:
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
//------------------------------------
TestReduceDynamicView( const size_type nwork )
{
run_test_dynamic_view(nwork);
}
void run_test_dynamic_view( const size_type nwork )
{
typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ;
typedef Kokkos::View< ScalarType* , DeviceType > result_type ;
typedef typename result_type::HostMirror result_host_type ;
const unsigned CountLimit = 23 ;
const unsigned long nw = nwork ;
const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 )
: (nw/2) * ( nw + 1 );
for ( unsigned count = 0 ; count < CountLimit ; ++count ) {
result_type result("result",count);
result_host_type host_result = Kokkos::create_mirror( result );
// Test result to host pointer:
std::string str("TestKernelReduce");
if(count%2==0)
Kokkos::parallel_reduce( nw , functor_type(nw,count) , host_result.ptr_on_device() );
else
Kokkos::parallel_reduce( str , nw , functor_type(nw,count) , host_result.ptr_on_device() );
for ( unsigned j = 0 ; j < count ; ++j ) {
const unsigned long correct = 0 == j % 3 ? nw : nsum ;
ASSERT_EQ( host_result(j), (ScalarType) correct );
host_result(j) = 0 ;
}
}
}
};
}
// Computes y^T*A*x
// (modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar )
#if ( ! defined( KOKKOS_HAVE_CUDA ) ) || defined( KOKKOS_CUDA_USE_LAMBDA )
template< typename ScalarType , class DeviceType >
class TestTripleNestedReduce
{
public:
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
//------------------------------------
TestTripleNestedReduce( const size_type & nrows , const size_type & ncols
, const size_type & team_size , const size_type & vector_length )
{
run_test( nrows , ncols , team_size, vector_length );
}
void run_test( const size_type & nrows , const size_type & ncols
, const size_type & team_size, const size_type & vector_length )
{
//typedef Kokkos::LayoutLeft Layout;
typedef Kokkos::LayoutRight Layout;
typedef Kokkos::View<ScalarType* , DeviceType> ViewVector;
typedef Kokkos::View<ScalarType** , Layout , DeviceType> ViewMatrix;
ViewVector y( "y" , nrows );
ViewVector x( "x" , ncols );
ViewMatrix A( "A" , nrows , ncols );
typedef Kokkos::RangePolicy<DeviceType> range_policy;
// Initialize y vector
Kokkos::parallel_for( range_policy( 0 , nrows ) , KOKKOS_LAMBDA( const int i ) { y( i ) = 1; } );
// Initialize x vector
Kokkos::parallel_for( range_policy( 0 , ncols ) , KOKKOS_LAMBDA( const int i ) { x( i ) = 1; } );
typedef Kokkos::TeamPolicy<DeviceType> team_policy;
typedef typename Kokkos::TeamPolicy<DeviceType>::member_type member_type;
// Initialize A matrix, note 2D indexing computation
Kokkos::parallel_for( team_policy( nrows , Kokkos::AUTO ) , KOKKOS_LAMBDA( const member_type& teamMember ) {
const int j = teamMember.league_rank();
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , ncols ) , [&] ( const int i ) {
A( j , i ) = 1;
} );
} );
// Three level parallelism kernel to force caching of vector x
ScalarType result = 0.0;
int chunk_size = 128;
Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , double &update ) {
const int row_start = teamMember.league_rank() * chunk_size;
const int row_end = row_start + chunk_size;
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , row_start , row_end ) , [&] ( const int i ) {
ScalarType sum_i = 0.0;
Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember , ncols ) , [&] ( const int j , ScalarType &innerUpdate ) {
innerUpdate += A( i , j ) * x( j );
} , sum_i );
Kokkos::single( Kokkos::PerThread( teamMember ) , [&] () {
update += y( i ) * sum_i;
} );
} );
} , result );
const ScalarType solution= ( ScalarType ) nrows * ( ScalarType ) ncols;
ASSERT_EQ( solution , result );
}
};
#else /* #if ( ! defined( KOKKOS_HAVE_CUDA ) ) || defined( KOKKOS_CUDA_USE_LAMBDA ) */
// Placeholder variant selected by the preprocessor when the lambda-based
// implementation is not compiled: same class name and constructor arity,
// but construction does nothing.
template< typename ScalarType , class DeviceType >
class TestTripleNestedReduce
{
public:

  using execution_space = DeviceType ;
  using size_type       = typename execution_space::size_type ;

  // All four arguments are accepted and ignored.
  TestTripleNestedReduce( const size_type & , const size_type
                        , const size_type & , const size_type )
  {}
};
#endif
//--------------------------------------------------------------------------
namespace Test {
namespace ReduceCombinatorical {
template<class Scalar,class Space = Kokkos::HostSpace>
struct AddPlus {
public:
//Required
typedef AddPlus reducer_type;
typedef Scalar value_type;
typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
private:
result_view_type result;
public:
AddPlus(value_type& result_):result(&result_) {}
//Required
KOKKOS_INLINE_FUNCTION
void join(value_type& dest, const value_type& src) const {
dest += src + 1;
}
KOKKOS_INLINE_FUNCTION
void join(volatile value_type& dest, const volatile value_type& src) const {
dest += src + 1;
}
//Optional
KOKKOS_INLINE_FUNCTION
void init( value_type& val) const {
val = value_type();
}
result_view_type result_view() const {
return result;
}
};
// Reduction functor that provides only operator().
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalar;

template<>
struct FunctorScalar<0> {
  Kokkos::View<double> result;

  FunctorScalar(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }
};

template<>
struct FunctorScalar<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalar(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }
};
// Reduction functor that provides operator() and init.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarInit;

template<>
struct FunctorScalarInit<0> {
  Kokkos::View<double> result;

  FunctorScalarInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};

template<>
struct FunctorScalarInit<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};
// Reduction functor that provides operator() and final; final stores the
// reduced value into the `result` view.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarFinal;

template<>
struct FunctorScalarFinal<0> {
  Kokkos::View<double> result;

  FunctorScalarFinal(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }
};

template<>
struct FunctorScalarFinal<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarFinal(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }
};
// Reduction functor that provides operator() and join.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarJoin;

template<>
struct FunctorScalarJoin<0> {
  Kokkos::View<double> result;

  FunctorScalarJoin(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }
};

template<>
struct FunctorScalarJoin<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarJoin(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }
};
// Reduction functor that provides operator(), join and final; final stores
// the reduced value into the `result` view.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarJoinFinal;

template<>
struct FunctorScalarJoinFinal<0> {
  Kokkos::View<double> result;

  FunctorScalarJoinFinal(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }
};

template<>
struct FunctorScalarJoinFinal<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarJoinFinal(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }
};
// Reduction functor that provides operator(), join and init.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarJoinInit;

template<>
struct FunctorScalarJoinInit<0> {
  Kokkos::View<double> result;

  FunctorScalarJoinInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};

template<>
struct FunctorScalarJoinInit<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarJoinInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};
// Reduction functor that provides operator(), join, final and init; final
// stores the reduced value into the `result` view.
// Specialization <0> takes an integer work index; <1> takes a team member.
template<int ISTEAM>
struct FunctorScalarJoinFinalInit;

template<>
struct FunctorScalarJoinFinalInit<0> {
  Kokkos::View<double> result;

  FunctorScalarJoinFinalInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds the work index to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};

template<>
struct FunctorScalarJoinFinalInit<1> {
  using team_type = Kokkos::TeamPolicy<>::member_type;

  Kokkos::View<double> result;

  FunctorScalarJoinFinalInit(Kokkos::View<double> view)
    : result(view)
  {}

  // Adds (1 / team_size) * league_rank to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_type& team, double& update) const {
    update += 1.0/team.team_size()*team.league_rank();
  }

  // Plain sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join(volatile double& dst, const volatile double& update) const {
    dst += update;
  }

  // Writes the reduced value into the result view.
  KOKKOS_INLINE_FUNCTION
  void final(double& update) const {
    result() = update;
  }

  // Starts the reduction variable at zero.
  KOKKOS_INLINE_FUNCTION
  void init(double& update) const {
    update = 0.0;
  }
};
// Stateless scalar sum functor: each work item contributes its own index.
struct Functor1 {
  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, double& update) const {
    update += i;
  }
};
// Array-valued sum functor: the reduction value is an array of
// `value_count` doubles and every entry accumulates the work index.
struct Functor2 {
  using value_type = double[];

  const unsigned value_count;

  Functor2(unsigned n)
    : value_count(n)
  {}

  // Adds the work index to every entry.
  KOKKOS_INLINE_FUNCTION
  void operator() (const unsigned& i, double update[]) const {
    for (unsigned k = 0; k < value_count; k++) {
      update[k] += i;
    }
  }

  // Zeroes every entry.
  KOKKOS_INLINE_FUNCTION
  void init( double dst[] ) const
  {
    for (unsigned k = 0; k < value_count; ++k) {
      dst[k] = 0 ;
    }
  }

  // Entry-wise sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join( volatile double dst[] ,
             const volatile double src[] ) const
  {
    for (unsigned k = 0; k < value_count; ++k) {
      dst[k] += src[k] ;
    }
  }
};
}
}
namespace Test {
// Instantiates Kokkos::parallel_reduce with many combinations of
// (optional label) x (policy) x (functor or lambda) x (result argument).
//
// execute() is the entry point. Each Add* stage appends one more argument to
// the variadic pack and passes it on to the next stage:
//   AddLabel -> AddPolicy -> AddFunctorLambdaRange/Team -> AddFunctor /
//   AddLambda* -> AddReturnArgument -> parallel_reduce.
// The exact form of each argument (literal vs. variable, temporary vs. named
// object) is part of what is being exercised.
template<class ExecSpace = Kokkos::DefaultExecutionSpace>
struct TestReduceCombinatoricalInstantiation {
// Forwards the assembled argument pack to Kokkos::parallel_reduce.
template<class ... Args>
static void CallParallelReduce(Args... args) {
Kokkos::parallel_reduce(args...);
}
// Final stage: appends each supported result-argument form to `args` and
// checks the reduced value against expected_result (1000*999/2, i.e. the
// sum of 0..999).
template<class ... Args>
static void AddReturnArgument(Args... args) {
Kokkos::View<double,Kokkos::HostSpace> result_view("ResultView");
double expected_result = 1000.0*999.0/2.0;
// Result as a plain scalar lvalue.
double value = 0;
Kokkos::parallel_reduce(args...,value);
ASSERT_EQ(expected_result,value);
// Result as a host view.
result_view() = 0;
CallParallelReduce(args...,result_view);
ASSERT_EQ(expected_result,result_view());
// Result as a temporary unmanaged view wrapping the scalar.
value = 0;
CallParallelReduce(args...,Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&value));
ASSERT_EQ(expected_result,value);
// Result as a const unmanaged view built from result_view.
result_view() = 0;
const Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> result_view_const_um = result_view;
CallParallelReduce(args...,result_view_const_um);
ASSERT_EQ(expected_result,result_view_const_um());
// Result as a temporary AddPlus reducer. AddPlus::join adds src + 1, so the
// reduced value is allowed to exceed expected_result; which comparison is
// applied depends on the concurrency reported by the execution spaces.
// NOTE(review): presumably join only runs when partial results from
// several threads are merged - confirm against the Kokkos backends.
value = 0;
CallParallelReduce(args...,Test::ReduceCombinatorical::AddPlus<double>(value));
if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1))
ASSERT_TRUE(expected_result<value);
else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1))
ASSERT_TRUE(expected_result<=value);
else
ASSERT_EQ(expected_result,value);
// Same check with the AddPlus reducer passed as a named object.
value = 0;
Test::ReduceCombinatorical::AddPlus<double> add(value);
CallParallelReduce(args...,add);
if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1))
ASSERT_TRUE(expected_result<value);
else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1))
ASSERT_TRUE(expected_result<=value);
else
ASSERT_EQ(expected_result,value);
}
// Appends a range lambda summing the work index. Selected when the tag
// argument is void* (see AddFunctorLambdaRange).
template<class ... Args>
static void AddLambdaRange(void*,Args... args) {
AddReturnArgument(args..., KOKKOS_LAMBDA (const int&i , double& lsum) {
lsum += i;
});
}
// Appends a team lambda adding (1 / team_size) * league_rank. Selected when
// the tag argument is void* (see AddFunctorLambdaTeam).
template<class ... Args>
static void AddLambdaTeam(void*,Args... args) {
AddReturnArgument(args..., KOKKOS_LAMBDA (const Kokkos::TeamPolicy<>::member_type& team, double& update) {
update+=1.0/team.team_size()*team.league_rank();
});
}
// No-op overload selected when the tag argument is Kokkos::InvalidType.
template<class ... Args>
static void AddLambdaRange(Kokkos::InvalidType,Args... args) {
}
// No-op overload selected when the tag argument is Kokkos::InvalidType.
template<class ... Args>
static void AddLambdaTeam(Kokkos::InvalidType,Args... args) {
}
// Appends every FunctorScalar* variant (ISTEAM = 0 for range, 1 for team).
// Variants without `final` go through AddReturnArgument. Variants with
// `final` write their result into result_view themselves, so they are
// launched without a result argument and the view is copied back to the
// host mirror h_r for the check.
template<int ISTEAM, class ... Args>
static void AddFunctor(Args... args) {
Kokkos::View<double> result_view("FunctorView");
auto h_r = Kokkos::create_mirror_view(result_view);
Test::ReduceCombinatorical::FunctorScalar<ISTEAM> functor(result_view);
double expected_result = 1000.0*999.0/2.0;
// Functor as a named object, then as temporaries of each variant.
AddReturnArgument(args..., functor);
AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalar<ISTEAM>(result_view));
AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarInit<ISTEAM>(result_view));
AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoin<ISTEAM>(result_view));
AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoinInit<ISTEAM>(result_view));
// Reset the view to zero before each `final`-style run.
h_r() = 0;
Kokkos::deep_copy(result_view,h_r);
CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarFinal<ISTEAM>(result_view));
Kokkos::deep_copy(h_r,result_view);
ASSERT_EQ(expected_result,h_r());
h_r() = 0;
Kokkos::deep_copy(result_view,h_r);
CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal<ISTEAM>(result_view));
Kokkos::deep_copy(h_r,result_view);
ASSERT_EQ(expected_result,h_r());
h_r() = 0;
Kokkos::deep_copy(result_view,h_r);
CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit<ISTEAM>(result_view));
Kokkos::deep_copy(h_r,result_view);
ASSERT_EQ(expected_result,h_r());
}
// Range stage: runs the functor variants and, when lambda dispatch is
// enabled, the lambda variant. The tag type is void* only if ExecSpace is
// the default execution space; otherwise it is Kokkos::InvalidType and the
// no-op AddLambdaRange overload is chosen.
template<class ... Args>
static void AddFunctorLambdaRange(Args... args) {
AddFunctor<0,Args...>(args...);
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
AddLambdaRange(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...);
#endif
}
// Team stage: same structure as AddFunctorLambdaRange, using the team
// functor specializations and the team lambda.
template<class ... Args>
static void AddFunctorLambdaTeam(Args... args) {
AddFunctor<1,Args...>(args...);
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
AddLambdaTeam(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...);
#endif
}
// Policy stage: appends each policy form over 1000 work items / 1000 teams:
// an integer literal, an int variable, a named RangePolicy, RangePolicy
// temporaries (default, Dynamic, Static/Dynamic with chunk size 10), and the
// corresponding TeamPolicy temporaries with Kokkos::AUTO team size.
template<class ... Args>
static void AddPolicy(Args... args) {
int N = 1000;
Kokkos::RangePolicy<ExecSpace> policy(0,N);
AddFunctorLambdaRange(args...,1000);
AddFunctorLambdaRange(args...,N);
AddFunctorLambdaRange(args...,policy);
AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace>(0,N));
AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N));
AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(0,N).set_chunk_size(10));
AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N).set_chunk_size(10));
AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace>(N,Kokkos::AUTO));
AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO));
AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(N,Kokkos::AUTO).set_chunk_size(10));
AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO).set_chunk_size(10));
}
// Label stage: no label, a string literal, a const char* obtained from a
// std::string, and the std::string itself.
static void AddLabel() {
std::string s("Std::String");
AddPolicy();
AddPolicy("Char Constant");
AddPolicy(s.c_str());
AddPolicy(s);
}
// Entry point: runs the full combination tree.
static void execute() {
AddLabel();
}
};
template<class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace>
struct TestReducers {
// Adds values(i) to the reduction variable.
struct SumFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const Scalar addend = values(i);
    value += addend;
  }
};
// Multiplies the reduction variable by values(i).
struct ProdFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const Scalar factor = values(i);
    value *= factor;
  }
};
// Keeps the smaller of the reduction variable and values(i).
struct MinFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const Scalar candidate = values(i);
    if (candidate < value) {
      value = candidate;
    }
  }
};
// Keeps the larger of the reduction variable and values(i).
struct MaxFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const Scalar candidate = values(i);
    if (candidate > value) {
      value = candidate;
    }
  }
};
// Tracks the smallest value seen and the index at which it occurred.
struct MinLocFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i,
      typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type& value) const {
    const Scalar candidate = values(i);
    if (candidate < value.val) {
      value.val = candidate;
      value.loc = i;
    }
  }
};
// Tracks the largest value seen and the index at which it occurred.
struct MaxLocFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i,
      typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type& value) const {
    const Scalar candidate = values(i);
    if (candidate > value.val) {
      value.val = candidate;
      value.loc = i;
    }
  }
};
// Tracks both extremes and their indices in a single pass; the maximum is
// examined first, then the minimum.
struct MinMaxLocFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i,
      typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type& value) const {
    const Scalar candidate = values(i);

    if (candidate > value.max_val) {
      value.max_val = candidate;
      value.max_loc = i;
    }

    if (candidate < value.min_val) {
      value.min_val = candidate;
      value.min_loc = i;
    }
  }
};
// Bitwise-ANDs values(i) into the reduction variable.
struct BAndFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    value &= values(i);
  }
};
// Bitwise-ORs values(i) into the reduction variable.
struct BOrFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    value |= values(i);
  }
};
// Bitwise-XORs values(i) into the reduction variable.
struct BXorFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    value ^= values(i);
  }
};
// Logical AND of the reduction variable with values(i).
struct LAndFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const bool both_true = value && values(i);
    value = both_true;
  }
};
// Logical OR of the reduction variable with values(i).
struct LOrFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    const bool either_true = value || values(i);
    value = either_true;
  }
};
// Logical XOR step: when the running value is true the result is the
// negation of values(i); otherwise it is values(i) itself.
struct LXorFunctor {
  Kokkos::View<const Scalar*,ExecSpace> values;

  KOKKOS_INLINE_FUNCTION
  void operator() (const int& i, Scalar& value) const {
    if (value) {
      value = !values(i);
    }
    else {
      value = values(i);
    }
  }
};
static void test_sum(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_sum = 0;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100);
reference_sum += h_values(i);
}
Kokkos::deep_copy(values,h_values);
SumFunctor f;
f.values = values;
Scalar init = 0;
{
Scalar sum_scalar = init;
Kokkos::Experimental::Sum<Scalar> reducer_scalar(sum_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(sum_scalar,reference_sum);
Scalar sum_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(sum_scalar_view,reference_sum);
}
{
Scalar sum_scalar_init = init;
Kokkos::Experimental::Sum<Scalar> reducer_scalar_init(sum_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(sum_scalar_init,reference_sum);
Scalar sum_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(sum_scalar_init_view,reference_sum);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> sum_view("View");
sum_view() = init;
Kokkos::Experimental::Sum<Scalar> reducer_view(sum_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar sum_view_scalar = sum_view();
ASSERT_EQ(sum_view_scalar,reference_sum);
Scalar sum_view_view = reducer_view.result_view()();
ASSERT_EQ(sum_view_view,reference_sum);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> sum_view_init("View");
sum_view_init() = init;
Kokkos::Experimental::Sum<Scalar> reducer_view_init(sum_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
Scalar sum_view_init_scalar = sum_view_init();
ASSERT_EQ(sum_view_init_scalar,reference_sum);
Scalar sum_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(sum_view_init_view,reference_sum);
}
}
static void test_prod(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_prod = 1;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%4+1);
reference_prod *= h_values(i);
}
Kokkos::deep_copy(values,h_values);
ProdFunctor f;
f.values = values;
Scalar init = 1;
if(std::is_arithmetic<Scalar>::value)
{
Scalar prod_scalar = init;
Kokkos::Experimental::Prod<Scalar> reducer_scalar(prod_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(prod_scalar,reference_prod);
Scalar prod_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(prod_scalar_view,reference_prod);
}
{
Scalar prod_scalar_init = init;
Kokkos::Experimental::Prod<Scalar> reducer_scalar_init(prod_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(prod_scalar_init,reference_prod);
Scalar prod_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(prod_scalar_init_view,reference_prod);
}
if(std::is_arithmetic<Scalar>::value)
{
Kokkos::View<Scalar,Kokkos::HostSpace> prod_view("View");
prod_view() = init;
Kokkos::Experimental::Prod<Scalar> reducer_view(prod_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar prod_view_scalar = prod_view();
ASSERT_EQ(prod_view_scalar,reference_prod);
Scalar prod_view_view = reducer_view.result_view()();
ASSERT_EQ(prod_view_view,reference_prod);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> prod_view_init("View");
prod_view_init() = init;
Kokkos::Experimental::Prod<Scalar> reducer_view_init(prod_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
Scalar prod_view_init_scalar = prod_view_init();
ASSERT_EQ(prod_view_init_scalar,reference_prod);
Scalar prod_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(prod_view_init_view,reference_prod);
}
}
static void test_min(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_min = std::numeric_limits<Scalar>::max();
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000);
if(h_values(i)<reference_min)
reference_min = h_values(i);
}
Kokkos::deep_copy(values,h_values);
MinFunctor f;
f.values = values;
Scalar init = std::numeric_limits<Scalar>::max();
{
Scalar min_scalar = init;
Kokkos::Experimental::Min<Scalar> reducer_scalar(min_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(min_scalar,reference_min);
Scalar min_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(min_scalar_view,reference_min);
}
{
Scalar min_scalar_init = init;
Kokkos::Experimental::Min<Scalar> reducer_scalar_init(min_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(min_scalar_init,reference_min);
Scalar min_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(min_scalar_init_view,reference_min);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> min_view("View");
min_view() = init;
Kokkos::Experimental::Min<Scalar> reducer_view(min_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar min_view_scalar = min_view();
ASSERT_EQ(min_view_scalar,reference_min);
Scalar min_view_view = reducer_view.result_view()();
ASSERT_EQ(min_view_view,reference_min);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> min_view_init("View");
min_view_init() = init;
Kokkos::Experimental::Min<Scalar> reducer_view_init(min_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
Scalar min_view_init_scalar = min_view_init();
ASSERT_EQ(min_view_init_scalar,reference_min);
Scalar min_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(min_view_init_view,reference_min);
}
}
static void test_max(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_max = std::numeric_limits<Scalar>::min();
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000+1);
if(h_values(i)>reference_max)
reference_max = h_values(i);
}
Kokkos::deep_copy(values,h_values);
MaxFunctor f;
f.values = values;
Scalar init = std::numeric_limits<Scalar>::min();
{
Scalar max_scalar = init;
Kokkos::Experimental::Max<Scalar> reducer_scalar(max_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(max_scalar,reference_max);
Scalar max_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(max_scalar_view,reference_max);
}
{
Scalar max_scalar_init = init;
Kokkos::Experimental::Max<Scalar> reducer_scalar_init(max_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(max_scalar_init,reference_max);
Scalar max_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(max_scalar_init_view,reference_max);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> max_view("View");
max_view() = init;
Kokkos::Experimental::Max<Scalar> reducer_view(max_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar max_view_scalar = max_view();
ASSERT_EQ(max_view_scalar,reference_max);
Scalar max_view_view = reducer_view.result_view()();
ASSERT_EQ(max_view_view,reference_max);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> max_view_init("View");
max_view_init() = init;
Kokkos::Experimental::Max<Scalar> reducer_view_init(max_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
Scalar max_view_init_scalar = max_view_init();
ASSERT_EQ(max_view_init_scalar,reference_max);
Scalar max_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(max_view_init_view,reference_max);
}
}
static void test_minloc(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_min = std::numeric_limits<Scalar>::max();
int reference_loc = -1;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000);
if(h_values(i)<reference_min) {
reference_min = h_values(i);
reference_loc = i;
}
}
Kokkos::deep_copy(values,h_values);
MinLocFunctor f;
typedef typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type value_type;
f.values = values;
Scalar init = std::numeric_limits<Scalar>::max();
{
value_type min_scalar;
Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar(min_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(min_scalar.val,reference_min);
ASSERT_EQ(min_scalar.loc,reference_loc);
value_type min_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(min_scalar_view.val,reference_min);
ASSERT_EQ(min_scalar_view.loc,reference_loc);
}
{
value_type min_scalar_init;
Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar_init(min_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(min_scalar_init.val,reference_min);
ASSERT_EQ(min_scalar_init.loc,reference_loc);
value_type min_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(min_scalar_init_view.val,reference_min);
ASSERT_EQ(min_scalar_init_view.loc,reference_loc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> min_view("View");
Kokkos::Experimental::MinLoc<Scalar,int> reducer_view(min_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
value_type min_view_scalar = min_view();
ASSERT_EQ(min_view_scalar.val,reference_min);
ASSERT_EQ(min_view_scalar.loc,reference_loc);
value_type min_view_view = reducer_view.result_view()();
ASSERT_EQ(min_view_view.val,reference_min);
ASSERT_EQ(min_view_view.loc,reference_loc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> min_view_init("View");
Kokkos::Experimental::MinLoc<Scalar,int> reducer_view_init(min_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
value_type min_view_init_scalar = min_view_init();
ASSERT_EQ(min_view_init_scalar.val,reference_min);
ASSERT_EQ(min_view_init_scalar.loc,reference_loc);
value_type min_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(min_view_init_view.val,reference_min);
ASSERT_EQ(min_view_init_view.loc,reference_loc);
}
}
static void test_maxloc(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_max = std::numeric_limits<Scalar>::min();
int reference_loc = -1;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000);
if(h_values(i)>reference_max) {
reference_max = h_values(i);
reference_loc = i;
}
}
Kokkos::deep_copy(values,h_values);
MaxLocFunctor f;
typedef typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type value_type;
f.values = values;
Scalar init = std::numeric_limits<Scalar>::min();
{
value_type max_scalar;
Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar(max_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(max_scalar.val,reference_max);
ASSERT_EQ(max_scalar.loc,reference_loc);
value_type max_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(max_scalar_view.val,reference_max);
ASSERT_EQ(max_scalar_view.loc,reference_loc);
}
{
value_type max_scalar_init;
Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar_init(max_scalar_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(max_scalar_init.val,reference_max);
ASSERT_EQ(max_scalar_init.loc,reference_loc);
value_type max_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(max_scalar_init_view.val,reference_max);
ASSERT_EQ(max_scalar_init_view.loc,reference_loc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> max_view("View");
Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view(max_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
value_type max_view_scalar = max_view();
ASSERT_EQ(max_view_scalar.val,reference_max);
ASSERT_EQ(max_view_scalar.loc,reference_loc);
value_type max_view_view = reducer_view.result_view()();
ASSERT_EQ(max_view_view.val,reference_max);
ASSERT_EQ(max_view_view.loc,reference_loc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> max_view_init("View");
Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view_init(max_view_init,init);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
value_type max_view_init_scalar = max_view_init();
ASSERT_EQ(max_view_init_scalar.val,reference_max);
ASSERT_EQ(max_view_init_scalar.loc,reference_loc);
value_type max_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(max_view_init_view.val,reference_max);
ASSERT_EQ(max_view_init_view.loc,reference_loc);
}
}
static void test_minmaxloc(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_max = std::numeric_limits<Scalar>::min();
Scalar reference_min = std::numeric_limits<Scalar>::max();
int reference_minloc = -1;
int reference_maxloc = -1;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000);
if(h_values(i)>reference_max) {
reference_max = h_values(i);
reference_maxloc = i;
}
if(h_values(i)<reference_min) {
reference_min = h_values(i);
reference_minloc = i;
}
}
Kokkos::deep_copy(values,h_values);
MinMaxLocFunctor f;
typedef typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type value_type;
f.values = values;
Scalar init_min = std::numeric_limits<Scalar>::max();
Scalar init_max = std::numeric_limits<Scalar>::min();
{
value_type minmax_scalar;
Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar(minmax_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(minmax_scalar.min_val,reference_min);
ASSERT_EQ(minmax_scalar.min_loc,reference_minloc);
ASSERT_EQ(minmax_scalar.max_val,reference_max);
ASSERT_EQ(minmax_scalar.max_loc,reference_maxloc);
value_type minmax_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(minmax_scalar_view.min_val,reference_min);
ASSERT_EQ(minmax_scalar_view.min_loc,reference_minloc);
ASSERT_EQ(minmax_scalar_view.max_val,reference_max);
ASSERT_EQ(minmax_scalar_view.max_loc,reference_maxloc);
}
{
value_type minmax_scalar_init;
Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar_init(minmax_scalar_init,init_min,init_max);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init);
ASSERT_EQ(minmax_scalar_init.min_val,reference_min);
ASSERT_EQ(minmax_scalar_init.min_loc,reference_minloc);
ASSERT_EQ(minmax_scalar_init.max_val,reference_max);
ASSERT_EQ(minmax_scalar_init.max_loc,reference_maxloc);
value_type minmax_scalar_init_view = reducer_scalar_init.result_view()();
ASSERT_EQ(minmax_scalar_init_view.min_val,reference_min);
ASSERT_EQ(minmax_scalar_init_view.min_loc,reference_minloc);
ASSERT_EQ(minmax_scalar_init_view.max_val,reference_max);
ASSERT_EQ(minmax_scalar_init_view.max_loc,reference_maxloc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> minmax_view("View");
Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view(minmax_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
value_type minmax_view_scalar = minmax_view();
ASSERT_EQ(minmax_view_scalar.min_val,reference_min);
ASSERT_EQ(minmax_view_scalar.min_loc,reference_minloc);
ASSERT_EQ(minmax_view_scalar.max_val,reference_max);
ASSERT_EQ(minmax_view_scalar.max_loc,reference_maxloc);
value_type minmax_view_view = reducer_view.result_view()();
ASSERT_EQ(minmax_view_view.min_val,reference_min);
ASSERT_EQ(minmax_view_view.min_loc,reference_minloc);
ASSERT_EQ(minmax_view_view.max_val,reference_max);
ASSERT_EQ(minmax_view_view.max_loc,reference_maxloc);
}
{
Kokkos::View<value_type,Kokkos::HostSpace> minmax_view_init("View");
Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view_init(minmax_view_init,init_min,init_max);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init);
value_type minmax_view_init_scalar = minmax_view_init();
ASSERT_EQ(minmax_view_init_scalar.min_val,reference_min);
ASSERT_EQ(minmax_view_init_scalar.min_loc,reference_minloc);
ASSERT_EQ(minmax_view_init_scalar.max_val,reference_max);
ASSERT_EQ(minmax_view_init_scalar.max_loc,reference_maxloc);
value_type minmax_view_init_view = reducer_view_init.result_view()();
ASSERT_EQ(minmax_view_init_view.min_val,reference_min);
ASSERT_EQ(minmax_view_init_view.min_loc,reference_minloc);
ASSERT_EQ(minmax_view_init_view.max_val,reference_max);
ASSERT_EQ(minmax_view_init_view.max_loc,reference_maxloc);
}
}
static void test_BAnd(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_band = Scalar() | (~Scalar());
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%100000+1);
reference_band = reference_band & h_values(i);
}
Kokkos::deep_copy(values,h_values);
BAndFunctor f;
f.values = values;
Scalar init = Scalar() | (~Scalar());
{
Scalar band_scalar = init;
Kokkos::Experimental::BAnd<Scalar> reducer_scalar(band_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(band_scalar,reference_band);
Scalar band_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(band_scalar_view,reference_band);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> band_view("View");
band_view() = init;
Kokkos::Experimental::BAnd<Scalar> reducer_view(band_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar band_view_scalar = band_view();
ASSERT_EQ(band_view_scalar,reference_band);
Scalar band_view_view = reducer_view.result_view()();
ASSERT_EQ(band_view_view,reference_band);
}
}
static void test_BOr(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_bor = Scalar() & (~Scalar());
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)((rand()%100000+1)*2);
reference_bor = reference_bor | h_values(i);
}
Kokkos::deep_copy(values,h_values);
BOrFunctor f;
f.values = values;
Scalar init = Scalar() & (~Scalar());
{
Scalar bor_scalar = init;
Kokkos::Experimental::BOr<Scalar> reducer_scalar(bor_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(bor_scalar,reference_bor);
Scalar bor_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(bor_scalar_view,reference_bor);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> bor_view("View");
bor_view() = init;
Kokkos::Experimental::BOr<Scalar> reducer_view(bor_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar bor_view_scalar = bor_view();
ASSERT_EQ(bor_view_scalar,reference_bor);
Scalar bor_view_view = reducer_view.result_view()();
ASSERT_EQ(bor_view_view,reference_bor);
}
}
static void test_BXor(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_bxor = Scalar() & (~Scalar());
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)((rand()%100000+1)*2);
reference_bxor = reference_bxor ^ h_values(i);
}
Kokkos::deep_copy(values,h_values);
BXorFunctor f;
f.values = values;
Scalar init = Scalar() & (~Scalar());
{
Scalar bxor_scalar = init;
Kokkos::Experimental::BXor<Scalar> reducer_scalar(bxor_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(bxor_scalar,reference_bxor);
Scalar bxor_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(bxor_scalar_view,reference_bxor);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> bxor_view("View");
bxor_view() = init;
Kokkos::Experimental::BXor<Scalar> reducer_view(bxor_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar bxor_view_scalar = bxor_view();
ASSERT_EQ(bxor_view_scalar,reference_bxor);
Scalar bxor_view_view = reducer_view.result_view()();
ASSERT_EQ(bxor_view_view,reference_bxor);
}
}
static void test_LAnd(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_land = 1;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%2);
reference_land = reference_land && h_values(i);
}
Kokkos::deep_copy(values,h_values);
LAndFunctor f;
f.values = values;
Scalar init = 1;
{
Scalar land_scalar = init;
Kokkos::Experimental::LAnd<Scalar> reducer_scalar(land_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(land_scalar,reference_land);
Scalar land_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(land_scalar_view,reference_land);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> land_view("View");
land_view() = init;
Kokkos::Experimental::LAnd<Scalar> reducer_view(land_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar land_view_scalar = land_view();
ASSERT_EQ(land_view_scalar,reference_land);
Scalar land_view_view = reducer_view.result_view()();
ASSERT_EQ(land_view_view,reference_land);
}
}
static void test_LOr(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_lor = 0;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%2);
reference_lor = reference_lor || h_values(i);
}
Kokkos::deep_copy(values,h_values);
LOrFunctor f;
f.values = values;
Scalar init = 0;
{
Scalar lor_scalar = init;
Kokkos::Experimental::LOr<Scalar> reducer_scalar(lor_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(lor_scalar,reference_lor);
Scalar lor_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(lor_scalar_view,reference_lor);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> lor_view("View");
lor_view() = init;
Kokkos::Experimental::LOr<Scalar> reducer_view(lor_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar lor_view_scalar = lor_view();
ASSERT_EQ(lor_view_scalar,reference_lor);
Scalar lor_view_view = reducer_view.result_view()();
ASSERT_EQ(lor_view_view,reference_lor);
}
}
static void test_LXor(int N) {
Kokkos::View<Scalar*,ExecSpace> values("Values",N);
auto h_values = Kokkos::create_mirror_view(values);
Scalar reference_lxor = 0;
for(int i=0; i<N; i++) {
h_values(i) = (Scalar)(rand()%2);
reference_lxor = reference_lxor ? (!h_values(i)) : h_values(i);
}
Kokkos::deep_copy(values,h_values);
LXorFunctor f;
f.values = values;
Scalar init = 0;
{
Scalar lxor_scalar = init;
Kokkos::Experimental::LXor<Scalar> reducer_scalar(lxor_scalar);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar);
ASSERT_EQ(lxor_scalar,reference_lxor);
Scalar lxor_scalar_view = reducer_scalar.result_view()();
ASSERT_EQ(lxor_scalar_view,reference_lxor);
}
{
Kokkos::View<Scalar,Kokkos::HostSpace> lxor_view("View");
lxor_view() = init;
Kokkos::Experimental::LXor<Scalar> reducer_view(lxor_view);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view);
Scalar lxor_view_scalar = lxor_view();
ASSERT_EQ(lxor_view_scalar,reference_lxor);
Scalar lxor_view_view = reducer_view.result_view()();
ASSERT_EQ(lxor_view_view,reference_lxor);
}
}
// Test driver: runs the sum, product, min/max and min/max-location reducer
// tests. No bitwise or logical reducer tests are run here (presumably because
// this entry point is meant for floating-point Scalar types).
static void execute_float() {
  const int sum_extent  = 10001;
  const int prod_extent = 35;
  const int min_extent  = 10003;
  const int max_extent  = 10007;

  test_sum(sum_extent);
  test_prod(prod_extent);
  test_min(min_extent);
  test_minloc(min_extent);
  test_max(max_extent);
  test_maxloc(max_extent);
  test_minmaxloc(max_extent);
}
// Test driver: runs every reducer test in this struct - arithmetic, min/max,
// min/max-location, bitwise and logical reducers.
static void execute_integer() {
  const int sum_extent   = 10001;
  const int min_extent   = 10003;
  const int max_extent   = 10007;
  const int small_extent = 35;

  test_sum(sum_extent);
  test_prod(small_extent);
  test_min(min_extent);
  test_minloc(min_extent);
  test_max(max_extent);
  test_maxloc(max_extent);
  test_minmaxloc(max_extent);

  // Bitwise reducers.
  test_BAnd(small_extent);
  test_BOr(small_extent);
  test_BXor(small_extent);

  // Logical reducers.
  test_LAnd(small_extent);
  test_LOr(small_extent);
  test_LXor(small_extent);
}
// Test driver: runs only the sum and product reducer tests.
static void execute_basic() {
  const int sum_extent  = 10001;
  const int prod_extent = 35;

  test_sum(sum_extent);
  test_prod(prod_extent);
}
};
}
/*--------------------------------------------------------------------------*/
Event Timeline
Log In to Comment