Page MenuHomec4science

TestTeam.hpp
No OneTemporary

File Metadata

Created
Tue, Jul 16, 20:31

TestTeam.hpp

/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
namespace {
template< class ExecSpace, class ScheduleType >
struct TestTeamPolicy {
typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ;
typedef Kokkos::View<int**,ExecSpace> view_type ;
view_type m_flags ;
TestTeamPolicy( const size_t league_size )
: m_flags( Kokkos::ViewAllocateWithoutInitializing("flags")
, Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this )
, league_size )
{}
struct VerifyInitTag {};
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
m_flags( member.team_rank() , member.league_rank() ) = tid ;
}
KOKKOS_INLINE_FUNCTION
void operator()( const VerifyInitTag & , const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) {
printf("TestTeamPolicy member(%d,%d) error %d != %d\n"
, member.league_rank() , member.team_rank()
, tid , m_flags( member.team_rank() , member.league_rank() ) );
}
}
// included for test_small_league_size
TestTeamPolicy()
: m_flags()
{}
// included for test_small_league_size
struct NoOpTag {} ;
KOKKOS_INLINE_FUNCTION
void operator()( const NoOpTag & , const team_member & member ) const
{}
static void test_small_league_size() {
int bs = 8; // batch size (number of elements per batch)
int ns = 16; // total number of "problems" to process
// calculate total scratch memory space size
const int level = 0;
int mem_size = 960;
const int num_teams = ns/bs;
const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO());
Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0))
, TestTeamPolicy()
);
}
static void test_for( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor );
test_small_league_size();
}
struct ReduceTag {};
typedef long value_type ;
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member , value_type & update ) const
{
update += member.team_rank() + member.team_size() * member.league_rank();
}
KOKKOS_INLINE_FUNCTION
void operator()( const ReduceTag & , const team_member & member , value_type & update ) const
{
update += 1 + member.team_rank() + member.team_size() * member.league_rank();
}
static void test_reduce( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor );
const long N = team_size * league_size ;
long total = 0 ;
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total );
ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) );
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total );
ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) );
}
};
}
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Reduction functor: the nwork work items are split into contiguous chunks,
// one per thread, and each item contributes to three running sums.
template< typename ScalarType , class DeviceType, class ScheduleType >
class ReduceTeamFunctor
{
public:
  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef typename execution_space::size_type size_type ;

  // Three independent accumulators: item count, sum of (i+1), sum of (nwork-i).
  struct value_type {
    ScalarType value[3] ;
  };

  const size_type nwork ;

  ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {}

  ReduceTeamFunctor( const ReduceTeamFunctor & rhs )
    : nwork( rhs.nwork ) {}

  // Zero all three accumulators.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & dst ) const
  {
    for ( int slot = 0 ; slot < 3 ; ++slot ) {
      dst.value[slot] = 0 ;
    }
  }

  // Element-wise sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst ,
             const volatile value_type & src ) const
  {
    for ( int slot = 0 ; slot < 3 ; ++slot ) {
      dst.value[slot] += src.value[slot] ;
    }
  }

  // Processes this thread's chunk [first, last) of the global work range.
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & dst ) const
  {
    const int thread_size = ind.team_size() * ind.league_size();
    const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank();

    // Ceiling division so the chunks cover all of nwork.
    const int chunk = ( nwork + thread_size - 1 ) / thread_size ;

    const size_type first = chunk * thread_rank ;
    const size_type last  = first + chunk < nwork ? first + chunk : nwork ;

    for ( size_type iwork = first ; iwork < last ; ++iwork ) {
      dst.value[0] += 1 ;
      dst.value[1] += iwork + 1 ;
      dst.value[2] += nwork - iwork ;
    }
  }
};
} // namespace Test
namespace {
// Driver for ReduceTeamFunctor: repeats the team reduction and compares all
// three accumulators with their closed-form values.
template< typename ScalarType , class DeviceType, class ScheduleType >
class TestReduceTeam
{
public:
  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef typename execution_space::size_type size_type ;

  //------------------------------------

  // Constructing the object runs the test.
  TestReduceTeam( const size_type & nwork )
  {
    run_test(nwork);
  }

  void run_test( const size_type & nwork )
  {
    typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ;
    typedef typename functor_type::value_type value_type ;
    typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ;

    enum { Count = 3 };
    enum { Repeat = 100 };

    value_type result[ Repeat ];

    // nsum = nw * ( nw + 1 ) / 2, halving the even factor before multiplying.
    const unsigned long nw = nwork ;
    const unsigned long nsum = ( nw % 2 ) ? nw * ( ( nw + 1 ) / 2 )
                                          : ( nw / 2 ) * ( nw + 1 );

    const unsigned team_size   = policy_type::team_size_recommended( functor_type(nwork) );
    const unsigned league_size = ( nwork + team_size - 1 ) / team_size ;

    policy_type team_exec( league_size , team_size );

    // Each repetition reduces into its own slot of the host result array.
    for ( unsigned i = 0 ; i < Repeat ; ++i ) {
      result_type tmp( & result[i] );
      Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp );
    }

    execution_space::fence();

    for ( unsigned i = 0 ; i < Repeat ; ++i ) {
      for ( unsigned j = 0 ; j < Count ; ++j ) {
        // Slot 0 counts the items; slots 1 and 2 both sum to nsum.
        const unsigned long correct = ( j % 3 == 0 ) ? nw : nsum ;
        ASSERT_EQ( (ScalarType) correct , result[i].value[j] );
      }
    }
  }
};
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Functor that checks team_reduce and team_scan from inside a team-parallel
// reduction.  The reduced value is an error flag: 0 when every check passed,
// 1 as soon as any thread saw a mismatch.
template< class DeviceType, class ScheduleType >
class ScanTeamFunctor
{
public:
  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef long int value_type ;

  Kokkos::View< value_type , execution_space > accum ;  // passed to the final team_scan
  Kokkos::View< value_type , execution_space > total ;  // set to thread_count*(thread_count+1)/2

  ScanTeamFunctor() : accum("accum"), total("total") {}

  KOKKOS_INLINE_FUNCTION
  void init( value_type & error ) const { error = 0 ; }

  // Any non-zero contribution marks the joined result as failed.
  KOKKOS_INLINE_FUNCTION
  void join( value_type volatile & error ,
             value_type volatile const & input ) const
    { if ( input ) error = 1 ; }

  // Join operation for the in-team maximum reduction.
  struct JoinMax {
    typedef long int value_type ;
    KOKKOS_INLINE_FUNCTION
    void join( value_type volatile & dst
             , value_type volatile const & input ) const
      { if ( dst < input ) dst = input ; }
  };

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & error ) const
  {
    // A single thread records the expected grand total 1 + 2 + ... + thread_count.
    if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) {
      const long int thread_count = ind.league_size() * ind.team_size();
      total() = ( thread_count * ( thread_count + 1 ) ) / 2 ;
    }

    // Team max:
    // each thread contributes league_rank + team_rank, so the maximum over the
    // team must be league_rank + ( team_size - 1 ).
    const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() );

    if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) {
      printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n"
            , ind.league_rank(), ind.team_rank()
            , ind.league_size(), ind.team_size()
            , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m );
      // Record the failure; printing alone would let the test pass.
      error = 1 ;
    }

    // Scan:
    // each thread contributes league_rank + team_rank + 2; 'answer' is the sum
    // of the contributions of all lower-ranked threads in the team.
    const long int answer =
      ( ind.league_rank() + 1 ) * ind.team_rank() +
      ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ;

    // The scan is run twice and both results must match.
    const long int result =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    const long int result2 =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    if ( answer != result || answer != result2 ) {
      printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n",
             ind.league_rank(), ind.team_rank(),
             ind.league_size(), ind.team_size(),
             answer,result,result2);
      error = 1 ;
    }

    // Scan that also receives a pointer to 'accum'; the host driver later
    // compares 'accum' with 'total'.
    const long int thread_rank = ind.team_rank() +
                                 ind.team_size() * ind.league_rank();
    ind.team_scan( 1 + thread_rank , accum.ptr_on_device() );
  }
};
template< class DeviceType, class ScheduleType >
class TestScanTeam
{
public:
typedef DeviceType execution_space ;
typedef long int value_type ;
typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
typedef Test::ScanTeamFunctor<DeviceType, ScheduleType> functor_type ;
//------------------------------------
TestScanTeam( const size_t nteam )
{
run_test(nteam);
}
void run_test( const size_t nteam )
{
typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const unsigned REPEAT = 100000 ;
const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ;
functor_type functor ;
policy_type team_exec( nteam , policy_type::team_size_max( functor ) );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
long int accum = 0 ;
long int total = 0 ;
long int error = 0 ;
Kokkos::deep_copy( functor.accum , total );
Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) );
DeviceType::fence();
Kokkos::deep_copy( accum , functor.accum );
Kokkos::deep_copy( total , functor.total );
ASSERT_EQ( error , 0 );
ASSERT_EQ( total , accum );
}
execution_space::fence();
}
};
} // namespace Test
/*--------------------------------------------------------------------------*/
namespace Test {
// Reduction functor that fills two team-shared arrays cooperatively and lets
// one thread per team verify them; 'update' counts the errors found.
template< class ExecSpace, class ScheduleType >
struct SharedTeamFunctor {

  typedef ExecSpace execution_space ;
  typedef int value_type ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;

  enum { SHARED_COUNT = 1000 };

  typedef typename ExecSpace::scratch_memory_space shmem_space ;

  // tbd: MemoryUnmanaged should be the default for shared memory space
  typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

  // Tell how much shared memory will be required by this functor:
  // two arrays of SHARED_COUNT ints each.
  inline
  unsigned team_shmem_size( int team_size ) const
  {
    return 2 * shared_int_array_type::shmem_size( SHARED_COUNT );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type & ind , value_type & update ) const
  {
    const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
    const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );

    const bool allocation_failed =
      ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ||
      ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ;

    if ( allocation_failed ) {
      printf ("Failed to allocate shared memory of size %lu\n",
              static_cast<unsigned long> (SHARED_COUNT));
      ++update; // failure to allocate is an error
      return ;
    }

    // The threads of the team fill both arrays with a team_size stride.
    for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) {
      shared_A[i] = i + ind.league_rank();
      shared_B[i] = 2 * i + ind.league_rank();
    }

    ind.team_barrier();

    // Only the highest-ranked thread of the team verifies the contents.
    if ( ind.team_rank() + 1 != ind.team_size() ) {
      return ;
    }

    for ( int i = 0 ; i < SHARED_COUNT ; ++i ) {
      if ( shared_A[i] != i + ind.league_rank() ) {
        ++update ;
      }
      if ( shared_B[i] != 2 * i + ind.league_rank() ) {
        ++update ;
      }
    }
  }
};
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestSharedTeam {
TestSharedTeam()
{ run(); }
void run()
{
typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
}
namespace Test {
#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
// Lambda-based variant of the team shared-memory test: the reduction body has
// the same fill / barrier / verify structure as SharedTeamFunctor::operator(),
// but is passed as a KOKKOS_LAMBDA and the scratch size is set on the policy.
// MemorySpace is the memory space of the unmanaged view wrapping the result.
template< class MemorySpace, class ExecSpace, class ScheduleType >
struct TestLambdaSharedTeam {
// Constructing the object runs the test.
TestLambdaSharedTeam()
{ run(); }
void run()
{
typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ;
//typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ;
typedef typename ExecSpace::scratch_memory_space shmem_space ;
// tbd: MemoryUnmanaged should be the default for shared memory space
typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;
const int SHARED_COUNT = 1000;
// Team size is 1 unless the execution space is Kokkos::Cuda, where it is 128.
int team_size = 1;
#ifdef KOKKOS_HAVE_CUDA
if(std::is_same<ExecSpace,Kokkos::Cuda>::value)
team_size = 128;
#endif
Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size);
// Request level-0 per-team scratch for two arrays of SHARED_COUNT ints.
team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int)));
typename Functor::value_type error_count = 0 ;
Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA
( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) {
const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );
if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) ||
(shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) {
printf ("Failed to allocate shared memory of size %lu\n",
static_cast<unsigned long> (SHARED_COUNT));
++update; // failure to allocate is an error
} else {
// The threads of the team fill both arrays with a team_size stride.
for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) {
shared_A[i] = i + ind.league_rank();
shared_B[i] = 2 * i + ind.league_rank();
}
ind.team_barrier();
// Only the highest-ranked thread of the team verifies the contents.
if ( ind.team_rank() + 1 == ind.team_size() ) {
for ( int i = 0 ; i < SHARED_COUNT ; ++i ) {
if ( shared_A[i] != i + ind.league_rank() ) {
++update ;
}
if ( shared_B[i] != 2 * i + ind.league_rank() ) {
++update ;
}
}
}
}
}, result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
#endif
}
namespace Test {
// Reduction functor checking per-team and per-thread scratch allocations.
// Every thread publishes the addresses of its team and thread scratch arrays;
// after a barrier the threads check that the team array is shared by the whole
// team and that consecutive thread arrays are evenly spaced and at least
// SHARED_THREAD_COUNT * sizeof(size_t) bytes apart.  'update' counts errors.
template< class ExecSpace, class ScheduleType >
struct ScratchTeamFunctor {

  typedef ExecSpace execution_space ;
  typedef int value_type ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;

  enum { SHARED_TEAM_COUNT = 100 };
  enum { SHARED_THREAD_COUNT = 10 };

  typedef typename ExecSpace::scratch_memory_space shmem_space ;

  // tbd: MemoryUnmanaged should be the default for shared memory space
  typedef Kokkos::View<size_t*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type & ind , value_type & update ) const
  {
    // NOTE(review): scratch is requested at level 1 here, while the
    // TestScratchTeam driver in this file sets the scratch size for level 0;
    // confirm that the two levels are meant to differ.
    //
    // scratch_ptr layout: [0, team_size) holds each thread's view of the team
    // array address, [team_size, 2*team_size) each thread's private address.
    const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 2*ind.team_size() );
    const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT );
    const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT );

    if ((scratch_ptr.ptr_on_device () == NULL ) ||
        (scratch_A.  ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) ||
        (scratch_B.  ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) {
      printf ("Failed to allocate shared memory of size %lu\n",
              static_cast<unsigned long> (SHARED_TEAM_COUNT));
      ++update; // failure to allocate is an error
    }
    else {
      const int team_size = ind.team_size();

      Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) {
        scratch_A[i] = i + ind.league_rank();
      });
      for(int i=0; i<SHARED_THREAD_COUNT; i++)
        scratch_B[i] = 10000*ind.league_rank() + 100*ind.team_rank() + i;

      scratch_ptr[ind.team_rank()] = (size_t) scratch_A.ptr_on_device();
      scratch_ptr[ind.team_rank() + team_size] = (size_t) scratch_B.ptr_on_device();

      ind.team_barrier();

      // Team array contents written above must be visible to every thread.
      for( int i = 0; i<SHARED_TEAM_COUNT; i++) {
        if(scratch_A[i] != size_t(i + ind.league_rank()))
          ++update;
      }

      // All threads must have seen the same team array address.
      for( int i = 0; i < team_size; i++) {
        if(scratch_ptr[0]!=scratch_ptr[i]) ++update;
      }

      // The stride checks compare the thread arrays of ranks 0 and 1, so they
      // only make sense with at least two threads.  With a single thread the
      // index 1 + team_size would lie outside scratch_ptr's 2 * team_size
      // entries.
      if ( team_size > 1 ) {
        const size_t first_stride =
          scratch_ptr[1 + team_size] - scratch_ptr[0 + team_size];

        if( first_stride < SHARED_THREAD_COUNT*sizeof(size_t) )
          ++update;

        for( int i = 1; i < team_size; i++) {
          if((scratch_ptr[i + team_size] - scratch_ptr[i - 1 + team_size]) != first_stride) ++update;
        }
      }
    }
  }
};
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestScratchTeam {
TestScratchTeam()
{ run(); }
void run()
{
typedef Test::ScratchTeamFunctor<ExecSpace, ScheduleType> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) +
Functor::shared_int_array_type::shmem_size(2*team_size);
int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT);
Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size),
Kokkos::PerThread(thread_scratch_size)) ,
Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
}
namespace Test {
// Shared body of the multi-level scratch tests.  Takes three per-team and
// three per-thread double arrays from each of scratch levels 0 and 1, fills
// them with known patterns, re-reads them and returns the number of
// mismatches counted in the local 'error' variable.
//
// Sizes: level 0 uses 128 (team) / 16 (thread) doubles per array, level 1 uses
// 128000 (team) / 16000 (thread) doubles per array.  The callers in this file
// size their scratch requests as 3 * shmem_size(...) of each of these.
template< class ExecSpace>
KOKKOS_INLINE_FUNCTION
int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
// Views are created in interleaved order: two team/thread pairs at level 0,
// two at level 1, then the third pair at each level.
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000);
Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000);
// Fill phase: team arrays get k*1000000 + i (k = 1,2,3 identifies the array).
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) {
a_team1(i) = 1000000 + i;
a_team2(i) = 2000000 + i;
a_team3(i) = 3000000 + i;
});
team.team_barrier();
// Thread arrays additionally encode the owning thread's team rank.
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){
a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
});
// Same patterns for the level-1 arrays.
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) {
b_team1(i) = 1000000 + i;
b_team2(i) = 2000000 + i;
b_team3(i) = 3000000 + i;
});
team.team_barrier();
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){
b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i;
b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i;
b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i;
});
team.team_barrier();
// Verify phase: re-read every array and count entries that differ from the
// pattern written above.
int error = 0;
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) {
if(a_team1(i) != 1000000 + i) error++;
if(a_team2(i) != 2000000 + i) error++;
if(a_team3(i) != 3000000 + i) error++;
});
team.team_barrier();
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i){
if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
});
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) {
if(b_team1(i) != 1000000 + i) error++;
if(b_team2(i) != 2000000 + i) error++;
if(b_team3(i) != 3000000 + i) error++;
});
team.team_barrier();
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i){
if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++;
if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++;
if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++;
});
return error;
}
// Dispatch tag: selects the parallel_reduce operator() overloads of the
// multi-level scratch functors below.
struct TagReduce {};
// Dispatch tag: selects the parallel_for operator() overloads of the
// multi-level scratch functors below.
struct TagFor {};
template< class ExecSpace, class ScheduleType >
struct ClassNoShmemSizeFunction {
Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
KOKKOS_INLINE_FUNCTION
void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const {
int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
errors() += error;
}
KOKKOS_INLINE_FUNCTION
void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const {
error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
}
void run() {
Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors");
errors = d_errors;
const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000);
const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000);
{
Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this);
Kokkos::fence();
typename Kokkos::View<int,ExecSpace>::HostMirror h_errors = Kokkos::create_mirror_view(d_errors);
Kokkos::deep_copy(h_errors,d_errors);
ASSERT_EQ(h_errors(),0);
}
{
int error = 0;
Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this,error);
Kokkos::fence();
ASSERT_EQ(error,0);
}
};
};
template< class ExecSpace, class ScheduleType >
struct ClassWithShmemSizeFunction {
Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
KOKKOS_INLINE_FUNCTION
void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const {
int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
errors() += error;
}
KOKKOS_INLINE_FUNCTION
void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const {
error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
}
void run() {
Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors");
errors = d_errors;
const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000);
const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000);
{
Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this);
Kokkos::fence();
typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(d_errors);
Kokkos::deep_copy(h_errors,d_errors);
ASSERT_EQ(h_errors(),0);
}
{
int error = 0;
Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this,error);
Kokkos::fence();
ASSERT_EQ(error,0);
}
};
unsigned team_shmem_size(int team_size) const {
const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
return per_team0 + team_size * per_thread0;
}
};
// Lambda-based multi-level scratch test: runs the shared loop body through a
// parallel_for (errors accumulated in a view with the atomic memory trait) and
// through a parallel_reduce (errors reduced into a host int).  Both must
// report zero errors.  The body is empty unless
// KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA is defined.
template< class ExecSpace, class ScheduleType >
void test_team_mulit_level_scratch_test_lambda() {
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
  Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
  Kokkos::View<int,ExecSpace> d_errors("Errors");
  errors = d_errors;

  // Three arrays of each kind are taken per level by the loop body.
  const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
  const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
  const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000);
  const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000);

  Kokkos::TeamPolicy<ExecSpace,ScheduleType> policy(10,8,16);

  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
    KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
      int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
      errors() += error;
    });
  Kokkos::fence();

  typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(errors);
  Kokkos::deep_copy(h_errors,d_errors);
  ASSERT_EQ(h_errors(),0);

  int error = 0;
  Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
    KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team, int& count) {
      count += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
    },error);
  // Fence before asserting: a failing ASSERT_EQ returns from this function,
  // which would otherwise skip the fence.
  Kokkos::fence();
  ASSERT_EQ(error,0);
#endif
}
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestMultiLevelScratchTeam {
TestMultiLevelScratchTeam()
{ run(); }
void run()
{
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
Test::test_team_mulit_level_scratch_test_lambda<ExecSpace, ScheduleType>();
#endif
Test::ClassNoShmemSizeFunction<ExecSpace, ScheduleType> c1;
c1.run();
Test::ClassWithShmemSizeFunction<ExecSpace, ScheduleType> c2;
c2.run();
}
};
}
namespace Test {
// Checks that View<long***>::shmem_size( d1, d2, d3 ) equals the raw byte
// count d1 * d2 * d3 * sizeof(long).
template< class ExecSpace >
struct TestShmemSize {

  // Constructing the object runs the test.
  TestShmemSize() { run(); }

  void run()
  {
    typedef Kokkos::View< long***, ExecSpace > view_type;

    const size_t d1 = 5;
    const size_t d2 = 6;
    const size_t d3 = 7;

    const size_t size = view_type::shmem_size( d1, d2, d3 );

    ASSERT_EQ( size, d1 * d2 * d3 * sizeof(long) );
  }
};
}
/*--------------------------------------------------------------------------*/

Event Timeline