Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F83599920
TestTeam.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Sep 18, 01:12
Size
31 KB
Mime Type
text/x-c++
Expires
Fri, Sep 20, 01:12 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
20863546
Attached To
rLAMMPS lammps
TestTeam.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
namespace {
template< class ExecSpace, class ScheduleType >
struct TestTeamPolicy {
typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ;
typedef Kokkos::View<int**,ExecSpace> view_type ;
view_type m_flags ;
TestTeamPolicy( const size_t league_size )
: m_flags( Kokkos::ViewAllocateWithoutInitializing("flags")
, Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this )
, league_size )
{}
struct VerifyInitTag {};
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
m_flags( member.team_rank() , member.league_rank() ) = tid ;
}
KOKKOS_INLINE_FUNCTION
void operator()( const VerifyInitTag & , const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) {
printf("TestTeamPolicy member(%d,%d) error %d != %d\n"
, member.league_rank() , member.team_rank()
, tid , m_flags( member.team_rank() , member.league_rank() ) );
}
}
// included for test_small_league_size
TestTeamPolicy()
: m_flags()
{}
// included for test_small_league_size
struct NoOpTag {} ;
KOKKOS_INLINE_FUNCTION
void operator()( const NoOpTag & , const team_member & member ) const
{}
static void test_small_league_size() {
int bs = 8; // batch size (number of elements per batch)
int ns = 16; // total number of "problems" to process
// calculate total scratch memory space size
const int level = 0;
int mem_size = 960;
const int num_teams = ns/bs;
const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO());
Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0))
, TestTeamPolicy()
);
}
static void test_for( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor );
test_small_league_size();
}
struct ReduceTag {};
typedef long value_type ;
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member , value_type & update ) const
{
update += member.team_rank() + member.team_size() * member.league_rank();
}
KOKKOS_INLINE_FUNCTION
void operator()( const ReduceTag & , const team_member & member , value_type & update ) const
{
update += 1 + member.team_rank() + member.team_size() * member.league_rank();
}
static void test_reduce( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor );
const long N = team_size * league_size ;
long total = 0 ;
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total );
ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) );
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total );
ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) );
}
};
}
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Team-policy reduction functor over the work items [0, nwork).
// Each thread takes one contiguous chunk and accumulates three values:
//   value[0] : number of items visited
//   value[1] : sum of ( iwork + 1 )
//   value[2] : sum of ( nwork - iwork )
template< typename ScalarType , class DeviceType, class ScheduleType >
class ReduceTeamFunctor
{
public:
  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef typename execution_space::size_type size_type ;

  struct value_type {
    ScalarType value[3] ;
  };

  // Total number of work items split across all threads of the league.
  const size_type nwork ;

  ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {}

  ReduceTeamFunctor( const ReduceTeamFunctor & rhs )
    : nwork( rhs.nwork ) {}

  // Zero all three accumulators.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & dst ) const
  {
    for ( int k = 0 ; k < 3 ; ++k ) {
      dst.value[k] = 0 ;
    }
  }

  // Component-wise sum of two partial results.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst ,
             const volatile value_type & src ) const
  {
    for ( int k = 0 ; k < 3 ; ++k ) {
      dst.value[k] += src.value[k] ;
    }
  }

  // Process this thread's chunk: ceil( nwork / thread_size ) items starting
  // at chunk * thread_rank, clipped to nwork.
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & dst ) const
  {
    const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank();
    const int thread_size = ind.team_size() * ind.league_size();
    const int chunk       = ( nwork + thread_size - 1 ) / thread_size ;

    const size_type first = chunk * thread_rank ;
    const size_type last  = first + chunk < nwork ? first + chunk : nwork ;

    for ( size_type item = first ; item < last ; ++item ) {
      dst.value[0] += 1 ;
      dst.value[1] += item + 1 ;
      dst.value[2] += nwork - item ;
    }
  }
};
} // namespace Test
namespace {
template< typename ScalarType , class DeviceType, class ScheduleType >
class TestReduceTeam
{
public:
typedef DeviceType execution_space ;
typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
typedef typename execution_space::size_type size_type ;
//------------------------------------
TestReduceTeam( const size_type & nwork )
{
run_test(nwork);
}
void run_test( const size_type & nwork )
{
typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ;
typedef typename functor_type::value_type value_type ;
typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ;
enum { Count = 3 };
enum { Repeat = 100 };
value_type result[ Repeat ];
const unsigned long nw = nwork ;
const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 )
: (nw/2) * ( nw + 1 );
const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) );
const unsigned league_size = ( nwork + team_size - 1 ) / team_size ;
policy_type team_exec( league_size , team_size );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
result_type tmp( & result[i] );
Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp );
}
execution_space::fence();
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
for ( unsigned j = 0 ; j < Count ; ++j ) {
const unsigned long correct = 0 == j % 3 ? nw : nsum ;
ASSERT_EQ( (ScalarType) correct , result[i].value[j] );
}
}
}
};
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Team-policy functor that checks the team handle's intra-team collectives:
//   * team_reduce with a max join,
//   * team_scan called twice with the same input (both results must match
//     the expected exclusive prefix sum),
//   * team_scan with a global accumulator (accum), whose final value the
//     host driver compares against `total`.
// The reduction value is an error flag: 0 on success, 1 if any check failed.
template< class DeviceType, class ScheduleType >
class ScanTeamFunctor
{
public:
  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef long int value_type ;

  // Global accumulator handed to team_scan.
  Kokkos::View< value_type , execution_space > accum ;
  // Expected final value of accum; written by thread (0,0) of the launch.
  Kokkos::View< value_type , execution_space > total ;

  ScanTeamFunctor() : accum("accum"), total("total") {}

  // Start with "no error".
  KOKKOS_INLINE_FUNCTION
  void init( value_type & error ) const { error = 0 ; }

  // An error in any contribution makes the joined result an error.
  KOKKOS_INLINE_FUNCTION
  void join( value_type volatile & error ,
             value_type volatile const & input ) const
  { if ( input ) error = 1 ; }

  // Join operation passed to team_reduce: keeps the larger value.
  struct JoinMax {
    typedef long int value_type ;
    KOKKOS_INLINE_FUNCTION
    void join( value_type volatile & dst
             , value_type volatile const & input ) const
    { if ( dst < input ) dst = input ; }
  };

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & error ) const
  {
    if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) {
      // Sum of 1..thread_count, i.e. what accum should hold after every
      // thread has contributed ( 1 + thread_rank ) below.
      const long int thread_count = ind.league_size() * ind.team_size();
      total() = ( thread_count * ( thread_count + 1 ) ) / 2 ;
    }

    // Team max: each thread contributes league_rank + team_rank, so the
    // maximum comes from the thread with team_rank == team_size - 1.
    const long int m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() );

    if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) {
      printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n"
            , ind.league_rank(), ind.team_rank()
            , ind.league_size(), ind.team_size()
            , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m );
      // Flag the failure so the host-side assertion catches it; previously a
      // wrong team_reduce result was only printed and the test still passed.
      error = 1 ;
    }

    // Scan: every thread contributes ( league_rank + 1 ) + ( team_rank + 1 ).
    // The expected value is the sum of the contributions of all lower team ranks.
    const long int answer =
      ( ind.league_rank() + 1 ) * ind.team_rank() +
      ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ;

    const long int result =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    // Repeat the identical scan; the second call must return the same value.
    const long int result2 =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    if ( answer != result || answer != result2 ) {
      printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n",
             ind.league_rank(), ind.team_rank(),
             ind.league_size(), ind.team_size(),
             answer,result,result2);
      error = 1 ;
    }

    // Scan variant that also takes a pointer to the global accumulator;
    // the returned value is not checked here, only accum's final value is
    // (by the host driver).
    const long int thread_rank = ind.team_rank() +
                                 ind.team_size() * ind.league_rank();
    ind.team_scan( 1 + thread_rank , accum.ptr_on_device() );
  }
};
// Driver for Test::ScanTeamFunctor: launches the functor repeatedly and
// checks after every launch that no error was flagged and that the global
// accumulator equals the expected total.
template< class DeviceType, class ScheduleType >
class TestScanTeam
{
public:
  typedef DeviceType execution_space ;
  typedef long int value_type ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;
  typedef Test::ScanTeamFunctor<DeviceType, ScheduleType> functor_type ;

  //------------------------------------

  // Constructing the object runs the test.
  TestScanTeam( const size_t nteam )
  {
    run_test(nteam);
  }

  void run_test( const size_t nteam )
  {
    typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;

    // Number of launches is ceil( REPEAT / nteam ).
    const unsigned REPEAT = 100000 ;
    const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ;

    functor_type functor ;

    policy_type team_exec( nteam , policy_type::team_size_max( functor ) );

    for ( unsigned pass = 0 ; pass < Repeat ; ++pass ) {
      long int host_accum = 0 ;
      long int host_total = 0 ;
      long int host_error = 0 ;

      // host_total is still zero here, so this resets the device accumulator.
      Kokkos::deep_copy( functor.accum , host_total );

      Kokkos::parallel_reduce( team_exec , functor , result_type( & host_error ) );
      DeviceType::fence();

      Kokkos::deep_copy( host_accum , functor.accum );
      Kokkos::deep_copy( host_total , functor.total );

      ASSERT_EQ( host_error , 0 );
      ASSERT_EQ( host_total , host_accum );
    }

    execution_space::fence();
  }
};
} // namespace Test
/*--------------------------------------------------------------------------*/
namespace Test {
// Team-policy reduction functor that exercises team shared memory.
// Two int arrays of SHARED_COUNT entries are taken from ind.team_shmem(),
// filled cooperatively by the team, and verified by the team's last thread.
// The reduction value counts errors.
template< class ExecSpace, class ScheduleType >
struct SharedTeamFunctor {

  typedef ExecSpace execution_space ;
  typedef int value_type ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;

  enum { SHARED_COUNT = 1000 };

  typedef typename ExecSpace::scratch_memory_space shmem_space ;

  // tbd: MemoryUnmanaged should be the default for shared memory space
  typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

  // Tell how much shared memory will be required by this functor:
  // room for the two SHARED_COUNT-sized arrays, independent of team_size.
  inline
  unsigned team_shmem_size( int team_size ) const
  {
    return 2 * shared_int_array_type::shmem_size( SHARED_COUNT );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type & ind , value_type & update ) const
  {
    const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
    const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );

    const bool allocation_failed =
      ( SHARED_COUNT > 0 ) &&
      ( shared_A.ptr_on_device () == NULL || shared_B.ptr_on_device () == NULL );

    if ( allocation_failed ) {
      printf ("Failed to allocate shared memory of size %lu\n",
              static_cast<unsigned long> (SHARED_COUNT));
      ++update; // failure to allocate is an error
      return ;
    }

    // Fill phase: entries are dealt out to the team round-robin.
    const int stride = ind.team_size();
    for ( int entry = ind.team_rank() ; entry < SHARED_COUNT ; entry += stride ) {
      shared_A[entry] = entry + ind.league_rank();
      shared_B[entry] = 2 * entry + ind.league_rank();
    }

    ind.team_barrier();

    // Verify phase: only the highest-ranked thread of the team checks.
    if ( ind.team_rank() + 1 != ind.team_size() ) {
      return ;
    }

    for ( int entry = 0 ; entry < SHARED_COUNT ; ++entry ) {
      if ( shared_A[entry] != entry + ind.league_rank() ) {
        ++update ;
      }
      if ( shared_B[entry] != 2 * entry + ind.league_rank() ) {
        ++update ;
      }
    }
  }
};
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestSharedTeam {
TestSharedTeam()
{ run(); }
void run()
{
typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
}
namespace Test {
#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
// Lambda version of the shared-memory team test: the kernel body matches
// SharedTeamFunctor::operator(), but the scratch size is requested through
// the policy's set_scratch_size and the result view lives in MemorySpace.
template< class MemorySpace, class ExecSpace, class ScheduleType >
struct TestLambdaSharedTeam {

  // Constructing the object runs the test.
  TestLambdaSharedTeam()
  { run(); }

  void run()
  {
    typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ;
    //typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
    typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ;

    typedef typename ExecSpace::scratch_memory_space shmem_space ;

    // tbd: MemoryUnmanaged should be the default for shared memory space
    typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

    const int SHARED_COUNT = 1000;

    // One thread per team by default; 128 when running on Kokkos::Cuda.
    int team_size = 1;
#ifdef KOKKOS_HAVE_CUDA
    if(std::is_same<ExecSpace,Kokkos::Cuda>::value)
      team_size = 128;
#endif

    Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size);
    // Level-0 per-team scratch for the two int arrays of SHARED_COUNT entries.
    team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int)));

    typename Functor::value_type error_count = 0 ;

    Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA
      ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) {

      const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
      const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );

      const bool allocation_failed =
        ( SHARED_COUNT > 0 ) &&
        ( shared_A.ptr_on_device () == NULL || shared_B.ptr_on_device () == NULL );

      if ( allocation_failed ) {
        printf ("Failed to allocate shared memory of size %lu\n",
                static_cast<unsigned long> (SHARED_COUNT));
        ++update; // failure to allocate is an error
      } else {
        // Fill phase: entries are dealt out to the team round-robin.
        for ( int entry = ind.team_rank() ; entry < SHARED_COUNT ; entry += ind.team_size() ) {
          shared_A[entry] = entry + ind.league_rank();
          shared_B[entry] = 2 * entry + ind.league_rank();
        }

        ind.team_barrier();

        // Verify phase: only the highest-ranked thread of the team checks.
        if ( ind.team_rank() + 1 == ind.team_size() ) {
          for ( int entry = 0 ; entry < SHARED_COUNT ; ++entry ) {
            if ( shared_A[entry] != entry + ind.league_rank() ) {
              ++update ;
            }
            if ( shared_B[entry] != 2 * entry + ind.league_rank() ) {
              ++update ;
            }
          }
        }
      }
    }, result_type( & error_count ) );

    ASSERT_EQ( error_count , 0 );
  }
};
#endif
}
namespace Test {
// Team-policy reduction functor that exercises per-team and per-thread
// scratch allocations taken from team_scratch(1) / thread_scratch(1).
// NOTE(review): the allocations here use scratch level 1; confirm that the
// launching policy requests scratch on the level these calls resolve to.
//
// Checks performed (every failure increments the reduction value):
//   * all three scratch views were allocated,
//   * the team array scratch_A holds the values written by the team,
//   * every thread of the team sees the same scratch_A address,
//   * per-thread arrays scratch_B are at least SHARED_THREAD_COUNT entries
//     apart and evenly spaced from one team rank to the next.
template< class ExecSpace, class ScheduleType >
struct ScratchTeamFunctor {

  typedef ExecSpace execution_space ;
  typedef int value_type ;
  typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ;

  enum { SHARED_TEAM_COUNT = 100 };
  enum { SHARED_THREAD_COUNT = 10 };

  typedef typename ExecSpace::scratch_memory_space shmem_space ;

  // tbd: MemoryUnmanaged should be the default for shared memory space
  typedef Kokkos::View<size_t*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type & ind , value_type & update ) const
  {
    // scratch_ptr layout: [0, team_size) holds each thread's view of the
    // scratch_A address, [team_size, 2*team_size) each thread's scratch_B address.
    const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 2*ind.team_size() );
    const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT );
    const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT );

    if ((scratch_ptr.ptr_on_device () == NULL ) ||
        (scratch_A.  ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) ||
        (scratch_B.  ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) {
      printf ("Failed to allocate shared memory of size %lu\n",
              static_cast<unsigned long> (SHARED_TEAM_COUNT));
      ++update; // failure to allocate is an error
    }
    else {
      // Team-wide fill of the per-team array.
      Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) {
        scratch_A[i] = i + ind.league_rank();
      });

      // Each thread fills its private array.
      for(int i=0; i<SHARED_THREAD_COUNT; i++)
        scratch_B[i] = 10000*ind.league_rank() + 100*ind.team_rank() + i;

      // Publish the addresses this thread sees so the others can compare them.
      scratch_ptr[ind.team_rank()] = (size_t) scratch_A.ptr_on_device();
      scratch_ptr[ind.team_rank() + ind.team_size()] = (size_t) scratch_B.ptr_on_device();

      ind.team_barrier();

      for( int i = 0; i<SHARED_TEAM_COUNT; i++) {
        if(scratch_A[i] != size_t(i + ind.league_rank()))
          ++update;
      }

      // Every thread must have recorded the same team-array address.
      for( int i = 0; i < ind.team_size(); i++) {
        if(scratch_ptr[0]!=scratch_ptr[i]) ++update;
      }

      // The stride checks compare the per-thread addresses of ranks 0 and 1.
      // With a single-thread team there is no rank 1, and
      // scratch_ptr[ 1 + team_size ] would lie past the end of the
      // 2*team_size-entry array, so skip them in that case.
      if ( ind.team_size() > 1 ) {
        // Neighbouring per-thread arrays must not overlap.
        if(scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()] <
           SHARED_THREAD_COUNT*sizeof(size_t))
          ++update;

        // ... and the spacing must be the same for every pair of neighbours.
        for( int i = 1; i < ind.team_size(); i++) {
          if((scratch_ptr[i+ind.team_size()] - scratch_ptr[i-1+ind.team_size()]) !=
             (scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()])) ++update;
        }
      }
    }
  }
};
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestScratchTeam {
TestScratchTeam()
{ run(); }
void run()
{
typedef Test::ScratchTeamFunctor<ExecSpace, ScheduleType> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) +
Functor::shared_int_array_type::shmem_size(2*team_size);
int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT);
Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size),
Kokkos::PerThread(thread_scratch_size)) ,
Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
}
namespace Test {
// Shared kernel body for the multi-level scratch tests.
// Takes three per-team and three per-thread arrays from each of scratch
// levels 0 and 1, fills them with distinct, position-dependent values and
// reads them back. Returns the number of mismatches seen by the caller.
template< class ExecSpace>
KOKKOS_INLINE_FUNCTION
int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
  typedef Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> scratch_view ;

  // Allocation order is deliberate: level-0 and level-1 requests are interleaved.
  scratch_view a_team1  (team.team_scratch(0),  128);
  scratch_view a_thread1(team.thread_scratch(0),16);
  scratch_view a_team2  (team.team_scratch(0),  128);
  scratch_view a_thread2(team.thread_scratch(0),16);

  scratch_view b_team1  (team.team_scratch(1),  128000);
  scratch_view b_thread1(team.thread_scratch(1),16000);
  scratch_view b_team2  (team.team_scratch(1),  128000);
  scratch_view b_thread2(team.thread_scratch(1),16000);

  scratch_view a_team3  (team.team_scratch(0),  128);
  scratch_view a_thread3(team.thread_scratch(0),16);
  scratch_view b_team3  (team.team_scratch(1),  128000);
  scratch_view b_thread3(team.thread_scratch(1),16000);

  // ---- Fill: level-0 arrays ----
  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& k) {
    a_team1(k) = 1000000 + k;
    a_team2(k) = 2000000 + k;
    a_team3(k) = 3000000 + k;
  });
  team.team_barrier();

  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& k){
    const int thread_term = 100000*team.team_rank() + 16-k;
    a_thread1(k) = 1000000 + thread_term;
    a_thread2(k) = 2000000 + thread_term;
    a_thread3(k) = 3000000 + thread_term;
  });

  // ---- Fill: level-1 arrays ----
  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& k) {
    b_team1(k) = 1000000 + k;
    b_team2(k) = 2000000 + k;
    b_team3(k) = 3000000 + k;
  });
  team.team_barrier();

  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& k){
    const int thread_term = 100000*team.team_rank() + 16-k;
    b_thread1(k) = 1000000 + thread_term;
    b_thread2(k) = 2000000 + thread_term;
    b_thread3(k) = 3000000 + thread_term;
  });
  team.team_barrier();

  int error = 0;

  // ---- Verify: level-0 arrays ----
  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& k) {
    if(a_team1(k) != 1000000 + k) ++error;
    if(a_team2(k) != 2000000 + k) ++error;
    if(a_team3(k) != 3000000 + k) ++error;
  });
  team.team_barrier();

  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& k){
    const int thread_term = 100000*team.team_rank() + 16-k;
    if(a_thread1(k) != 1000000 + thread_term) ++error;
    if(a_thread2(k) != 2000000 + thread_term) ++error;
    if(a_thread3(k) != 3000000 + thread_term) ++error;
  });

  // ---- Verify: level-1 arrays ----
  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& k) {
    if(b_team1(k) != 1000000 + k) ++error;
    if(b_team2(k) != 2000000 + k) ++error;
    if(b_team3(k) != 3000000 + k) ++error;
  });
  team.team_barrier();

  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& k){
    const int thread_term = 100000*team.team_rank() + 16-k;
    if(b_thread1(k) != 1000000 + thread_term) ++error;
    if(b_thread2(k) != 2000000 + thread_term) ++error;
    if(b_thread3(k) != 3000000 + thread_term) ++error;
  });

  return error;
}
// Work tags used by the multi-level scratch functor classes below to select
// between their two operator() overloads:
// TagReduce picks the overload that takes an int& reduction argument.
struct TagReduce {};
// TagFor picks the overload that adds into the atomic `errors` view.
struct TagFor {};
template< class ExecSpace, class ScheduleType >
struct ClassNoShmemSizeFunction {
Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
KOKKOS_INLINE_FUNCTION
void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const {
int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
errors() += error;
}
KOKKOS_INLINE_FUNCTION
void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const {
error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
}
void run() {
Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors");
errors = d_errors;
const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000);
const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000);
{
Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this);
Kokkos::fence();
typename Kokkos::View<int,ExecSpace>::HostMirror h_errors = Kokkos::create_mirror_view(d_errors);
Kokkos::deep_copy(h_errors,d_errors);
ASSERT_EQ(h_errors(),0);
}
{
int error = 0;
Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this,error);
Kokkos::fence();
ASSERT_EQ(error,0);
}
};
};
template< class ExecSpace, class ScheduleType >
struct ClassWithShmemSizeFunction {
Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
KOKKOS_INLINE_FUNCTION
void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const {
int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
errors() += error;
}
KOKKOS_INLINE_FUNCTION
void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const {
error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
}
void run() {
Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors");
errors = d_errors;
const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000);
const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000);
{
Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this);
Kokkos::fence();
typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(d_errors);
Kokkos::deep_copy(h_errors,d_errors);
ASSERT_EQ(h_errors(),0);
}
{
int error = 0;
Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16);
Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
*this,error);
Kokkos::fence();
ASSERT_EQ(error,0);
}
};
unsigned team_shmem_size(int team_size) const {
const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
return per_team0 + team_size * per_thread0;
}
};
// Lambda form of the multi-level scratch test: runs the shared kernel body
// through parallel_for (errors collected in an atomic view) and through
// parallel_reduce (errors collected in the reduction value).
// The whole body is compiled only when lambda dispatch is available.
template< class ExecSpace, class ScheduleType >
void test_team_mulit_level_scratch_test_lambda() {
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
  typedef Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> scratch_view ;

  // Atomic alias of d_errors, captured by the parallel_for lambda.
  Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
  Kokkos::View<int,ExecSpace> d_errors("Errors");
  errors = d_errors;

  // Three arrays of each kind per level, matching the kernel body.
  const int per_team0   = 3*scratch_view::shmem_size(128);
  const int per_thread0 = 3*scratch_view::shmem_size(16);
  const int per_team1   = 3*scratch_view::shmem_size(128000);
  const int per_thread1 = 3*scratch_view::shmem_size(16000);

  Kokkos::TeamPolicy<ExecSpace,ScheduleType> policy(10,8,16);

  Kokkos::parallel_for(
    policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0))
          .set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
    KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
      errors() += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
    });
  Kokkos::fence();

  typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(errors);
  Kokkos::deep_copy(h_errors,d_errors);
  ASSERT_EQ(h_errors(),0);

  int error = 0;
  Kokkos::parallel_reduce(
    policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0))
          .set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)),
    KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team, int& count) {
      count += test_team_mulit_level_scratch_loop_body<ExecSpace>(team);
    },error);
  ASSERT_EQ(error,0);
  Kokkos::fence();
#endif
}
}
namespace {
template< class ExecSpace, class ScheduleType >
struct TestMultiLevelScratchTeam {
TestMultiLevelScratchTeam()
{ run(); }
void run()
{
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
Test::test_team_mulit_level_scratch_test_lambda<ExecSpace, ScheduleType>();
#endif
Test::ClassNoShmemSizeFunction<ExecSpace, ScheduleType> c1;
c1.run();
Test::ClassWithShmemSizeFunction<ExecSpace, ScheduleType> c2;
c2.run();
}
};
}
namespace Test {
// Checks that View::shmem_size for a rank-3 view of long reports exactly
// extent0 * extent1 * extent2 * sizeof(long) bytes.
template< class ExecSpace >
struct TestShmemSize {

  // Constructing the object runs the test.
  TestShmemSize() { run(); }

  void run()
  {
    typedef Kokkos::View< long***, ExecSpace > view_type;

    size_t d1 = 5;
    size_t d2 = 6;
    size_t d3 = 7;

    const size_t expected_bytes = d1 * d2 * d3 * sizeof(long);
    size_t reported_bytes = view_type::shmem_size( d1, d2, d3 );

    ASSERT_EQ( reported_bytes, expected_bytes );
  }
};
}
/*--------------------------------------------------------------------------*/
Event Timeline
Log In to Comment