Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
// Make sure total block size is permissible
if ( m_team_size * m_vector_length > 1024 ) {
Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. Team size x vector length must be smaller than 1024."));
}
}
/** \brief Specify league size, request team size
 *
 *  Constructor overload taking an execution space instance and Kokkos::AUTO
 *  in place of an explicit team size. The execution space and AUTO arguments
 *  are unnamed and are not referenced in the visible body.
 *
 *  \param league_size_            stored in m_league_size
 *  \param vector_length_request   stored in m_vector_length (defaults to 1);
 *                                 must be an integral power of two
 *
 *  Reports errors through Impl::throw_runtime_exception.
 */
TeamPolicyInternal( execution_space &
, int league_size_
, const Kokkos::AUTO_t & /* team_size_request */
, int vector_length_request = 1 )
: m_league_size( league_size_ )
// -1 is presumably a "not yet chosen" marker for the AUTO team size -- TODO confirm
, m_team_size( -1 )
, m_vector_length( vector_length_request )
, m_team_scratch_size {0,0}
, m_thread_scratch_size {0,0}
, m_chunk_size ( 32 )
{
// Allow only power-of-two vector_length
if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
// NOTE(review): no league-size condition guarding the next call is visible in
// this view; as written it directly follows the previous throw inside the same
// branch. Confirm against the full file that the guard was not lost.
Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
// Make sure total block size is permissible
// NOTE(review): m_team_size was initialized to -1 above and is not modified in
// the visible code, so for a positive vector length the product below is
// negative and this condition cannot hold -- confirm whether the check is
// intended for this AUTO overload. Also note the condition accepts a product
// of exactly 1024, while the message says "smaller than 1024".
if ( m_team_size * m_vector_length > 1024 ) {
Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. Team size x vector length must be smaller than 1024."));
}
}
TeamPolicyInternal( int league_size_
, const Kokkos::AUTO_t & /* team_size_request */
, int vector_length_request = 1 )
: m_league_size( league_size_ )
, m_team_size( -1 )
, m_vector_length ( vector_length_request )
, m_team_scratch_size {0,0}
, m_thread_scratch_size {0,0}
, m_chunk_size ( 32 )
{
// Allow only power-of-two vector_length
if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a vector length of greater than 1 is not currently supported for CUDA for dynamic sized reduction types.");
if( (m_team_size < 32) && !UseShflReduction )
Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller than 32 is not currently supported with CUDA for dynamic sized reduction types.");
// Functor's reduce memory, team scan memory, and team shared memory depend upon team size.
const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
if (! Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) {
Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size"));
}
if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory"));
Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a vector length of greater than 1 is not currently supported for CUDA for dynamic sized reduction types.");
if( (m_team_size < 32) && !UseShflReduction )
Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller than 32 is not currently supported with CUDA for dynamic sized reduction types.");
// Functor's reduce memory, team scan memory, and team shared memory depend upon team size.
const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
if ( (! Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) ||