Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F72793606
TestMemoryPool.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Jul 17, 02:34
Size
24 KB
Mime Type
text/x-c++
Expires
Fri, Jul 19, 02:34 (2 d)
Engine
blob
Format
Raw Data
Handle
19048406
Attached To
rLAMMPS lammps
TestMemoryPool.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_UNITTEST_MEMPOOL_HPP
#define KOKKOS_UNITTEST_MEMPOOL_HPP
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <random>
#include <string>
#include <impl/Kokkos_Timer.hpp>
//#define TESTMEMORYPOOL_PRINT
//#define TESTMEMORYPOOL_PRINT_STATUS
// STRIDE scales the iteration range of every functor in this file: each one
// launches (count * STRIDE) work items and only the items whose index is a
// multiple of STRIDE do any work, operating on slot (index / STRIDE).
// NOTE(review): 32 presumably matches the CUDA warp size so that only one
// thread per warp touches the memory pool -- confirm.
#ifdef KOKKOS_HAVE_CUDA
#define STRIDE 32
#else
#define STRIDE 1
#endif
namespace TestMemoryPool {
// Element type of the pointer View used by test_mempool() and
// test_memory_exhaustion(): one record per requested chunk.
struct pointer_obj {
// Chunk obtained from the pool by allocate_memory (cast to uint64_t *);
// count_invalid_memory treats a value of 0 as a failed allocation.
uint64_t * ptr;
};
// Element type of the pointer queue used by test_mempool2(): remembers both
// the chunk and the size it was requested with, because the later
// deallocate() call in allocate_deallocate_memory passes the size back.
struct pointer_obj2 {
// Chunk returned by the pool's allocate().
void * ptr;
// Number of bytes that were requested for ptr.
size_t size;
};
// Functor that requests one chunk of m_chunk_size bytes from the memory pool
// for every slot of the pointer View.
//
// Constructing the object launches the parallel_for, so the constructor call
// itself performs the allocations.
//
//   ptrs      View whose entries receive the allocated pointers.
//   num_ptrs  Number of slots in ptrs to fill.
//   cs        Size in bytes passed to every allocate() call.
//   m         Memory pool to allocate from (copied into the functor).
template < typename PointerView, typename Allocator >
struct allocate_memory {
  typedef typename PointerView::execution_space  execution_space;
  typedef typename execution_space::size_type    size_type;

  PointerView  m_pointers;
  size_t       m_chunk_size;
  Allocator    m_mempool;

  allocate_memory( PointerView & ptrs, size_t num_ptrs,
                   size_t cs, Allocator & m )
    : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m )
  {
    // One allocation per slot; the range is widened by STRIDE and the
    // operator below ignores every index that is not a multiple of STRIDE.
    Kokkos::parallel_for( num_ptrs * STRIDE, *this );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    // Only every STRIDE-th work item allocates.
    if ( i % STRIDE != 0 ) return;

    uint64_t * const chunk =
      static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) );

    m_pointers[i / STRIDE].ptr = chunk;
  }
};
// Reduction functor that counts how many slots of the pointer View hold a
// null (0) pointer.
//
// Constructing the object launches the parallel_reduce; the count is written
// to the caller's variable bound to res.
//
//   ptrs      View of pointer records to inspect.
//   num_ptrs  Number of slots in ptrs to inspect.
//   res       Receives the number of null pointers found.
template < typename PointerView >
struct count_invalid_memory {
  typedef typename PointerView::execution_space  execution_space;
  typedef typename execution_space::size_type    size_type;
  typedef uint64_t                               value_type;

  PointerView  m_pointers;
  uint64_t &   m_result;

  count_invalid_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res )
    : m_pointers( ptrs ), m_result( res )
  {
    // The range is widened by STRIDE; only multiples of STRIDE contribute.
    Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result );
  }

  // Start every partial count at zero.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & v ) const
  {
    v = 0;
  }

  // Combine two partial counts.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, volatile value_type const & src ) const
  {
    dst += src;
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i, value_type & r ) const
  {
    if ( i % STRIDE != 0 ) return;

    if ( m_pointers[i / STRIDE].ptr == 0 ) {
      r += 1;
    }
  }
};
// Functor that writes each slot's own index into the first uint64_t of the
// chunk stored in that slot.
//
// Constructing the object launches the parallel_for.  Slots holding a null
// pointer (a failed allocation) are skipped instead of being dereferenced,
// so an allocation failure shows up as a wrong checksum in the caller
// rather than as an invalid memory access.
//
//   ptrs      View of pointer records whose chunks are written.
//   num_ptrs  Number of slots in ptrs to process.
template < typename PointerView >
struct fill_memory {
  typedef typename PointerView::execution_space  execution_space;
  typedef typename execution_space::size_type    size_type;

  PointerView m_pointers;

  fill_memory( PointerView & ptrs, size_t num_ptrs ) : m_pointers( ptrs )
  {
    // The range is widened by STRIDE; only multiples of STRIDE do any work.
    Kokkos::parallel_for( num_ptrs * STRIDE, *this );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    if ( i % STRIDE != 0 ) return;

    const size_type slot = i / STRIDE;
    uint64_t * const chunk = m_pointers[slot].ptr;

    // Never write through a null pointer left behind by a failed allocation.
    if ( chunk != 0 ) {
      *chunk = slot;
    }
  }
};
// Reduction functor that sums the first uint64_t of every chunk referenced
// by the pointer View.
//
// Constructing the object launches the parallel_reduce; the sum is written
// to the caller's variable bound to res.  Slots holding a null pointer (a
// failed allocation) contribute nothing instead of being dereferenced.
//
//   ptrs      View of pointer records whose chunks are read.
//   num_ptrs  Number of slots in ptrs to process.
//   res       Receives the sum.
template < typename PointerView >
struct sum_memory {
  typedef typename PointerView::execution_space  execution_space;
  typedef typename execution_space::size_type    size_type;
  typedef uint64_t                               value_type;

  PointerView  m_pointers;
  uint64_t &   m_result;

  sum_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res )
    : m_pointers( ptrs ), m_result( res )
  {
    // The range is widened by STRIDE; only multiples of STRIDE contribute.
    Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result );
  }

  // Start every partial sum at zero.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & v ) const
  { v = 0; }

  // Combine two partial sums.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, volatile value_type const & src ) const
  { dst += src; }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i, value_type & r ) const
  {
    if ( i % STRIDE != 0 ) return;

    const uint64_t * const chunk = m_pointers[i / STRIDE].ptr;

    // Never read through a null pointer left behind by a failed allocation.
    if ( chunk != 0 ) {
      r += *chunk;
    }
  }
};
// Functor that hands the chunk stored in every slot of the pointer View back
// to the memory pool.
//
// Constructing the object launches the parallel_for, so the constructor call
// itself performs the deallocations.
//
//   ptrs      View of pointer records whose chunks are released.
//   num_ptrs  Number of slots in ptrs to process.
//   cs        Size in bytes passed to every deallocate() call.
//   m         Memory pool to release to (copied into the functor).
template < typename PointerView, typename Allocator >
struct deallocate_memory {
  typedef typename PointerView::execution_space  execution_space;
  typedef typename execution_space::size_type    size_type;

  PointerView  m_pointers;
  size_t       m_chunk_size;
  Allocator    m_mempool;

  deallocate_memory( PointerView & ptrs, size_t num_ptrs,
                     size_t cs, Allocator & m )
    : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m )
  {
    // The range is widened by STRIDE; only multiples of STRIDE do any work.
    Kokkos::parallel_for( num_ptrs * STRIDE, *this );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    if ( i % STRIDE != 0 ) return;

    m_mempool.deallocate( m_pointers[i / STRIDE].ptr, m_chunk_size );
  }
};
// Functor that executes a list of mixed allocation / deallocation requests
// against the memory pool, using the pointer View as a queue.
//
// Each work entry encodes its request in the least significant bit:
//   bit 0 == 0  allocate (entry >> 1) bytes and record pointer and size at
//               the position obtained by atomically advancing m_ptrs_back;
//   bit 0 == 1  deallocate the record at the position obtained by atomically
//               advancing m_ptrs_front.
//
// Constructing the object launches the parallel_for.
//
//   w          View of encoded work entries.
//   work_size  Number of entries of w to execute.
//   p          View of pointer records used as the queue storage.
//   pf         Scalar View holding the queue's front index.
//   pb         Scalar View holding the queue's back index.
//   m          Memory pool to operate on (copied into the functor).
template < typename WorkView, typename PointerView, typename ScalarView,
           typename Allocator >
struct allocate_deallocate_memory {
  typedef typename WorkView::execution_space   execution_space;
  typedef typename execution_space::size_type  size_type;

  WorkView     m_work;
  PointerView  m_pointers;
  ScalarView   m_ptrs_front;
  ScalarView   m_ptrs_back;
  Allocator    m_mempool;

  allocate_deallocate_memory( WorkView & w, size_t work_size, PointerView & p,
                              ScalarView pf, ScalarView pb, Allocator & m )
    : m_work( w ), m_pointers( p ), m_ptrs_front( pf ), m_ptrs_back( pb ),
      m_mempool( m )
  {
    // The range is widened by STRIDE; only multiples of STRIDE do any work.
    Kokkos::parallel_for( work_size * STRIDE, *this );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    if ( i % STRIDE != 0 ) return;

    const unsigned request = m_work[i / STRIDE];

    if ( ( request & 1 ) != 0 ) {
      // Deallocation: consume the record at the front of the queue.
      const size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_front(), 1 );

      m_mempool.deallocate( m_pointers[pos].ptr, m_pointers[pos].size );
      return;
    }

    // Allocation: claim the next record at the back of the queue.
    const size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_back(), 1 );
    const size_t alloc_size = request >> 1;

    m_pointers[pos].ptr = m_mempool.allocate( alloc_size );
    m_pointers[pos].size = alloc_size;
  }
};
// Layout of the timing report lines written by print_results():
//   PRECISION  digits after the decimal point of the elapsed time,
//   SHIFTW     field width of the label,
//   SHIFTW2    field width of the elapsed time.
#define PRECISION 6
#define SHIFTW 24
#define SHIFTW2 12

// Saves the format flags and precision of a stream on construction and puts
// them back on destruction, so that the std::fixed / std::setprecision used
// by print_results() do not leak into later output on the same stream.
struct print_results_format_guard {
  explicit print_results_format_guard( std::ios_base & stream )
    : m_stream( stream )
    , m_flags( stream.flags() )
    , m_precision( stream.precision() )
  {}

  ~print_results_format_guard()
  {
    m_stream.flags( m_flags );
    m_stream.precision( m_precision );
  }

private:
  // Not copyable: exactly one restore per saved state.
  print_results_format_guard( const print_results_format_guard & );
  print_results_format_guard & operator=( const print_results_format_guard & );

  std::ios_base &               m_stream;
  const std::ios_base::fmtflags m_flags;
  const std::streamsize         m_precision;
};

// Prints "<label><elapsed time>" on one line of std::cout.
//
//   text          Label, right-aligned in SHIFTW columns.
//   elapsed_time  Value printed in fixed notation with PRECISION decimals,
//                 right-aligned in SHIFTW2 columns.
//
// The formatting state of std::cout is restored before returning.
template < typename F >
void print_results( const std::string & text, F elapsed_time )
{
  const print_results_format_guard restore_format( std::cout );

  std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 )
            << std::fixed << std::setprecision( PRECISION ) << elapsed_time
            << std::endl;
}

// Prints "<label><elapsed time> <result>" on one line of std::cout.
//
//   text          Label, right-aligned in SHIFTW columns.
//   width         Field width used for result.
//   elapsed_time  Value printed in fixed notation with PRECISION decimals,
//                 right-aligned in SHIFTW2 columns.
//   result        Extra value appended after a single space; it is printed
//                 while the fixed / PRECISION formatting is still active.
//
// The formatting state of std::cout is restored before returning.
template < typename F, typename T >
void print_results( const std::string & text, unsigned long long width,
                    F elapsed_time, T result )
{
  const print_results_format_guard restore_format( std::cout );

  std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 )
            << std::fixed << std::setprecision( PRECISION ) << elapsed_time
            << " " << std::setw( static_cast< int >( width ) ) << result
            << std::endl;
}

// Same as the overload above for a std::string result, which is taken by
// reference to avoid a copy.
template < typename F >
void print_results( const std::string & text, unsigned long long width,
                    F elapsed_time, const std::string & result )
{
  const print_results_format_guard restore_format( std::cout );

  std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 )
            << std::fixed << std::setprecision( PRECISION ) << elapsed_time
            << " " << std::setw( static_cast< int >( width ) ) << result
            << std::endl;
}
// Stress test for MemoryPool construction, allocate(), and deallocate().
//
// The pool is hammered in a worse than real-world pattern: one parallel loop
// in which every work item allocates, followed by one in which every work
// item deallocates.  All requests are for chunk_size bytes.  The sequence
//
//   allocate -> count null pointers -> fill -> sum -> deallocate
//
// is run twice on the same pool, so the second round also exercises reuse of
// memory that has been given back.
//
//   chunk_size  Size in bytes of every allocation request (must be > 0).
//   total_size  Total number of bytes requested per round; the pool itself is
//               created with 1.2 times this size.
//
// Returns true when, in both rounds, no allocation came back null and the
// sum over the filled chunks equals 0 + 1 + ... + (num_chunks - 1).
// Failures are reported on std::cerr.
template < class Device >
bool test_mempool( size_t chunk_size, size_t total_size )
{
  typedef typename Device::execution_space                 execution_space;
  typedef typename Device::memory_space                    memory_space;
  typedef Device                                           device_type;
  typedef Kokkos::View< pointer_obj *, device_type >       pointer_view;
  typedef Kokkos::Experimental::MemoryPool< device_type >  pool_memory_space;

  // The number of chunks is total_size / chunk_size, so a zero chunk size
  // cannot be tested.
  if ( chunk_size == 0 ) {
    std::cerr << "Invalid chunk size of 0." << std::endl;
    return false;
  }

  uint64_t result = 0;
  size_t num_chunks = total_size / chunk_size;
  bool return_val = true;

  pointer_view pointers( "pointers", num_chunks );

#ifdef TESTMEMORYPOOL_PRINT
  std::cout << "*** test_mempool() ***" << std::endl
            << std::setw( SHIFTW ) << "chunk_size: " << std::setw( 12 )
            << chunk_size << std::endl
            << std::setw( SHIFTW ) << "total_size: " << std::setw( 12 )
            << total_size << std::endl
            << std::setw( SHIFTW ) << "num_chunks: " << std::setw( 12 )
            << num_chunks << std::endl;

  double elapsed_time = 0;
  Kokkos::Timer timer;
#endif

  pool_memory_space mempool( memory_space(), total_size * 1.2, 20 );

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "initialize mempool: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // Two identical rounds on the same pool.
  for ( int round = 0; round < 2; ++round ) {
    {
      allocate_memory< pointer_view, pool_memory_space >
        am( pointers, num_chunks, chunk_size, mempool );
    }

#ifdef TESTMEMORYPOOL_PRINT
    execution_space::fence();
    elapsed_time = timer.seconds();
    print_results( "allocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
    mempool.print_status();
#endif
    timer.reset();
#endif

    {
      count_invalid_memory< pointer_view > sm( pointers, num_chunks, result );
    }

    // The count is inspected below, so make sure the reduction has finished.
    execution_space::fence();

#ifdef TESTMEMORYPOOL_PRINT
    elapsed_time = timer.seconds();
    print_results( "invalid chunks: ", 16, elapsed_time, result );
    timer.reset();
#endif

    if ( result != 0 ) {
      // At least one allocation failed.  Report it and skip the fill / sum
      // steps, which would otherwise operate on null pointers.
      std::cerr << "Invalid (null) chunks returned by allocate(): "
                << result << std::endl;
      return_val = false;
    }
    else {
      {
        fill_memory< pointer_view > fm( pointers, num_chunks );
      }

#ifdef TESTMEMORYPOOL_PRINT
      execution_space::fence();
      elapsed_time = timer.seconds();
      print_results( "fill chunks: ", elapsed_time );
      timer.reset();
#endif

      {
        sum_memory< pointer_view > sm( pointers, num_chunks, result );
      }

      execution_space::fence();

#ifdef TESTMEMORYPOOL_PRINT
      elapsed_time = timer.seconds();
      print_results( "sum chunks: ", 16, elapsed_time, result );
#endif

      // Chunk k was filled with k, so the sum must be 0 + ... + (n - 1).
      if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) {
        std::cerr << "Invalid sum value in memory." << std::endl;
        return_val = false;
      }
    }

#ifdef TESTMEMORYPOOL_PRINT
    timer.reset();
#endif

    {
      // Every slot is handed to deallocate(), including any that hold a null
      // pointer after a failed allocation.
      // NOTE(review): relies on deallocate() ignoring null pointers, as the
      // comment in test_memory_exhaustion() states -- confirm.
      deallocate_memory< pointer_view, pool_memory_space >
        dm( pointers, num_chunks, chunk_size, mempool );
    }

#ifdef TESTMEMORYPOOL_PRINT
    execution_space::fence();
    elapsed_time = timer.seconds();
    print_results( "deallocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
    mempool.print_status();
#endif
    timer.reset();
#endif
  }

  return return_val;
}
// Returns the smallest integral power of two that is >= val.
//
//   val  Non-negative integer value.
//
// A val of 0 or 1 yields 1.  If no power of two >= val is representable in
// T, 0 is returned instead of overflowing.
//
// The result is found by repeated doubling, which takes at most one step per
// bit of T and needs no compiler-specific bit-scan support.
template < typename T >
T smallest_power2_ge( T val )
{
  // Largest value that can still be doubled without leaving the range of T.
  const T max_doublable = ( std::numeric_limits< T >::max )() / T( 2 );

  T power = T( 1 );

  while ( power < val ) {
    if ( power > max_doublable ) return T( 0 );

    power = static_cast< T >( power * T( 2 ) );
  }

  return power;
}
// This test makes allocation requests for multiple sizes and interleaves
// allocation and deallocation.
//
// There are 3 phases.  The first phase does only allocations to build up a
// working state for the allocator.  The second phase interleaves allocations
// and deallocations.  The third phase does only deallocations to undo all the
// allocations from the first phase.  Because the first phase builds up a
// working state, allocations and deallocations can happen in any order during
// the second phase.  Each phase works on multiple chunk sizes.
//
//   base_chunk_size  Smallest allocation size in bytes; raised to
//                    MIN_BASE_CHUNK_SIZE if smaller.  Each further chunk size
//                    is twice the previous one.
//   num_chunk_sizes  Number of different chunk sizes (must be > 0).
//   phase1_size      Number of phase 1 requests; rounded up to a multiple of
//                    num_chunk_sizes.
//   phase2_size      Number of phase 2 requests; rounded up to a multiple of
//                    2 * num_chunk_sizes and capped at 2 * phase1_size.
//
// The request order of phases 1 and 2 is shuffled with a fixed-seed random
// engine, so every run executes the same sequence.
template < class Device >
void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes,
                    size_t phase1_size, size_t phase2_size )
{
#ifdef TESTMEMORYPOOL_PRINT
  typedef typename Device::execution_space                 execution_space;
#endif
  typedef typename Device::memory_space                    memory_space;
  typedef Device                                           device_type;
  typedef Kokkos::View< unsigned *, device_type >          work_view;
  typedef Kokkos::View< size_t, device_type >              scalar_view;
  typedef Kokkos::View< pointer_obj2 *, device_type >      pointer_view;
  typedef Kokkos::Experimental::MemoryPool< device_type >  pool_memory_space;

  enum {
    MIN_CHUNK_SIZE      = 64,
    MIN_BASE_CHUNK_SIZE = MIN_CHUNK_SIZE / 2 + 1
  };

  // Every size computation below divides by num_chunk_sizes.
  if ( num_chunk_sizes == 0 ) {
    std::cerr << "test_mempool2() requires at least one chunk size."
              << std::endl;
    return;
  }

  // Make sure the base chunk size is at least MIN_BASE_CHUNK_SIZE bytes, so
  // all the different chunk sizes translate to different block sizes for the
  // allocator.
  if ( base_chunk_size < MIN_BASE_CHUNK_SIZE ) {
    base_chunk_size = MIN_BASE_CHUNK_SIZE;
  }

  // Get the smallest power of 2 >= the base chunk size.  The size must be
  // >= MIN_CHUNK_SIZE, though.
  unsigned ceil_base_chunk_size = smallest_power2_ge( base_chunk_size );
  if ( ceil_base_chunk_size < MIN_CHUNK_SIZE ) {
    ceil_base_chunk_size = MIN_CHUNK_SIZE;
  }

  // Make sure the phase 1 size is a multiple of num_chunk_sizes.
  phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) *
                num_chunk_sizes;

  // Make sure the phase 2 size is a multiple of (2 * num_chunk_sizes).
  phase2_size =
    ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) *
    2 * num_chunk_sizes;

  // The phase 2 size must be <= twice the phase 1 size so that deallocations
  // can't happen before allocations.
  if ( phase2_size > 2 * phase1_size ) phase2_size = 2 * phase1_size;

  size_t phase3_size = phase1_size;
  size_t half_phase2_size = phase2_size / 2;

  // Fixed-seed engine used to shuffle the work lists reproducibly.
  std::mt19937 shuffle_engine( 5489u );

  // Each entry in the work views has the following format.  The least
  // significant bit indicates allocation (0) vs. deallocation (1).  For
  // allocation, the other bits indicate the desired allocation size.

  // Initialize the phase 1 work view with an equal number of allocations for
  // each chunk size.
  work_view phase1_work( "Phase 1 Work", phase1_size );
  typename work_view::HostMirror host_phase1_work =
    create_mirror_view(phase1_work);

  size_t inner_size = phase1_size / num_chunk_sizes;
  unsigned chunk_size = base_chunk_size;

  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
    for ( size_t j = 0; j < inner_size; ++j ) {
      host_phase1_work[i * inner_size + j] = chunk_size << 1;
    }

    chunk_size *= 2;
  }

  std::shuffle( host_phase1_work.ptr_on_device(),
                host_phase1_work.ptr_on_device() + phase1_size,
                shuffle_engine );

  deep_copy( phase1_work, host_phase1_work );

  // Initialize the phase 2 work view with half allocations and half
  // deallocations with an equal number of allocations for each chunk size.
  work_view phase2_work( "Phase 2 Work", phase2_size );
  typename work_view::HostMirror host_phase2_work =
    create_mirror_view(phase2_work);

  inner_size = half_phase2_size / num_chunk_sizes;
  chunk_size = base_chunk_size;

  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
    for ( size_t j = 0; j < inner_size; ++j ) {
      host_phase2_work[i * inner_size + j] = chunk_size << 1;
    }

    chunk_size *= 2;
  }

  for ( size_t i = half_phase2_size; i < phase2_size; ++i ) {
    host_phase2_work[i] = 1;
  }

  std::shuffle( host_phase2_work.ptr_on_device(),
                host_phase2_work.ptr_on_device() + phase2_size,
                shuffle_engine );

  deep_copy( phase2_work, host_phase2_work );

  // Initialize the phase 3 work view with all deallocations.
  work_view phase3_work( "Phase 3 Work", phase3_size );
  typename work_view::HostMirror host_phase3_work =
    create_mirror_view(phase3_work);

  for ( size_t i = 0; i < phase3_size; ++i ) host_phase3_work[i] = 1;

  deep_copy( phase3_work, host_phase3_work );

  // Calculate the amount of memory needed for the allocator.  We need to know
  // the number of superblocks required for each chunk size and use that to
  // calculate the amount of memory for each chunk size.
  size_t lg_sb_size = 18;
  size_t sb_size = size_t( 1 ) << lg_sb_size;
  size_t total_size = 0;
  size_t allocs_per_size = phase1_size / num_chunk_sizes +
                           half_phase2_size / num_chunk_sizes;

  chunk_size = ceil_base_chunk_size;
  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
    size_t my_size = allocs_per_size * chunk_size;

    // Round each chunk size's share up to a whole number of superblocks.
    total_size += ( my_size + sb_size - 1 ) / sb_size * sb_size;
    chunk_size *= 2;
  }

  // Declare the queue to hold the records for allocated memory.  An allocation
  // adds a record to the back of the queue, and a deallocation removes a
  // record from the front of the queue.
  size_t num_allocations = phase1_size + half_phase2_size;
  scalar_view ptrs_front( "Pointers front" );
  scalar_view ptrs_back( "Pointers back" );

  pointer_view pointers( "pointers", num_allocations );

#ifdef TESTMEMORYPOOL_PRINT
  printf( "\n*** test_mempool2() ***\n" );
  printf( " num_chunk_sizes: %12zu\n", num_chunk_sizes );
  printf( " base_chunk_size: %12u\n", base_chunk_size );
  printf( " ceil_base_chunk_size: %12u\n", ceil_base_chunk_size );
  printf( " phase1_size: %12zu\n", phase1_size );
  printf( " phase2_size: %12zu\n", phase2_size );
  printf( " phase3_size: %12zu\n", phase3_size );
  printf( " allocs_per_size: %12zu\n", allocs_per_size );
  printf( " num_allocations: %12zu\n", num_allocations );
  printf( " total_size: %12zu\n", total_size );
  fflush( stdout );

  double elapsed_time = 0;
  Kokkos::Timer timer;
#endif

  pool_memory_space mempool( memory_space(), total_size * 1.2, lg_sb_size );

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "initialize mempool: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // Phase 1: allocations only.
  {
    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
                                pool_memory_space >
      adm( phase1_work, phase1_size, pointers, ptrs_front, ptrs_back, mempool );
  }

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "phase1: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // Phase 2: interleaved allocations and deallocations.
  {
    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
                                pool_memory_space >
      adm( phase2_work, phase2_size, pointers, ptrs_front, ptrs_back, mempool );
  }

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "phase2: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // Phase 3: deallocations only.
  {
    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
                                pool_memory_space >
      adm( phase3_work, phase3_size, pointers, ptrs_front, ptrs_back, mempool );
  }

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "phase3: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
#endif
}
// Exercises the allocator when it runs out of memory.
//
// The pool consists of a single superblock and every request is for the same
// chunk size.  The allocation loop asks for twice as many chunks as the
// superblock can hold; the deallocation loop then hands every slot of the
// pointer View back to the pool.
template < class Device >
void test_memory_exhaustion()
{
#ifdef TESTMEMORYPOOL_PRINT
  typedef typename Device::execution_space                 execution_space;
#endif
  typedef typename Device::memory_space                    memory_space;
  typedef Device                                           device_type;
  typedef Kokkos::View< pointer_obj *, device_type >       pointer_view;
  typedef Kokkos::Experimental::MemoryPool< device_type >  pool_memory_space;

  // 128 requests of 128 bytes each against a superblock sized for half of
  // them.
  const size_t chunk_size         = 128;
  const size_t num_chunks         = 128;
  const size_t superblock_size    = chunk_size * ( num_chunks / 2 );
  const size_t lg_superblock_size =
    Kokkos::Impl::integral_power_of_two( superblock_size );

  pointer_view pointers( "pointers", num_chunks );

#ifdef TESTMEMORYPOOL_PRINT
  std::cout << "\n*** test_memory_exhaustion() ***" << std::endl;

  double elapsed_time = 0;
  Kokkos::Timer timer;
#endif

  pool_memory_space mempool( memory_space(), superblock_size,
                             lg_superblock_size );

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "initialize mempool: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // The functor's constructor runs the allocation loop; the temporary is
  // destroyed again at the end of the statement.
  allocate_memory< pointer_view, pool_memory_space >(
    pointers, num_chunks, chunk_size, mempool );

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "allocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
  timer.reset();
#endif

  // In parallel, the allocations that succeeded were not put contiguously
  // into the pointers View.  The whole View can still be looped over and
  // have deallocate called because deallocate will just do nothing for NULL
  // pointers.
  deallocate_memory< pointer_view, pool_memory_space >(
    pointers, num_chunks, chunk_size, mempool );

#ifdef TESTMEMORYPOOL_PRINT
  execution_space::fence();
  elapsed_time = timer.seconds();
  print_results( "deallocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
  mempool.print_status();
#endif
#endif
}
}
#ifdef TESTMEMORYPOOL_PRINT
#undef TESTMEMORYPOOL_PRINT
#endif
#ifdef TESTMEMORYPOOL_PRINT_STATUS
#undef TESTMEMORYPOOL_PRINT_STATUS
#endif
#ifdef STRIDE
#undef STRIDE
#endif
#endif
Event Timeline
Log In to Comment