TestMemoryPool.hpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, Jul 17, 02:34

TestMemoryPool.hpp
View Options

	/*
	//@HEADER
	// ************************************************************************
	//
	// Kokkos v. 2.0
	// Copyright (2014) Sandia Corporation
	//
	// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
	// the U.S. Government retains certain rights in this software.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// 1. Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	//
	// 2. Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// 3. Neither the name of the Corporation nor the names of the
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
	// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
	// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	//
	// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
	//
	// ************************************************************************
	//@HEADER
	*/


	#ifndef KOKKOS_UNITTEST_MEMPOOL_HPP
	#define KOKKOS_UNITTEST_MEMPOOL_HPP

	#include <stdint.h>
	#include <stdio.h>

	#include <algorithm>
	#include <cmath>
	#include <iomanip>
	#include <iostream>
	#include <random>
	#include <string>

	#include <impl/Kokkos_Timer.hpp>

	//#define TESTMEMORYPOOL_PRINT
	//#define TESTMEMORYPOOL_PRINT_STATUS

	// STRIDE spaces out the active work items of the functors in this file:
	// each functor is launched over (count * STRIDE) indices and only the
	// indices that are multiples of STRIDE do any work.  When Kokkos is built
	// with CUDA the stride is 32; otherwise it is 1 and every index is active.
	// NOTE(review): 32 presumably matches the CUDA warp size so that a single
	// thread per warp touches the pool -- confirm.
	#ifdef KOKKOS_HAVE_CUDA
	#define STRIDE 32
	#else
	#define STRIDE 1
	#endif

	namespace TestMemoryPool {

	// Record for one chunk in the fixed-chunk-size tests.  The functors below
	// store the address returned by the pool here and read / write one 64-bit
	// word through it.
	struct pointer_obj {
	uint64_t * ptr;
	};

	// Record for one chunk in the mixed-size test: the address returned by the
	// pool together with the size that was requested.  The size is kept because
	// it has to be passed back to the pool when the chunk is deallocated.
	struct pointer_obj2 {
	void * ptr;
	size_t size;
	};

	// Functor that requests one chunk of m_chunk_size bytes from the pool for
	// every entry of the pointer view and stores the returned address in that
	// entry.  Constructing the functor runs the parallel loop.
	//
	//   ptrs      view of pointer records to fill
	//   num_ptrs  number of records in the view to fill
	//   cs        size in bytes of every allocation request
	//   m         pool the chunks are taken from
	template < typename PointerView, typename Allocator >
	struct allocate_memory {
	  typedef typename PointerView::execution_space  execution_space;
	  typedef typename execution_space::size_type    size_type;

	  PointerView  m_pointers;
	  size_t       m_chunk_size;
	  Allocator    m_mempool;

	  allocate_memory( PointerView & ptrs, size_t num_ptrs,
	                   size_t cs, Allocator & m )
	    : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m )
	  {
	    // Launch STRIDE indices per record; only every STRIDE-th one allocates.
	    const size_t launch_count = num_ptrs * STRIDE;
	    Kokkos::parallel_for( launch_count, *this );
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    m_pointers[i / STRIDE].ptr =
	      static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) );
	  }
	};

	// Reduction functor that counts how many entries of the pointer view hold a
	// null pointer.  Constructing the functor runs the reduction and writes the
	// count into 'res'.
	//
	//   ptrs      view of pointer records to inspect
	//   num_ptrs  number of records to inspect
	//   res       receives the number of null pointers found
	template < typename PointerView >
	struct count_invalid_memory {
	  typedef typename PointerView::execution_space  execution_space;
	  typedef typename execution_space::size_type    size_type;
	  typedef uint64_t                               value_type;

	  PointerView  m_pointers;
	  uint64_t &   m_result;

	  count_invalid_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res )
	    : m_pointers( ptrs ), m_result( res )
	  {
	    // Launch STRIDE indices per record; only every STRIDE-th one counts.
	    const size_t launch_count = num_ptrs * STRIDE;
	    Kokkos::parallel_reduce( launch_count, *this, m_result );
	  }

	  // Reduction identity: nothing counted yet.
	  KOKKOS_INLINE_FUNCTION
	  void init( value_type & v ) const
	  {
	    v = 0;
	  }

	  // Combine two partial counts.
	  KOKKOS_INLINE_FUNCTION
	  void join( volatile value_type & dst, volatile value_type const & src ) const
	  {
	    dst += src;
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i, value_type & r ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    if ( m_pointers[i / STRIDE].ptr == 0 ) {
	      r += 1;
	    }
	  }
	};

	// Functor that writes each record's own position in the view into the
	// 64-bit word its pointer refers to.  Constructing the functor runs the
	// parallel loop.  Every pointer is dereferenced, so all of them must be
	// valid.
	//
	//   ptrs      view of pointer records whose chunks are written
	//   num_ptrs  number of records to process
	template < typename PointerView >
	struct fill_memory {
	  typedef typename PointerView::execution_space  execution_space;
	  typedef typename execution_space::size_type    size_type;

	  PointerView  m_pointers;

	  fill_memory( PointerView & ptrs, size_t num_ptrs )
	    : m_pointers( ptrs )
	  {
	    // Launch STRIDE indices per record; only every STRIDE-th one writes.
	    const size_t launch_count = num_ptrs * STRIDE;
	    Kokkos::parallel_for( launch_count, *this );
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    *m_pointers[i / STRIDE].ptr = i / STRIDE;
	  }
	};

	// Reduction functor that adds up the 64-bit word each record's pointer
	// refers to.  Constructing the functor runs the reduction and writes the
	// total into 'res'.  Every pointer is dereferenced, so all of them must be
	// valid.
	//
	//   ptrs      view of pointer records whose chunks are read
	//   num_ptrs  number of records to process
	//   res       receives the sum of the values read
	template < typename PointerView >
	struct sum_memory {
	  typedef typename PointerView::execution_space  execution_space;
	  typedef typename execution_space::size_type    size_type;
	  typedef uint64_t                               value_type;

	  PointerView  m_pointers;
	  uint64_t &   m_result;

	  sum_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res )
	    : m_pointers( ptrs ), m_result( res )
	  {
	    // Launch STRIDE indices per record; only every STRIDE-th one reads.
	    const size_t launch_count = num_ptrs * STRIDE;
	    Kokkos::parallel_reduce( launch_count, *this, m_result );
	  }

	  // Reduction identity: empty sum.
	  KOKKOS_INLINE_FUNCTION
	  void init( value_type & v ) const
	  {
	    v = 0;
	  }

	  // Combine two partial sums.
	  KOKKOS_INLINE_FUNCTION
	  void join( volatile value_type & dst, volatile value_type const & src ) const
	  {
	    dst += src;
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i, value_type & r ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    r += *m_pointers[i / STRIDE].ptr;
	  }
	};

	// Functor that hands every pointer stored in the view back to the pool,
	// passing m_chunk_size as the size of each chunk.  Constructing the functor
	// runs the parallel loop.
	//
	//   ptrs      view of pointer records to release
	//   num_ptrs  number of records to release
	//   cs        size in bytes that was requested for every chunk
	//   m         pool the chunks are returned to
	template < typename PointerView, typename Allocator >
	struct deallocate_memory {
	  typedef typename PointerView::execution_space  execution_space;
	  typedef typename execution_space::size_type    size_type;

	  PointerView  m_pointers;
	  size_t       m_chunk_size;
	  Allocator    m_mempool;

	  deallocate_memory( PointerView & ptrs, size_t num_ptrs,
	                     size_t cs, Allocator & m )
	    : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m )
	  {
	    // Launch STRIDE indices per record; only every STRIDE-th one releases.
	    const size_t launch_count = num_ptrs * STRIDE;
	    Kokkos::parallel_for( launch_count, *this );
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    m_mempool.deallocate( m_pointers[i / STRIDE].ptr, m_chunk_size );
	  }
	};

	// Functor that executes a list of work items, each of which is either an
	// allocation or a deallocation.  The pointer view is used as a queue:
	// an allocation atomically claims the next slot at the back counter and
	// records the new chunk there; a deallocation atomically claims the next
	// slot at the front counter and releases the chunk recorded there.
	// Constructing the functor runs the parallel loop.
	//
	// Work item encoding: bit 0 clear means "allocate" and the remaining bits
	// (item >> 1) give the request size; bit 0 set means "deallocate".
	//
	//   w          view of encoded work items
	//   work_size  number of work items to execute
	//   p          queue of allocation records
	//   pf         scalar view holding the front (deallocation) counter
	//   pb         scalar view holding the back (allocation) counter
	//   m          pool used for both operations
	template < typename WorkView, typename PointerView, typename ScalarView,
	           typename Allocator >
	struct allocate_deallocate_memory {
	  typedef typename WorkView::execution_space   execution_space;
	  typedef typename execution_space::size_type  size_type;

	  WorkView     m_work;
	  PointerView  m_pointers;
	  ScalarView   m_ptrs_front;
	  ScalarView   m_ptrs_back;
	  Allocator    m_mempool;

	  allocate_deallocate_memory( WorkView & w, size_t work_size, PointerView & p,
	                              ScalarView pf, ScalarView pb, Allocator & m )
	    : m_work( w ), m_pointers( p ), m_ptrs_front( pf ), m_ptrs_back( pb ),
	      m_mempool( m )
	  {
	    // Launch STRIDE indices per work item; only every STRIDE-th one acts.
	    const size_t launch_count = work_size * STRIDE;
	    Kokkos::parallel_for( launch_count, *this );
	  }

	  KOKKOS_INLINE_FUNCTION
	  void operator()( size_type i ) const
	  {
	    // Indices that are not a multiple of STRIDE are idle.
	    if ( i % STRIDE != 0 ) return;

	    const unsigned work_item = m_work[i / STRIDE];
	    const bool wants_deallocation = ( work_item & 1 ) != 0;

	    if ( wants_deallocation ) {
	      // Take the oldest outstanding record and give its chunk back.
	      const size_t slot = Kokkos::atomic_fetch_add( &m_ptrs_front(), 1 );
	      m_mempool.deallocate( m_pointers[slot].ptr, m_pointers[slot].size );
	    }
	    else {
	      // Claim a fresh record and remember both the chunk and its size.
	      const size_t slot = Kokkos::atomic_fetch_add( &m_ptrs_back(), 1 );
	      const size_t request_bytes = work_item >> 1;
	      m_pointers[slot].ptr = m_mempool.allocate( request_bytes );
	      m_pointers[slot].size = request_bytes;
	    }
	  }
	};

	// Layout of the timing report lines: number of digits after the decimal
	// point, width of the label column, and width of the elapsed-time column.
	#define PRECISION 6
	#define SHIFTW 24
	#define SHIFTW2 12

	// Prints one report line to std::cout: the label right-aligned in SHIFTW
	// columns followed by the elapsed time in fixed notation, right-aligned in
	// SHIFTW2 columns.
	template < typename F >
	void print_results( const std::string & text, F elapsed_time )
	{
	  std::ostream & out = std::cout;

	  out << std::setw( SHIFTW ) << text;
	  out << std::setw( SHIFTW2 ) << std::fixed
	      << std::setprecision( PRECISION ) << elapsed_time;
	  out << std::endl;
	}

	// Same as the two-argument form, then appends a space and 'result'
	// right-aligned in 'width' columns.
	template < typename F, typename T >
	void print_results( const std::string & text, unsigned long long width,
	                    F elapsed_time, T result )
	{
	  std::ostream & out = std::cout;

	  out << std::setw( SHIFTW ) << text;
	  out << std::setw( SHIFTW2 ) << std::fixed
	      << std::setprecision( PRECISION ) << elapsed_time;
	  out << " " << std::setw( width ) << result;
	  out << std::endl;
	}

	// Variant of the four-argument form for a std::string result.
	template < typename F >
	void print_results( const std::string & text, unsigned long long width,
	                    F elapsed_time, const std::string & result )
	{
	  std::ostream & out = std::cout;

	  out << std::setw( SHIFTW ) << text;
	  out << std::setw( SHIFTW2 ) << std::fixed
	      << std::setprecision( PRECISION ) << elapsed_time;
	  out << " " << std::setw( width ) << result;
	  out << std::endl;
	}

	// This test slams allocation and deallocation in a worse than real-world usage
	// scenario to see how bad the thread-safety really is by having a loop where
	// all threads allocate and a subsequent loop where all threads deallocate.
	// All of the allocation requests are for equal-sized chunks that are the base
	// chunk size of the memory pool. It also tests initialization of the memory
	// pool and breaking large chunks into smaller chunks to fulfill allocation
	// requests. It verifies that MemoryPool(), allocate(), and deallocate() work
	// correctly.
	template < class Device >
	bool test_mempool( size_t chunk_size, size_t total_size )
	{
	typedef typename Device::execution_space execution_space;
	typedef typename Device::memory_space memory_space;
	typedef Device device_type;
	typedef Kokkos::View< pointer_obj *, device_type > pointer_view;
	typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space;

	uint64_t result;
	size_t num_chunks = total_size / chunk_size;
	bool return_val = true;

	pointer_view pointers( "pointers", num_chunks );

	#ifdef TESTMEMORYPOOL_PRINT
	std::cout << "* test_mempool() *" << std::endl
	<< std::setw( SHIFTW ) << "chunk_size: " << std::setw( 12 )
	<< chunk_size << std::endl
	<< std::setw( SHIFTW ) << "total_size: " << std::setw( 12 )
	<< total_size << std::endl
	<< std::setw( SHIFTW ) << "num_chunks: " << std::setw( 12 )
	<< num_chunks << std::endl;

	double elapsed_time = 0;
	Kokkos::Timer timer;
	#endif

	pool_memory_space mempool( memory_space(), total_size * 1.2, 20 );

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "initialize mempool: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	allocate_memory< pointer_view, pool_memory_space >
	am( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "allocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	count_invalid_memory< pointer_view > sm( pointers, num_chunks, result );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "invalid chunks: ", 16, elapsed_time, result );
	timer.reset();
	#endif

	{
	fill_memory< pointer_view > fm( pointers, num_chunks );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "fill chunks: ", elapsed_time );
	timer.reset();
	#endif

	{
	sum_memory< pointer_view > sm( pointers, num_chunks, result );
	}

	execution_space::fence();

	#ifdef TESTMEMORYPOOL_PRINT
	elapsed_time = timer.seconds();
	print_results( "sum chunks: ", 16, elapsed_time, result );
	#endif

	if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) {
	std::cerr << "Invalid sum value in memory." << std::endl;
	return_val = false;
	}

	#ifdef TESTMEMORYPOOL_PRINT
	timer.reset();
	#endif

	{
	deallocate_memory< pointer_view, pool_memory_space >
	dm( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "deallocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	allocate_memory< pointer_view, pool_memory_space >
	am( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "allocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	count_invalid_memory< pointer_view > sm( pointers, num_chunks, result );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "invalid chunks: ", 16, elapsed_time, result );
	timer.reset();
	#endif

	{
	fill_memory< pointer_view > fm( pointers, num_chunks );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "fill chunks: ", elapsed_time );
	timer.reset();
	#endif

	{
	sum_memory< pointer_view > sm( pointers, num_chunks, result );
	}

	execution_space::fence();

	#ifdef TESTMEMORYPOOL_PRINT
	elapsed_time = timer.seconds();
	print_results( "sum chunks: ", 16, elapsed_time, result );
	#endif

	if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) {
	std::cerr << "Invalid sum value in memory." << std::endl;
	return_val = false;
	}

	#ifdef TESTMEMORYPOOL_PRINT
	timer.reset();
	#endif

	{
	deallocate_memory< pointer_view, pool_memory_space >
	dm( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "deallocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	#endif

	return return_val;
	}

	// Returns 2^k where k is the index of the highest set bit of 'val' when
	// 'val' is itself a power of two, and that index plus one otherwise.  For a
	// positive 'val' this is the smallest power of two that is >= val.
	template < typename T >
	T smallest_power2_ge( T val )
	{
	  // Index of the most significant nonzero bit.
	  const int top_bit = Kokkos::Impl::bit_scan_reverse( val );

	  // An exact power of two is already its own ceiling; anything else needs
	  // the next exponent up.
	  int exponent = top_bit;
	  if ( !Kokkos::Impl::is_integral_power_of_two( val ) ) {
	    exponent += 1;
	  }

	  return T(1) << T(exponent);
	}

	// This test makes allocation requests for multiple sizes and interleaves
	// allocation and deallocation.
	//
	// There are 3 phases. The first phase does only allocations to build up a
	// working state for the allocator. The second phase interleaves allocations
	// and deletions. The third phase does only deallocations to undo all the
	// allocations from the first phase. By building first to a working state,
	// allocations and deallocations can happen in any order for the second phase.
	// Each phase performs on multiple chunk sizes.
	//
	//   base_chunk_size  smallest request size; raised to MIN_BASE_CHUNK_SIZE
	//                    if smaller.  Each further chunk size doubles it.
	//   num_chunk_sizes  number of distinct request sizes; the test does nothing
	//                    when this is zero
	//   phase1_size      number of phase 1 work items, rounded up to a multiple
	//                    of num_chunk_sizes
	//   phase2_size      number of phase 2 work items, rounded up to a multiple
	//                    of 2 * num_chunk_sizes and capped at 2 * phase1_size
	//
	// The work order of phases 1 and 2 is shuffled with a fixed-seed generator,
	// so runs are reproducible.
	template < class Device >
	void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes,
	                    size_t phase1_size, size_t phase2_size )
	{
	#ifdef TESTMEMORYPOOL_PRINT
	  typedef typename Device::execution_space execution_space;
	#endif
	  typedef typename Device::memory_space memory_space;
	  typedef Device device_type;
	  typedef Kokkos::View< unsigned *, device_type > work_view;
	  typedef Kokkos::View< size_t, device_type > scalar_view;
	  typedef Kokkos::View< pointer_obj2 *, device_type > pointer_view;
	  typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space;

	  enum {
	    MIN_CHUNK_SIZE = 64,
	    MIN_BASE_CHUNK_SIZE = MIN_CHUNK_SIZE / 2 + 1
	  };

	  // Without any chunk size there is no work, and the size computations
	  // below would divide by zero.
	  if ( num_chunk_sizes == 0 ) return;

	  // Make sure the base chunk size is at least MIN_BASE_CHUNK_SIZE bytes, so
	  // all the different chunk sizes translate to different block sizes for the
	  // allocator.
	  if ( base_chunk_size < MIN_BASE_CHUNK_SIZE ) {
	    base_chunk_size = MIN_BASE_CHUNK_SIZE;
	  }

	  // Get the smallest power of 2 >= the base chunk size. The size must be
	  // >= MIN_CHUNK_SIZE, though.
	  unsigned ceil_base_chunk_size = smallest_power2_ge( base_chunk_size );
	  if ( ceil_base_chunk_size < MIN_CHUNK_SIZE ) {
	    ceil_base_chunk_size = MIN_CHUNK_SIZE;
	  }

	  // Make sure the phase 1 size is multiples of num_chunk_sizes.
	  phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) *
	                num_chunk_sizes;

	  // Make sure the phase 2 size is multiples of (2 * num_chunk_sizes).
	  phase2_size =
	    ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) *
	    2 * num_chunk_sizes;

	  // The phase2 size must be <= twice the phase1 size so that deallocations
	  // can't happen before allocations.
	  if ( phase2_size > 2 * phase1_size ) phase2_size = 2 * phase1_size;

	  size_t phase3_size = phase1_size;
	  size_t half_phase2_size = phase2_size / 2;

	  // Generator for shuffling the work lists.  The seed is fixed so that the
	  // order of operations is the same on every run.
	  std::mt19937 shuffle_rng( 12345 );

	  // Each entry in the work views has the following format. The least
	  // significant bit indicates allocation (0) vs. deallocation (1). For
	  // allocation, the other bits indicate the desired allocation size.

	  // Initialize the phase 1 work view with an equal number of allocations for
	  // each chunk size.
	  work_view phase1_work( "Phase 1 Work", phase1_size );
	  typename work_view::HostMirror host_phase1_work =
	    create_mirror_view(phase1_work);

	  size_t inner_size = phase1_size / num_chunk_sizes;
	  unsigned chunk_size = base_chunk_size;

	  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
	    for ( size_t j = 0; j < inner_size; ++j ) {
	      host_phase1_work[i * inner_size + j] = chunk_size << 1;
	    }

	    chunk_size *= 2;
	  }

	  std::shuffle( host_phase1_work.ptr_on_device(),
	                host_phase1_work.ptr_on_device() + phase1_size,
	                shuffle_rng );

	  deep_copy( phase1_work, host_phase1_work );

	  // Initialize the phase 2 work view with half allocations and half
	  // deallocations with an equal number of allocations for each chunk size.
	  work_view phase2_work( "Phase 2 Work", phase2_size );
	  typename work_view::HostMirror host_phase2_work =
	    create_mirror_view(phase2_work);

	  inner_size = half_phase2_size / num_chunk_sizes;
	  chunk_size = base_chunk_size;

	  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
	    for ( size_t j = 0; j < inner_size; ++j ) {
	      host_phase2_work[i * inner_size + j] = chunk_size << 1;
	    }

	    chunk_size *= 2;
	  }

	  for ( size_t i = half_phase2_size; i < phase2_size; ++i ) {
	    host_phase2_work[i] = 1;
	  }

	  std::shuffle( host_phase2_work.ptr_on_device(),
	                host_phase2_work.ptr_on_device() + phase2_size,
	                shuffle_rng );

	  deep_copy( phase2_work, host_phase2_work );

	  // Initialize the phase 3 work view with all deallocations.
	  work_view phase3_work( "Phase 3 Work", phase3_size );
	  typename work_view::HostMirror host_phase3_work =
	    create_mirror_view(phase3_work);

	  for ( size_t i = 0; i < phase3_size; ++i ) host_phase3_work[i] = 1;

	  deep_copy( phase3_work, host_phase3_work );

	  // Calculate the amount of memory needed for the allocator. We need to know
	  // the number of superblocks required for each chunk size and use that to
	  // calculate the amount of memory for each chunk size.
	  size_t lg_sb_size = 18;
	  size_t sb_size = size_t( 1 ) << lg_sb_size;
	  size_t total_size = 0;
	  size_t allocs_per_size = phase1_size / num_chunk_sizes +
	                           half_phase2_size / num_chunk_sizes;

	  chunk_size = ceil_base_chunk_size;
	  for ( size_t i = 0; i < num_chunk_sizes; ++i ) {
	    size_t my_size = allocs_per_size * chunk_size;
	    // Round each size class up to a whole number of superblocks.
	    total_size += ( my_size + sb_size - 1 ) / sb_size * sb_size;
	    chunk_size *= 2;
	  }

	  // Declare the queue to hold the records for allocated memory. An allocation
	  // adds a record to the back of the queue, and a deallocation removes a
	  // record from the front of the queue.
	  size_t num_allocations = phase1_size + half_phase2_size;
	  scalar_view ptrs_front( "Pointers front" );
	  scalar_view ptrs_back( "Pointers back" );

	  pointer_view pointers( "pointers", num_allocations );

	#ifdef TESTMEMORYPOOL_PRINT
	  printf( "\n* test_mempool2() *\n" );
	  printf( " num_chunk_sizes: %12zu\n", num_chunk_sizes );
	  printf( " base_chunk_size: %12u\n", base_chunk_size );
	  printf( " ceil_base_chunk_size: %12u\n", ceil_base_chunk_size );
	  printf( " phase1_size: %12zu\n", phase1_size );
	  printf( " phase2_size: %12zu\n", phase2_size );
	  printf( " phase3_size: %12zu\n", phase3_size );
	  printf( " allocs_per_size: %12zu\n", allocs_per_size );
	  printf( " num_allocations: %12zu\n", num_allocations );
	  printf( " total_size: %12zu\n", total_size );
	  fflush( stdout );

	  double elapsed_time = 0;
	  Kokkos::Timer timer;
	#endif

	  pool_memory_space mempool( memory_space(), total_size * 1.2, lg_sb_size );

	#ifdef TESTMEMORYPOOL_PRINT
	  execution_space::fence();
	  elapsed_time = timer.seconds();
	  print_results( "initialize mempool: ", elapsed_time );

	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	  mempool.print_status();
	#endif

	  timer.reset();
	#endif

	  // Phase 1: allocations only.
	  {
	    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
	                                pool_memory_space >
	      adm( phase1_work, phase1_size, pointers, ptrs_front, ptrs_back, mempool );
	  }

	#ifdef TESTMEMORYPOOL_PRINT
	  execution_space::fence();
	  elapsed_time = timer.seconds();
	  print_results( "phase1: ", elapsed_time );

	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	  mempool.print_status();
	#endif

	  timer.reset();
	#endif

	  // Phase 2: allocations and deallocations interleaved.
	  {
	    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
	                                pool_memory_space >
	      adm( phase2_work, phase2_size, pointers, ptrs_front, ptrs_back, mempool );
	  }

	#ifdef TESTMEMORYPOOL_PRINT
	  execution_space::fence();
	  elapsed_time = timer.seconds();
	  print_results( "phase2: ", elapsed_time );

	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	  mempool.print_status();
	#endif

	  timer.reset();
	#endif

	  // Phase 3: deallocations only.
	  {
	    allocate_deallocate_memory< work_view, pointer_view, scalar_view,
	                                pool_memory_space >
	      adm( phase3_work, phase3_size, pointers, ptrs_front, ptrs_back, mempool );
	  }

	#ifdef TESTMEMORYPOOL_PRINT
	  execution_space::fence();
	  elapsed_time = timer.seconds();
	  print_results( "phase3: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	  mempool.print_status();
	#endif
	#endif
	}

	// Tests for correct behavior when the allocator is out of memory.
	template < class Device >
	void test_memory_exhaustion()
	{
	#ifdef TESTMEMORYPOOL_PRINT
	typedef typename Device::execution_space execution_space;
	#endif
	typedef typename Device::memory_space memory_space;
	typedef Device device_type;
	typedef Kokkos::View< pointer_obj *, device_type > pointer_view;
	typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space;

	// The allocator will have a single superblock, and allocations will all be
	// of the same chunk size. The allocation loop will attempt to allocate
	// twice the number of chunks as are available in the allocator. The
	// deallocation loop will only free the successfully allocated chunks.

	size_t chunk_size = 128;
	size_t num_chunks = 128;
	size_t half_num_chunks = num_chunks / 2;
	size_t superblock_size = chunk_size * half_num_chunks;
	size_t lg_superblock_size =
	Kokkos::Impl::integral_power_of_two( superblock_size );

	pointer_view pointers( "pointers", num_chunks );

	#ifdef TESTMEMORYPOOL_PRINT
	std::cout << "\n* test_memory_exhaustion() *" << std::endl;

	double elapsed_time = 0;
	Kokkos::Timer timer;
	#endif

	pool_memory_space mempool( memory_space(), superblock_size,
	lg_superblock_size );

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "initialize mempool: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	allocate_memory< pointer_view, pool_memory_space >
	am( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "allocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	timer.reset();
	#endif

	{
	// In parallel, the allocations that succeeded were not put contiguously
	// into the pointers View. The whole View can still be looped over and
	// have deallocate called because deallocate will just do nothing for NULL
	// pointers.
	deallocate_memory< pointer_view, pool_memory_space >
	dm( pointers, num_chunks, chunk_size, mempool );
	}

	#ifdef TESTMEMORYPOOL_PRINT
	execution_space::fence();
	elapsed_time = timer.seconds();
	print_results( "deallocate chunks: ", elapsed_time );
	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	mempool.print_status();
	#endif
	#endif
	}

	}

	#ifdef TESTMEMORYPOOL_PRINT
	#undef TESTMEMORYPOOL_PRINT
	#endif

	#ifdef TESTMEMORYPOOL_PRINT_STATUS
	#undef TESTMEMORYPOOL_PRINT_STATUS
	#endif

	#ifdef STRIDE
	#undef STRIDE
	#endif

	#endif

TestMemoryPool.hppNo OneTemporaryActions

File Metadata

TestMemoryPool.hppView Options

Event Timeline

TestMemoryPool.hpp
No OneTemporary
Actions

TestMemoryPool.hpp
View Options