Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F83497842
TestMemoryPool.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Sep 17, 11:34
Size
24 KB
Mime Type
text/x-c++
Expires
Thu, Sep 19, 11:34 (2 d)
Engine
blob
Format
Raw Data
Handle
20848059
Attached To
rLAMMPS lammps
TestMemoryPool.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_UNITTEST_MEMPOOL_HPP
#define KOKKOS_UNITTEST_MEMPOOL_HPP
#include <stdio.h>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <impl/Kokkos_Timer.hpp>
//#define TESTMEMORYPOOL_PRINT
//#define TESTMEMORYPOOL_PRINT_STATUS
#ifdef KOKKOS_HAVE_CUDA
#define STRIDE 32
#else
#define STRIDE 1
#endif
namespace
TestMemoryPool
{
struct
pointer_obj
{
uint64_t
*
ptr
;
};
struct
pointer_obj2
{
void
*
ptr
;
size_t
size
;
};
template
<
typename
PointerView
,
typename
Allocator
>
struct
allocate_memory
{
typedef
typename
PointerView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
PointerView
m_pointers
;
size_t
m_chunk_size
;
Allocator
m_mempool
;
allocate_memory
(
PointerView
&
ptrs
,
size_t
num_ptrs
,
size_t
cs
,
Allocator
&
m
)
:
m_pointers
(
ptrs
),
m_chunk_size
(
cs
),
m_mempool
(
m
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_for
(
num_ptrs
*
STRIDE
,
*
this
);
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
)
const
{
if
(
i
%
STRIDE
==
0
)
{
m_pointers
[
i
/
STRIDE
].
ptr
=
static_cast
<
uint64_t
*
>
(
m_mempool
.
allocate
(
m_chunk_size
)
);
}
}
};
template
<
typename
PointerView
>
struct
count_invalid_memory
{
typedef
typename
PointerView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
typedef
uint64_t
value_type
;
PointerView
m_pointers
;
uint64_t
&
m_result
;
count_invalid_memory
(
PointerView
&
ptrs
,
size_t
num_ptrs
,
uint64_t
&
res
)
:
m_pointers
(
ptrs
),
m_result
(
res
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_reduce
(
num_ptrs
*
STRIDE
,
*
this
,
m_result
);
}
KOKKOS_INLINE_FUNCTION
void
init
(
value_type
&
v
)
const
{
v
=
0
;
}
KOKKOS_INLINE_FUNCTION
void
join
(
volatile
value_type
&
dst
,
volatile
value_type
const
&
src
)
const
{
dst
+=
src
;
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
,
value_type
&
r
)
const
{
if
(
i
%
STRIDE
==
0
)
{
r
+=
(
m_pointers
[
i
/
STRIDE
].
ptr
==
0
);
}
}
};
template
<
typename
PointerView
>
struct
fill_memory
{
typedef
typename
PointerView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
PointerView
m_pointers
;
fill_memory
(
PointerView
&
ptrs
,
size_t
num_ptrs
)
:
m_pointers
(
ptrs
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_for
(
num_ptrs
*
STRIDE
,
*
this
);
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
)
const
{
if
(
i
%
STRIDE
==
0
)
{
*
m_pointers
[
i
/
STRIDE
].
ptr
=
i
/
STRIDE
;
}
}
};
template
<
typename
PointerView
>
struct
sum_memory
{
typedef
typename
PointerView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
typedef
uint64_t
value_type
;
PointerView
m_pointers
;
uint64_t
&
m_result
;
sum_memory
(
PointerView
&
ptrs
,
size_t
num_ptrs
,
uint64_t
&
res
)
:
m_pointers
(
ptrs
),
m_result
(
res
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_reduce
(
num_ptrs
*
STRIDE
,
*
this
,
m_result
);
}
KOKKOS_INLINE_FUNCTION
void
init
(
value_type
&
v
)
const
{
v
=
0
;
}
KOKKOS_INLINE_FUNCTION
void
join
(
volatile
value_type
&
dst
,
volatile
value_type
const
&
src
)
const
{
dst
+=
src
;
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
,
value_type
&
r
)
const
{
if
(
i
%
STRIDE
==
0
)
{
r
+=
*
m_pointers
[
i
/
STRIDE
].
ptr
;
}
}
};
template
<
typename
PointerView
,
typename
Allocator
>
struct
deallocate_memory
{
typedef
typename
PointerView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
PointerView
m_pointers
;
size_t
m_chunk_size
;
Allocator
m_mempool
;
deallocate_memory
(
PointerView
&
ptrs
,
size_t
num_ptrs
,
size_t
cs
,
Allocator
&
m
)
:
m_pointers
(
ptrs
),
m_chunk_size
(
cs
),
m_mempool
(
m
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_for
(
num_ptrs
*
STRIDE
,
*
this
);
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
)
const
{
if
(
i
%
STRIDE
==
0
)
{
m_mempool
.
deallocate
(
m_pointers
[
i
/
STRIDE
].
ptr
,
m_chunk_size
);
}
}
};
template
<
typename
WorkView
,
typename
PointerView
,
typename
ScalarView
,
typename
Allocator
>
struct
allocate_deallocate_memory
{
typedef
typename
WorkView
::
execution_space
execution_space
;
typedef
typename
execution_space
::
size_type
size_type
;
WorkView
m_work
;
PointerView
m_pointers
;
ScalarView
m_ptrs_front
;
ScalarView
m_ptrs_back
;
Allocator
m_mempool
;
allocate_deallocate_memory
(
WorkView
&
w
,
size_t
work_size
,
PointerView
&
p
,
ScalarView
pf
,
ScalarView
pb
,
Allocator
&
m
)
:
m_work
(
w
),
m_pointers
(
p
),
m_ptrs_front
(
pf
),
m_ptrs_back
(
pb
),
m_mempool
(
m
)
{
// Initialize the view with the out degree of each vertex.
Kokkos
::
parallel_for
(
work_size
*
STRIDE
,
*
this
);
}
KOKKOS_INLINE_FUNCTION
void
operator
()(
size_type
i
)
const
{
if
(
i
%
STRIDE
==
0
)
{
unsigned
my_work
=
m_work
[
i
/
STRIDE
];
if
(
(
my_work
&
1
)
==
0
)
{
// Allocation.
size_t
pos
=
Kokkos
::
atomic_fetch_add
(
&
m_ptrs_back
(),
1
);
size_t
alloc_size
=
my_work
>>
1
;
m_pointers
[
pos
].
ptr
=
m_mempool
.
allocate
(
alloc_size
);
m_pointers
[
pos
].
size
=
alloc_size
;
}
else
{
// Deallocation.
size_t
pos
=
Kokkos
::
atomic_fetch_add
(
&
m_ptrs_front
(),
1
);
m_mempool
.
deallocate
(
m_pointers
[
pos
].
ptr
,
m_pointers
[
pos
].
size
);
}
}
}
};
#define PRECISION 6
#define SHIFTW 24
#define SHIFTW2 12
template
<
typename
F
>
void
print_results
(
const
std
::
string
&
text
,
F
elapsed_time
)
{
std
::
cout
<<
std
::
setw
(
SHIFTW
)
<<
text
<<
std
::
setw
(
SHIFTW2
)
<<
std
::
fixed
<<
std
::
setprecision
(
PRECISION
)
<<
elapsed_time
<<
std
::
endl
;
}
template
<
typename
F
,
typename
T
>
void
print_results
(
const
std
::
string
&
text
,
unsigned
long
long
width
,
F
elapsed_time
,
T
result
)
{
std
::
cout
<<
std
::
setw
(
SHIFTW
)
<<
text
<<
std
::
setw
(
SHIFTW2
)
<<
std
::
fixed
<<
std
::
setprecision
(
PRECISION
)
<<
elapsed_time
<<
" "
<<
std
::
setw
(
width
)
<<
result
<<
std
::
endl
;
}
template
<
typename
F
>
void
print_results
(
const
std
::
string
&
text
,
unsigned
long
long
width
,
F
elapsed_time
,
const
std
::
string
&
result
)
{
std
::
cout
<<
std
::
setw
(
SHIFTW
)
<<
text
<<
std
::
setw
(
SHIFTW2
)
<<
std
::
fixed
<<
std
::
setprecision
(
PRECISION
)
<<
elapsed_time
<<
" "
<<
std
::
setw
(
width
)
<<
result
<<
std
::
endl
;
}
// This test slams allocation and deallocation in a worse than real-world usage
// scenario to see how bad the thread-safety really is by having a loop where
// all threads allocate and a subsequent loop where all threads deallocate.
// All of the allocation requests are for equal-sized chunks that are the base
// chunk size of the memory pool. It also tests initialization of the memory
// pool and breaking large chunks into smaller chunks to fulfill allocation
// requests. It verifies that MemoryPool(), allocate(), and deallocate() work
// correctly.
template
<
class
Device
>
bool
test_mempool
(
size_t
chunk_size
,
size_t
total_size
)
{
typedef
typename
Device
::
execution_space
execution_space
;
typedef
typename
Device
::
memory_space
memory_space
;
typedef
Device
device_type
;
typedef
Kokkos
::
View
<
pointer_obj
*
,
device_type
>
pointer_view
;
typedef
Kokkos
::
Experimental
::
MemoryPool
<
device_type
>
pool_memory_space
;
uint64_t
result
;
size_t
num_chunks
=
total_size
/
chunk_size
;
bool
return_val
=
true
;
pointer_view
pointers
(
"pointers"
,
num_chunks
);
#ifdef TESTMEMORYPOOL_PRINT
std
::
cout
<<
"*** test_mempool() ***"
<<
std
::
endl
<<
std
::
setw
(
SHIFTW
)
<<
"chunk_size: "
<<
std
::
setw
(
12
)
<<
chunk_size
<<
std
::
endl
<<
std
::
setw
(
SHIFTW
)
<<
"total_size: "
<<
std
::
setw
(
12
)
<<
total_size
<<
std
::
endl
<<
std
::
setw
(
SHIFTW
)
<<
"num_chunks: "
<<
std
::
setw
(
12
)
<<
num_chunks
<<
std
::
endl
;
double
elapsed_time
=
0
;
Kokkos
::
Timer
timer
;
#endif
pool_memory_space
mempool
(
memory_space
(),
total_size
*
1.2
,
20
);
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"initialize mempool: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_memory
<
pointer_view
,
pool_memory_space
>
am
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"allocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
count_invalid_memory
<
pointer_view
>
sm
(
pointers
,
num_chunks
,
result
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"invalid chunks: "
,
16
,
elapsed_time
,
result
);
timer
.
reset
();
#endif
{
fill_memory
<
pointer_view
>
fm
(
pointers
,
num_chunks
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"fill chunks: "
,
elapsed_time
);
timer
.
reset
();
#endif
{
sum_memory
<
pointer_view
>
sm
(
pointers
,
num_chunks
,
result
);
}
execution_space
::
fence
();
#ifdef TESTMEMORYPOOL_PRINT
elapsed_time
=
timer
.
seconds
();
print_results
(
"sum chunks: "
,
16
,
elapsed_time
,
result
);
#endif
if
(
result
!=
(
num_chunks
*
(
num_chunks
-
1
)
)
/
2
)
{
std
::
cerr
<<
"Invalid sum value in memory."
<<
std
::
endl
;
return_val
=
false
;
}
#ifdef TESTMEMORYPOOL_PRINT
timer
.
reset
();
#endif
{
deallocate_memory
<
pointer_view
,
pool_memory_space
>
dm
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"deallocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_memory
<
pointer_view
,
pool_memory_space
>
am
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"allocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
count_invalid_memory
<
pointer_view
>
sm
(
pointers
,
num_chunks
,
result
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"invalid chunks: "
,
16
,
elapsed_time
,
result
);
timer
.
reset
();
#endif
{
fill_memory
<
pointer_view
>
fm
(
pointers
,
num_chunks
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"fill chunks: "
,
elapsed_time
);
timer
.
reset
();
#endif
{
sum_memory
<
pointer_view
>
sm
(
pointers
,
num_chunks
,
result
);
}
execution_space
::
fence
();
#ifdef TESTMEMORYPOOL_PRINT
elapsed_time
=
timer
.
seconds
();
print_results
(
"sum chunks: "
,
16
,
elapsed_time
,
result
);
#endif
if
(
result
!=
(
num_chunks
*
(
num_chunks
-
1
)
)
/
2
)
{
std
::
cerr
<<
"Invalid sum value in memory."
<<
std
::
endl
;
return_val
=
false
;
}
#ifdef TESTMEMORYPOOL_PRINT
timer
.
reset
();
#endif
{
deallocate_memory
<
pointer_view
,
pool_memory_space
>
dm
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"deallocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
#endif
return
return_val
;
}
template
<
typename
T
>
T
smallest_power2_ge
(
T
val
)
{
// Find the most significant nonzero bit.
int
first_nonzero_bit
=
Kokkos
::
Impl
::
bit_scan_reverse
(
val
);
// If val is an integral power of 2, ceil( log2(val) ) is equal to the
// most significant nonzero bit. Otherwise, you need to add 1.
int
lg2_size
=
first_nonzero_bit
+
!
Kokkos
::
Impl
::
is_integral_power_of_two
(
val
);
return
T
(
1
)
<<
T
(
lg2_size
);
}
// This test makes allocation requests for multiple sizes and interleaves
// allocation and deallocation.
//
// There are 3 phases. The first phase does only allocations to build up a
// working state for the allocator. The second phase interleaves allocations
// and deletions. The third phase does only deallocations to undo all the
// allocations from the first phase. By building first to a working state,
// allocations and deallocations can happen in any order for the second phase.
// Each phase performs on multiple chunk sizes.
template
<
class
Device
>
void
test_mempool2
(
unsigned
base_chunk_size
,
size_t
num_chunk_sizes
,
size_t
phase1_size
,
size_t
phase2_size
)
{
#ifdef TESTMEMORYPOOL_PRINT
typedef
typename
Device
::
execution_space
execution_space
;
#endif
typedef
typename
Device
::
memory_space
memory_space
;
typedef
Device
device_type
;
typedef
Kokkos
::
View
<
unsigned
*
,
device_type
>
work_view
;
typedef
Kokkos
::
View
<
size_t
,
device_type
>
scalar_view
;
typedef
Kokkos
::
View
<
pointer_obj2
*
,
device_type
>
pointer_view
;
typedef
Kokkos
::
Experimental
::
MemoryPool
<
device_type
>
pool_memory_space
;
enum
{
MIN_CHUNK_SIZE
=
64
,
MIN_BASE_CHUNK_SIZE
=
MIN_CHUNK_SIZE
/
2
+
1
};
// Make sure the base chunk size is at least MIN_BASE_CHUNK_SIZE bytes, so
// all the different chunk sizes translate to different block sizes for the
// allocator.
if
(
base_chunk_size
<
MIN_BASE_CHUNK_SIZE
)
{
base_chunk_size
=
MIN_BASE_CHUNK_SIZE
;
}
// Get the smallest power of 2 >= the base chunk size. The size must be
// >= MIN_CHUNK_SIZE, though.
unsigned
ceil_base_chunk_size
=
smallest_power2_ge
(
base_chunk_size
);
if
(
ceil_base_chunk_size
<
MIN_CHUNK_SIZE
)
{
ceil_base_chunk_size
=
MIN_CHUNK_SIZE
;
}
// Make sure the phase 1 size is multiples of num_chunk_sizes.
phase1_size
=
(
(
phase1_size
+
num_chunk_sizes
-
1
)
/
num_chunk_sizes
)
*
num_chunk_sizes
;
// Make sure the phase 2 size is multiples of (2 * num_chunk_sizes).
phase2_size
=
(
(
phase2_size
+
2
*
num_chunk_sizes
-
1
)
/
(
2
*
num_chunk_sizes
)
)
*
2
*
num_chunk_sizes
;
// The phase2 size must be <= twice the phase1 size so that deallocations
// can't happen before allocations.
if
(
phase2_size
>
2
*
phase1_size
)
phase2_size
=
2
*
phase1_size
;
size_t
phase3_size
=
phase1_size
;
size_t
half_phase2_size
=
phase2_size
/
2
;
// Each entry in the work views has the following format. The least
// significant bit indicates allocation (0) vs. deallocation (1). For
// allocation, the other bits indicate the desired allocation size.
// Initialize the phase 1 work view with an equal number of allocations for
// each chunk size.
work_view
phase1_work
(
"Phase 1 Work"
,
phase1_size
);
typename
work_view
::
HostMirror
host_phase1_work
=
create_mirror_view
(
phase1_work
);
size_t
inner_size
=
phase1_size
/
num_chunk_sizes
;
unsigned
chunk_size
=
base_chunk_size
;
for
(
size_t
i
=
0
;
i
<
num_chunk_sizes
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inner_size
;
++
j
)
{
host_phase1_work
[
i
*
inner_size
+
j
]
=
chunk_size
<<
1
;
}
chunk_size
*=
2
;
}
std
::
random_shuffle
(
host_phase1_work
.
ptr_on_device
(),
host_phase1_work
.
ptr_on_device
()
+
phase1_size
);
deep_copy
(
phase1_work
,
host_phase1_work
);
// Initialize the phase 2 work view with half allocations and half
// deallocations with an equal number of allocations for each chunk size.
work_view
phase2_work
(
"Phase 2 Work"
,
phase2_size
);
typename
work_view
::
HostMirror
host_phase2_work
=
create_mirror_view
(
phase2_work
);
inner_size
=
half_phase2_size
/
num_chunk_sizes
;
chunk_size
=
base_chunk_size
;
for
(
size_t
i
=
0
;
i
<
num_chunk_sizes
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inner_size
;
++
j
)
{
host_phase2_work
[
i
*
inner_size
+
j
]
=
chunk_size
<<
1
;
}
chunk_size
*=
2
;
}
for
(
size_t
i
=
half_phase2_size
;
i
<
phase2_size
;
++
i
)
{
host_phase2_work
[
i
]
=
1
;
}
std
::
random_shuffle
(
host_phase2_work
.
ptr_on_device
(),
host_phase2_work
.
ptr_on_device
()
+
phase2_size
);
deep_copy
(
phase2_work
,
host_phase2_work
);
// Initialize the phase 3 work view with all deallocations.
work_view
phase3_work
(
"Phase 3 Work"
,
phase3_size
);
typename
work_view
::
HostMirror
host_phase3_work
=
create_mirror_view
(
phase3_work
);
inner_size
=
phase3_size
/
num_chunk_sizes
;
for
(
size_t
i
=
0
;
i
<
phase3_size
;
++
i
)
host_phase3_work
[
i
]
=
1
;
deep_copy
(
phase3_work
,
host_phase3_work
);
// Calculate the amount of memory needed for the allocator. We need to know
// the number of superblocks required for each chunk size and use that to
// calculate the amount of memory for each chunk size.
size_t
lg_sb_size
=
18
;
size_t
sb_size
=
1
<<
lg_sb_size
;
size_t
total_size
=
0
;
size_t
allocs_per_size
=
phase1_size
/
num_chunk_sizes
+
half_phase2_size
/
num_chunk_sizes
;
chunk_size
=
ceil_base_chunk_size
;
for
(
size_t
i
=
0
;
i
<
num_chunk_sizes
;
++
i
)
{
size_t
my_size
=
allocs_per_size
*
chunk_size
;
total_size
+=
(
my_size
+
sb_size
-
1
)
/
sb_size
*
sb_size
;
chunk_size
*=
2
;
}
// Declare the queue to hold the records for allocated memory. An allocation
// adds a record to the back of the queue, and a deallocation removes a
// record from the front of the queue.
size_t
num_allocations
=
phase1_size
+
half_phase2_size
;
scalar_view
ptrs_front
(
"Pointers front"
);
scalar_view
ptrs_back
(
"Pointers back"
);
pointer_view
pointers
(
"pointers"
,
num_allocations
);
#ifdef TESTMEMORYPOOL_PRINT
printf
(
"
\n
*** test_mempool2() ***
\n
"
);
printf
(
" num_chunk_sizes: %12zu
\n
"
,
num_chunk_sizes
);
printf
(
" base_chunk_size: %12u
\n
"
,
base_chunk_size
);
printf
(
" ceil_base_chunk_size: %12u
\n
"
,
ceil_base_chunk_size
);
printf
(
" phase1_size: %12zu
\n
"
,
phase1_size
);
printf
(
" phase2_size: %12zu
\n
"
,
phase2_size
);
printf
(
" phase3_size: %12zu
\n
"
,
phase3_size
);
printf
(
" allocs_per_size: %12zu
\n
"
,
allocs_per_size
);
printf
(
" num_allocations: %12zu
\n
"
,
num_allocations
);
printf
(
" total_size: %12zu
\n
"
,
total_size
);
fflush
(
stdout
);
double
elapsed_time
=
0
;
Kokkos
::
Timer
timer
;
#endif
pool_memory_space
mempool
(
memory_space
(),
total_size
*
1.2
,
lg_sb_size
);
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"initialize mempool: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_deallocate_memory
<
work_view
,
pointer_view
,
scalar_view
,
pool_memory_space
>
adm
(
phase1_work
,
phase1_size
,
pointers
,
ptrs_front
,
ptrs_back
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"phase1: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_deallocate_memory
<
work_view
,
pointer_view
,
scalar_view
,
pool_memory_space
>
adm
(
phase2_work
,
phase2_size
,
pointers
,
ptrs_front
,
ptrs_back
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"phase2: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_deallocate_memory
<
work_view
,
pointer_view
,
scalar_view
,
pool_memory_space
>
adm
(
phase3_work
,
phase3_size
,
pointers
,
ptrs_front
,
ptrs_back
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"phase3: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
#endif
}
// Tests for correct behavior when the allocator is out of memory.
template
<
class
Device
>
void
test_memory_exhaustion
()
{
#ifdef TESTMEMORYPOOL_PRINT
typedef
typename
Device
::
execution_space
execution_space
;
#endif
typedef
typename
Device
::
memory_space
memory_space
;
typedef
Device
device_type
;
typedef
Kokkos
::
View
<
pointer_obj
*
,
device_type
>
pointer_view
;
typedef
Kokkos
::
Experimental
::
MemoryPool
<
device_type
>
pool_memory_space
;
// The allocator will have a single superblock, and allocations will all be
// of the same chunk size. The allocation loop will attempt to allocate
// twice the number of chunks as are available in the allocator. The
// deallocation loop will only free the successfully allocated chunks.
size_t
chunk_size
=
128
;
size_t
num_chunks
=
128
;
size_t
half_num_chunks
=
num_chunks
/
2
;
size_t
superblock_size
=
chunk_size
*
half_num_chunks
;
size_t
lg_superblock_size
=
Kokkos
::
Impl
::
integral_power_of_two
(
superblock_size
);
pointer_view
pointers
(
"pointers"
,
num_chunks
);
#ifdef TESTMEMORYPOOL_PRINT
std
::
cout
<<
"
\n
*** test_memory_exhaustion() ***"
<<
std
::
endl
;
double
elapsed_time
=
0
;
Kokkos
::
Timer
timer
;
#endif
pool_memory_space
mempool
(
memory_space
(),
superblock_size
,
lg_superblock_size
);
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"initialize mempool: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
allocate_memory
<
pointer_view
,
pool_memory_space
>
am
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"allocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
timer
.
reset
();
#endif
{
// In parallel, the allocations that succeeded were not put contiguously
// into the pointers View. The whole View can still be looped over and
// have deallocate called because deallocate will just do nothing for NULL
// pointers.
deallocate_memory
<
pointer_view
,
pool_memory_space
>
dm
(
pointers
,
num_chunks
,
chunk_size
,
mempool
);
}
#ifdef TESTMEMORYPOOL_PRINT
execution_space
::
fence
();
elapsed_time
=
timer
.
seconds
();
print_results
(
"deallocate chunks: "
,
elapsed_time
);
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool
.
print_status
();
#endif
#endif
}
}
#ifdef TESTMEMORYPOOL_PRINT
#undef TESTMEMORYPOOL_PRINT
#endif
#ifdef TESTMEMORYPOOL_PRINT_STATUS
#undef TESTMEMORYPOOL_PRINT_STATUS
#endif
#ifdef STRIDE
#undef STRIDE
#endif
#endif
Event Timeline
Log In to Comment