Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F121847089
Kokkos_CudaSpace.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Jul 14, 09:23
Size
22 KB
Mime Type
text/x-c
Expires
Wed, Jul 16, 09:23 (2 d)
Engine
blob
Format
Raw Data
Handle
27400785
Attached To
rLAMMPS lammps
Kokkos_CudaSpace.cpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
namespace
Impl
{
DeepCopy
<
CudaSpace
,
CudaSpace
>::
DeepCopy
(
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpy
(
dst
,
src
,
n
,
cudaMemcpyDefault
)
);
}
DeepCopy
<
CudaSpace
,
CudaSpace
>::
DeepCopy
(
const
Cuda
&
instance
,
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpyAsync
(
dst
,
src
,
n
,
cudaMemcpyDefault
,
instance
.
cuda_stream
()
)
);
}
DeepCopy
<
HostSpace
,
CudaSpace
>::
DeepCopy
(
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpy
(
dst
,
src
,
n
,
cudaMemcpyDefault
)
);
}
DeepCopy
<
HostSpace
,
CudaSpace
>::
DeepCopy
(
const
Cuda
&
instance
,
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpyAsync
(
dst
,
src
,
n
,
cudaMemcpyDefault
,
instance
.
cuda_stream
()
)
);
}
DeepCopy
<
CudaSpace
,
HostSpace
>::
DeepCopy
(
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpy
(
dst
,
src
,
n
,
cudaMemcpyDefault
)
);
}
DeepCopy
<
CudaSpace
,
HostSpace
>::
DeepCopy
(
const
Cuda
&
instance
,
void
*
dst
,
const
void
*
src
,
size_t
n
)
{
CUDA_SAFE_CALL
(
cudaMemcpyAsync
(
dst
,
src
,
n
,
cudaMemcpyDefault
,
instance
.
cuda_stream
()
)
);
}
}
// namespace Impl
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
namespace
{
void
texture_object_attach_impl
(
Impl
::
AllocationTracker
const
&
tracker
,
unsigned
type_size
,
::
cudaChannelFormatDesc
const
&
desc
)
{
enum
{
TEXTURE_BOUND_1D
=
2u
<<
27
};
if
(
tracker
.
attribute
()
==
NULL
)
{
// check for correct allocator
const
bool
ok_alloc
=
tracker
.
allocator
()
->
support_texture_binding
();
const
bool
ok_count
=
(
tracker
.
alloc_size
()
/
type_size
)
<
TEXTURE_BOUND_1D
;
if
(
ok_alloc
&&
ok_count
)
{
Impl
::
TextureAttribute
*
attr
=
new
Impl
::
TextureAttribute
(
tracker
.
alloc_ptr
(),
tracker
.
alloc_size
(),
desc
);
tracker
.
set_attribute
(
attr
);
}
else
{
std
::
ostringstream
oss
;
oss
<<
"Error: Cannot attach texture object"
;
if
(
!
ok_alloc
)
{
oss
<<
", incompatabile allocator "
<<
tracker
.
allocator
()
->
name
();
}
if
(
!
ok_count
)
{
oss
<<
", array "
<<
tracker
.
label
()
<<
" too large"
;
}
oss
<<
"."
;
Kokkos
::
Impl
::
throw_runtime_exception
(
oss
.
str
()
);
}
}
if
(
NULL
==
dynamic_cast
<
Impl
::
TextureAttribute
*>
(
tracker
.
attribute
())
)
{
std
::
ostringstream
oss
;
oss
<<
"Error: Allocation "
<<
tracker
.
label
()
<<
" already has an attribute attached."
;
Kokkos
::
Impl
::
throw_runtime_exception
(
oss
.
str
()
);
}
}
}
// unnamed namespace
/*--------------------------------------------------------------------------*/
Impl
::
AllocationTracker
CudaSpace
::
allocate_and_track
(
const
std
::
string
&
label
,
const
size_t
size
)
{
return
Impl
::
AllocationTracker
(
allocator
(),
size
,
label
);
}
void
CudaSpace
::
texture_object_attach
(
Impl
::
AllocationTracker
const
&
tracker
,
unsigned
type_size
,
::
cudaChannelFormatDesc
const
&
desc
)
{
texture_object_attach_impl
(
tracker
,
type_size
,
desc
);
}
void
CudaSpace
::
access_error
()
{
const
std
::
string
msg
(
"Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space"
);
Kokkos
::
Impl
::
throw_runtime_exception
(
msg
);
}
void
CudaSpace
::
access_error
(
const
void
*
const
)
{
const
std
::
string
msg
(
"Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space"
);
Kokkos
::
Impl
::
throw_runtime_exception
(
msg
);
}
/*--------------------------------------------------------------------------*/
Impl
::
AllocationTracker
CudaUVMSpace
::
allocate_and_track
(
const
std
::
string
&
label
,
const
size_t
size
)
{
return
Impl
::
AllocationTracker
(
allocator
(),
size
,
label
);
}
void
CudaUVMSpace
::
texture_object_attach
(
Impl
::
AllocationTracker
const
&
tracker
,
unsigned
type_size
,
::
cudaChannelFormatDesc
const
&
desc
)
{
texture_object_attach_impl
(
tracker
,
type_size
,
desc
);
}
bool
CudaUVMSpace
::
available
()
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
enum
{
UVM_available
=
true
};
#else
enum
{
UVM_available
=
false
};
#endif
return
UVM_available
;
}
/*--------------------------------------------------------------------------*/
Impl
::
AllocationTracker
CudaHostPinnedSpace
::
allocate_and_track
(
const
std
::
string
&
label
,
const
size_t
size
)
{
return
Impl
::
AllocationTracker
(
allocator
(),
size
,
label
);
}
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
CudaSpace
::
CudaSpace
()
:
m_device
(
Kokkos
::
Cuda
().
cuda_device
()
)
{
}
CudaUVMSpace
::
CudaUVMSpace
()
:
m_device
(
Kokkos
::
Cuda
().
cuda_device
()
)
{
}
CudaHostPinnedSpace
::
CudaHostPinnedSpace
()
{
}
void
*
CudaSpace
::
allocate
(
const
size_t
arg_alloc_size
)
const
{
void
*
ptr
=
NULL
;
CUDA_SAFE_CALL
(
cudaMalloc
(
&
ptr
,
arg_alloc_size
)
);
return
ptr
;
}
void
*
CudaUVMSpace
::
allocate
(
const
size_t
arg_alloc_size
)
const
{
void
*
ptr
=
NULL
;
CUDA_SAFE_CALL
(
cudaMallocManaged
(
&
ptr
,
arg_alloc_size
,
cudaMemAttachGlobal
)
);
return
ptr
;
}
void
*
CudaHostPinnedSpace
::
allocate
(
const
size_t
arg_alloc_size
)
const
{
void
*
ptr
=
NULL
;
CUDA_SAFE_CALL
(
cudaHostAlloc
(
&
ptr
,
arg_alloc_size
,
cudaHostAllocDefault
)
);
return
ptr
;
}
void
CudaSpace
::
deallocate
(
void
*
const
arg_alloc_ptr
,
const
size_t
/* arg_alloc_size */
)
const
{
try
{
CUDA_SAFE_CALL
(
cudaFree
(
arg_alloc_ptr
)
);
}
catch
(...)
{}
}
void
CudaUVMSpace
::
deallocate
(
void
*
const
arg_alloc_ptr
,
const
size_t
/* arg_alloc_size */
)
const
{
try
{
CUDA_SAFE_CALL
(
cudaFree
(
arg_alloc_ptr
)
);
}
catch
(...)
{}
}
void
CudaHostPinnedSpace
::
deallocate
(
void
*
const
arg_alloc_ptr
,
const
size_t
/* arg_alloc_size */
)
const
{
try
{
CUDA_SAFE_CALL
(
cudaFreeHost
(
arg_alloc_ptr
)
);
}
catch
(...)
{}
}
}
// namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace
Kokkos
{
namespace
Experimental
{
namespace
Impl
{
SharedAllocationRecord
<
void
,
void
>
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
s_root_record
;
SharedAllocationRecord
<
void
,
void
>
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
s_root_record
;
SharedAllocationRecord
<
void
,
void
>
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
s_root_record
;
::
cudaTextureObject_t
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
attach_texture_object
(
const
unsigned
sizeof_alias
,
void
*
const
alloc_ptr
,
size_t
const
alloc_size
)
{
// Only valid for 300 <= __CUDA_ARCH__
// otherwise return zero.
::
cudaTextureObject_t
tex_obj
;
struct
cudaResourceDesc
resDesc
;
struct
cudaTextureDesc
texDesc
;
memset
(
&
resDesc
,
0
,
sizeof
(
resDesc
)
);
memset
(
&
texDesc
,
0
,
sizeof
(
texDesc
)
);
resDesc
.
resType
=
cudaResourceTypeLinear
;
resDesc
.
res
.
linear
.
desc
=
(
sizeof_alias
==
4
?
cudaCreateChannelDesc
<
int
>
()
:
(
sizeof_alias
==
8
?
cudaCreateChannelDesc
<
::
int2
>
()
:
/* sizeof_alias == 16 */
cudaCreateChannelDesc
<
::
int4
>
()
)
);
resDesc
.
res
.
linear
.
sizeInBytes
=
alloc_size
;
resDesc
.
res
.
linear
.
devPtr
=
alloc_ptr
;
CUDA_SAFE_CALL
(
cudaCreateTextureObject
(
&
tex_obj
,
&
resDesc
,
&
texDesc
,
NULL
)
);
return
tex_obj
;
}
std
::
string
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
get_label
()
const
{
SharedAllocationHeader
header
;
Kokkos
::
Impl
::
DeepCopy
<
Kokkos
::
HostSpace
,
Kokkos
::
CudaSpace
>
(
&
header
,
RecordBase
::
head
()
,
sizeof
(
SharedAllocationHeader
)
);
return
std
::
string
(
header
.
m_label
);
}
std
::
string
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
get_label
()
const
{
return
std
::
string
(
RecordBase
::
head
()
->
m_label
);
}
std
::
string
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
get_label
()
const
{
return
std
::
string
(
RecordBase
::
head
()
->
m_label
);
}
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
allocate
(
const
Kokkos
::
CudaSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
)
{
return
new
SharedAllocationRecord
(
arg_space
,
arg_label
,
arg_alloc_size
);
}
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
allocate
(
const
Kokkos
::
CudaUVMSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
)
{
return
new
SharedAllocationRecord
(
arg_space
,
arg_label
,
arg_alloc_size
);
}
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
allocate
(
const
Kokkos
::
CudaHostPinnedSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
)
{
return
new
SharedAllocationRecord
(
arg_space
,
arg_label
,
arg_alloc_size
);
}
void
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
deallocate
(
SharedAllocationRecord
<
void
,
void
>
*
arg_rec
)
{
delete
static_cast
<
SharedAllocationRecord
*>
(
arg_rec
);
}
void
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
deallocate
(
SharedAllocationRecord
<
void
,
void
>
*
arg_rec
)
{
delete
static_cast
<
SharedAllocationRecord
*>
(
arg_rec
);
}
void
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
deallocate
(
SharedAllocationRecord
<
void
,
void
>
*
arg_rec
)
{
delete
static_cast
<
SharedAllocationRecord
*>
(
arg_rec
);
}
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
~
SharedAllocationRecord
()
{
m_space
.
deallocate
(
SharedAllocationRecord
<
void
,
void
>::
m_alloc_ptr
,
SharedAllocationRecord
<
void
,
void
>::
m_alloc_size
);
}
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
~
SharedAllocationRecord
()
{
m_space
.
deallocate
(
SharedAllocationRecord
<
void
,
void
>::
m_alloc_ptr
,
SharedAllocationRecord
<
void
,
void
>::
m_alloc_size
);
}
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
~
SharedAllocationRecord
()
{
m_space
.
deallocate
(
SharedAllocationRecord
<
void
,
void
>::
m_alloc_ptr
,
SharedAllocationRecord
<
void
,
void
>::
m_alloc_size
);
}
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
SharedAllocationRecord
(
const
Kokkos
::
CudaSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
,
const
SharedAllocationRecord
<
void
,
void
>::
function_type
arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
:
SharedAllocationRecord
<
void
,
void
>
(
&
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
s_root_record
,
reinterpret_cast
<
SharedAllocationHeader
*>
(
arg_space
.
allocate
(
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
)
)
,
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
,
arg_dealloc
)
,
m_tex_obj
(
0
)
,
m_space
(
arg_space
)
{
SharedAllocationHeader
header
;
// Fill in the Header information
header
.
m_record
=
static_cast
<
SharedAllocationRecord
<
void
,
void
>
*
>
(
this
);
strncpy
(
header
.
m_label
,
arg_label
.
c_str
()
,
SharedAllocationHeader
::
maximum_label_length
);
// Copy to device memory
Kokkos
::
Impl
::
DeepCopy
<
CudaSpace
,
HostSpace
>::
DeepCopy
(
RecordBase
::
m_alloc_ptr
,
&
header
,
sizeof
(
SharedAllocationHeader
)
);
}
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
SharedAllocationRecord
(
const
Kokkos
::
CudaUVMSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
,
const
SharedAllocationRecord
<
void
,
void
>::
function_type
arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
:
SharedAllocationRecord
<
void
,
void
>
(
&
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
s_root_record
,
reinterpret_cast
<
SharedAllocationHeader
*>
(
arg_space
.
allocate
(
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
)
)
,
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
,
arg_dealloc
)
,
m_tex_obj
(
0
)
,
m_space
(
arg_space
)
{
// Fill in the Header information, directly accessible via UVM
RecordBase
::
m_alloc_ptr
->
m_record
=
this
;
strncpy
(
RecordBase
::
m_alloc_ptr
->
m_label
,
arg_label
.
c_str
()
,
SharedAllocationHeader
::
maximum_label_length
);
}
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
SharedAllocationRecord
(
const
Kokkos
::
CudaHostPinnedSpace
&
arg_space
,
const
std
::
string
&
arg_label
,
const
size_t
arg_alloc_size
,
const
SharedAllocationRecord
<
void
,
void
>::
function_type
arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
:
SharedAllocationRecord
<
void
,
void
>
(
&
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
s_root_record
,
reinterpret_cast
<
SharedAllocationHeader
*>
(
arg_space
.
allocate
(
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
)
)
,
sizeof
(
SharedAllocationHeader
)
+
arg_alloc_size
,
arg_dealloc
)
,
m_space
(
arg_space
)
{
// Fill in the Header information, directly accessible via UVM
RecordBase
::
m_alloc_ptr
->
m_record
=
this
;
strncpy
(
RecordBase
::
m_alloc_ptr
->
m_label
,
arg_label
.
c_str
()
,
SharedAllocationHeader
::
maximum_label_length
);
}
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
get_record
(
void
*
alloc_ptr
)
{
using
Header
=
SharedAllocationHeader
;
using
RecordBase
=
SharedAllocationRecord
<
void
,
void
>
;
using
RecordCuda
=
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>
;
#if 0
// Copy the header from the allocation
SharedAllocationHeader head ;
SharedAllocationHeader const * const head_cuda = Header::get_header( alloc_ptr );
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
RecordCuda * const record = static_cast< RecordCuda * >( head.m_record );
if ( record->m_alloc_ptr != head_cuda ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#else
// Iterate the list to search for the record among all allocations
// requires obtaining the root of the list and then locking the list.
RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );
if ( record == 0 ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#endif
return
record
;
}
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
get_record
(
void
*
alloc_ptr
)
{
using
Header
=
SharedAllocationHeader
;
using
RecordCuda
=
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>
;
Header
*
const
h
=
reinterpret_cast
<
Header
*
>
(
alloc_ptr
)
-
1
;
if
(
h
->
m_record
->
m_alloc_ptr
!=
h
)
{
Kokkos
::
Impl
::
throw_runtime_exception
(
std
::
string
(
"Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR"
)
);
}
return
static_cast
<
RecordCuda
*
>
(
h
->
m_record
);
}
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>
*
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
get_record
(
void
*
alloc_ptr
)
{
using
Header
=
SharedAllocationHeader
;
using
RecordCuda
=
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>
;
Header
*
const
h
=
reinterpret_cast
<
Header
*
>
(
alloc_ptr
)
-
1
;
if
(
h
->
m_record
->
m_alloc_ptr
!=
h
)
{
Kokkos
::
Impl
::
throw_runtime_exception
(
std
::
string
(
"Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR"
)
);
}
return
static_cast
<
RecordCuda
*
>
(
h
->
m_record
);
}
// Iterate records to print orphaned memory ...
void
SharedAllocationRecord
<
Kokkos
::
CudaSpace
,
void
>::
print_records
(
std
::
ostream
&
s
,
const
Kokkos
::
CudaSpace
&
space
,
bool
detail
)
{
SharedAllocationRecord
<
void
,
void
>
*
r
=
&
s_root_record
;
char
buffer
[
256
]
;
SharedAllocationHeader
head
;
if
(
detail
)
{
do
{
if
(
r
->
m_alloc_ptr
)
{
Kokkos
::
Impl
::
DeepCopy
<
HostSpace
,
CudaSpace
>::
DeepCopy
(
&
head
,
r
->
m_alloc_ptr
,
sizeof
(
SharedAllocationHeader
)
);
}
else
{
head
.
m_label
[
0
]
=
0
;
}
snprintf
(
buffer
,
256
,
"Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s
\n
"
,
reinterpret_cast
<
unsigned
long
>
(
r
)
,
reinterpret_cast
<
unsigned
long
>
(
r
->
m_prev
)
,
reinterpret_cast
<
unsigned
long
>
(
r
->
m_next
)
,
reinterpret_cast
<
unsigned
long
>
(
r
->
m_alloc_ptr
)
,
r
->
m_alloc_size
,
r
->
m_count
,
reinterpret_cast
<
unsigned
long
>
(
r
->
m_dealloc
)
,
head
.
m_label
);
std
::
cout
<<
buffer
;
r
=
r
->
m_next
;
}
while
(
r
!=
&
s_root_record
);
}
else
{
do
{
if
(
r
->
m_alloc_ptr
)
{
Kokkos
::
Impl
::
DeepCopy
<
HostSpace
,
CudaSpace
>::
DeepCopy
(
&
head
,
r
->
m_alloc_ptr
,
sizeof
(
SharedAllocationHeader
)
);
snprintf
(
buffer
,
256
,
"Cuda [ 0x%.12lx + %ld ] %s
\n
"
,
reinterpret_cast
<
unsigned
long
>
(
r
->
data
()
)
,
r
->
size
()
,
head
.
m_label
);
}
else
{
snprintf
(
buffer
,
256
,
"Cuda [ 0 + 0 ]
\n
"
);
}
std
::
cout
<<
buffer
;
r
=
r
->
m_next
;
}
while
(
r
!=
&
s_root_record
);
}
}
void
SharedAllocationRecord
<
Kokkos
::
CudaUVMSpace
,
void
>::
print_records
(
std
::
ostream
&
s
,
const
Kokkos
::
CudaUVMSpace
&
space
,
bool
detail
)
{
SharedAllocationRecord
<
void
,
void
>::
print_host_accessible_records
(
s
,
"CudaUVM"
,
&
s_root_record
,
detail
);
}
void
SharedAllocationRecord
<
Kokkos
::
CudaHostPinnedSpace
,
void
>::
print_records
(
std
::
ostream
&
s
,
const
Kokkos
::
CudaHostPinnedSpace
&
space
,
bool
detail
)
{
SharedAllocationRecord
<
void
,
void
>::
print_host_accessible_records
(
s
,
"CudaHostPinned"
,
&
s_root_record
,
detail
);
}
}
// namespace Impl
}
// namespace Experimental
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
namespace
{
__global__
void
init_lock_array_kernel
()
{
unsigned
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
i
<
CUDA_SPACE_ATOMIC_MASK
+
1
)
kokkos_impl_cuda_atomic_lock_array
[
i
]
=
0
;
}
}
namespace
Impl
{
int
*
lock_array_cuda_space_ptr
(
bool
deallocate
)
{
static
int
*
ptr
=
NULL
;
if
(
deallocate
)
{
cudaFree
(
ptr
);
ptr
=
NULL
;
}
if
(
ptr
==
NULL
&&
!
deallocate
)
cudaMalloc
(
&
ptr
,
sizeof
(
int
)
*
(
CUDA_SPACE_ATOMIC_MASK
+
1
));
return
ptr
;
}
void
init_lock_array_cuda_space
()
{
int
is_initialized
=
0
;
if
(
!
is_initialized
)
{
int
*
lock_array_ptr
=
lock_array_cuda_space_ptr
();
cudaMemcpyToSymbol
(
kokkos_impl_cuda_atomic_lock_array
,
&
lock_array_ptr
,
sizeof
(
int
*
)
);
init_lock_array_kernel
<<<
(
CUDA_SPACE_ATOMIC_MASK
+
255
)
/
256
,
256
>>>
();
}
}
}
}
#endif
// KOKKOS_HAVE_CUDA
Event Timeline
Log In to Comment