Kokkos_CudaSpace.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Mon, Jul 14, 09:23

Kokkos_CudaSpace.cpp
View Options

	/*
	//@HEADER
	// ************************************************************************
	//
	// Kokkos v. 2.0
	// Copyright (2014) Sandia Corporation
	//
	// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
	// the U.S. Government retains certain rights in this software.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// 1. Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	//
	// 2. Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	//
	// 3. Neither the name of the Corporation nor the names of the
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
	// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
	// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	//
	// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
	//
	// ************************************************************************
	//@HEADER
	*/

	#include <stdlib.h>
	#include <iostream>
	#include <sstream>
	#include <stdexcept>
	#include <Kokkos_Macros.hpp>

	/* only compile this file if CUDA is enabled for Kokkos */
	#ifdef KOKKOS_HAVE_CUDA

	#include <Kokkos_Cuda.hpp>
	#include <Kokkos_CudaSpace.hpp>

	#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
	#include <Cuda/Kokkos_Cuda_Internal.hpp>
	#include <impl/Kokkos_Error.hpp>

	/--------------------------------------------------------------------------/
	/--------------------------------------------------------------------------/

	namespace Kokkos {
	namespace Impl {

	DeepCopy<CudaSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }

	DeepCopy<CudaSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }

	DeepCopy<HostSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }

	DeepCopy<HostSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }

	DeepCopy<CudaSpace,HostSpace>::DeepCopy( void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }

	DeepCopy<CudaSpace,HostSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
	{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }

	} // namespace Impl
	} // namespace Kokkos

	/--------------------------------------------------------------------------/
	/--------------------------------------------------------------------------/


	namespace Kokkos {

	namespace {

	void texture_object_attach_impl( Impl::AllocationTracker const & tracker
	, unsigned type_size
	, ::cudaChannelFormatDesc const & desc
	)
	{
	enum { TEXTURE_BOUND_1D = 2u << 27 };

	if ( tracker.attribute() == NULL ) {
	// check for correct allocator
	const bool ok_alloc = tracker.allocator()->support_texture_binding();

	const bool ok_count = (tracker.alloc_size() / type_size) < TEXTURE_BOUND_1D;

	if (ok_alloc && ok_count) {
	Impl::TextureAttribute * attr = new Impl::TextureAttribute( tracker.alloc_ptr(), tracker.alloc_size(), desc );
	tracker.set_attribute( attr );
	}
	else {
	std::ostringstream oss;
	oss << "Error: Cannot attach texture object";
	if (!ok_alloc) {
	oss << ", incompatabile allocator " << tracker.allocator()->name();
	}
	if (!ok_count) {
	oss << ", array " << tracker.label() << " too large";
	}
	oss << ".";
	Kokkos::Impl::throw_runtime_exception( oss.str() );
	}
	}

	if ( NULL == dynamic_cast<Impl::TextureAttribute *>(tracker.attribute()) ) {
	std::ostringstream oss;
	oss << "Error: Allocation " << tracker.label() << " already has an attribute attached.";
	Kokkos::Impl::throw_runtime_exception( oss.str() );
	}

	}

	} // unnamed namespace

	/--------------------------------------------------------------------------/

	Impl::AllocationTracker CudaSpace::allocate_and_track( const std::string & label, const size_t size )
	{
	return Impl::AllocationTracker( allocator(), size, label);
	}

	void CudaSpace::texture_object_attach( Impl::AllocationTracker const & tracker
	, unsigned type_size
	, ::cudaChannelFormatDesc const & desc
	)
	{
	texture_object_attach_impl( tracker, type_size, desc );
	}

	void CudaSpace::access_error()
	{
	const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
	Kokkos::Impl::throw_runtime_exception( msg );
	}

	void CudaSpace::access_error( const void * const )
	{
	const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
	Kokkos::Impl::throw_runtime_exception( msg );
	}

	/--------------------------------------------------------------------------/

	Impl::AllocationTracker CudaUVMSpace::allocate_and_track( const std::string & label, const size_t size )
	{
	return Impl::AllocationTracker( allocator(), size, label);
	}

	void CudaUVMSpace::texture_object_attach( Impl::AllocationTracker const & tracker
	, unsigned type_size
	, ::cudaChannelFormatDesc const & desc
	)
	{
	texture_object_attach_impl( tracker, type_size, desc );
	}

	bool CudaUVMSpace::available()
	{
	#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
	enum { UVM_available = true };
	#else
	enum { UVM_available = false };
	#endif
	return UVM_available;
	}

	/--------------------------------------------------------------------------/

	Impl::AllocationTracker CudaHostPinnedSpace::allocate_and_track( const std::string & label, const size_t size )
	{
	return Impl::AllocationTracker( allocator(), size, label);
	}

	} // namespace Kokkos

	/--------------------------------------------------------------------------/
	/--------------------------------------------------------------------------/

	namespace Kokkos {

	CudaSpace::CudaSpace()
	: m_device( Kokkos::Cuda().cuda_device() )
	{
	}

	CudaUVMSpace::CudaUVMSpace()
	: m_device( Kokkos::Cuda().cuda_device() )
	{
	}

	CudaHostPinnedSpace::CudaHostPinnedSpace()
	{
	}

	void * CudaSpace::allocate( const size_t arg_alloc_size ) const
	{
	void * ptr = NULL;

	CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );

	return ptr ;
	}

	void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
	{
	void * ptr = NULL;

	CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );

	return ptr ;
	}

	void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
	{
	void * ptr = NULL;

	CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );

	return ptr ;
	}

	void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
	{
	try {
	CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
	} catch(...) {}
	}

	void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
	{
	try {
	CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
	} catch(...) {}
	}

	void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
	{
	try {
	CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) );
	} catch(...) {}
	}

	} // namespace Kokkos

	//----------------------------------------------------------------------------
	//----------------------------------------------------------------------------

	namespace Kokkos {
	namespace Experimental {
	namespace Impl {

	SharedAllocationRecord< void , void >
	SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ;

	SharedAllocationRecord< void , void >
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ;

	SharedAllocationRecord< void , void >
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ;

	::cudaTextureObject_t
	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	attach_texture_object( const unsigned sizeof_alias
	, void * const alloc_ptr
	, size_t const alloc_size )
	{
	// Only valid for 300 <= __CUDA_ARCH__
	// otherwise return zero.

	::cudaTextureObject_t tex_obj ;

	struct cudaResourceDesc resDesc ;
	struct cudaTextureDesc texDesc ;

	memset( & resDesc , 0 , sizeof(resDesc) );
	memset( & texDesc , 0 , sizeof(texDesc) );

	resDesc.resType = cudaResourceTypeLinear ;
	resDesc.res.linear.desc = ( sizeof_alias == 4 ? cudaCreateChannelDesc< int >() :
	( sizeof_alias == 8 ? cudaCreateChannelDesc< ::int2 >() :
	/* sizeof_alias == 16 */ cudaCreateChannelDesc< ::int4 >() ) );
	resDesc.res.linear.sizeInBytes = alloc_size ;
	resDesc.res.linear.devPtr = alloc_ptr ;

	CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

	return tex_obj ;
	}

	std::string
	SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const
	{
	SharedAllocationHeader header ;

	Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) );

	return std::string( header.m_label );
	}

	std::string
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const
	{
	return std::string( RecordBase::head()->m_label );
	}

	std::string
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const
	{
	return std::string( RecordBase::head()->m_label );
	}

	SharedAllocationRecord< Kokkos::CudaSpace , void > *
	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	allocate( const Kokkos::CudaSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	)
	{
	return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
	}

	SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
	allocate( const Kokkos::CudaUVMSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	)
	{
	return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
	}

	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
	allocate( const Kokkos::CudaHostPinnedSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	)
	{
	return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
	}

	void
	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	deallocate( SharedAllocationRecord< void , void > * arg_rec )
	{
	delete static_cast<SharedAllocationRecord*>(arg_rec);
	}

	void
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
	deallocate( SharedAllocationRecord< void , void > * arg_rec )
	{
	delete static_cast<SharedAllocationRecord*>(arg_rec);
	}

	void
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
	deallocate( SharedAllocationRecord< void , void > * arg_rec )
	{
	delete static_cast<SharedAllocationRecord*>(arg_rec);
	}

	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	~SharedAllocationRecord()
	{
	m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
	, SharedAllocationRecord< void , void >::m_alloc_size
	);
	}

	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
	~SharedAllocationRecord()
	{
	m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
	, SharedAllocationRecord< void , void >::m_alloc_size
	);
	}

	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
	~SharedAllocationRecord()
	{
	m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
	, SharedAllocationRecord< void , void >::m_alloc_size
	);
	}

	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	, const SharedAllocationRecord< void , void >::function_type arg_dealloc
	)
	// Pass through allocated [ SharedAllocationHeader , user_memory ]
	// Pass through deallocation function
	: SharedAllocationRecord< void , void >
	( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
	, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
	, sizeof(SharedAllocationHeader) + arg_alloc_size
	, arg_dealloc
	)
	, m_tex_obj( 0 )
	, m_space( arg_space )
	{
	SharedAllocationHeader header ;

	// Fill in the Header information
	header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

	strncpy( header.m_label
	, arg_label.c_str()
	, SharedAllocationHeader::maximum_label_length
	);

	// Copy to device memory
	Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
	}

	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
	SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	, const SharedAllocationRecord< void , void >::function_type arg_dealloc
	)
	// Pass through allocated [ SharedAllocationHeader , user_memory ]
	// Pass through deallocation function
	: SharedAllocationRecord< void , void >
	( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
	, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
	, sizeof(SharedAllocationHeader) + arg_alloc_size
	, arg_dealloc
	)
	, m_tex_obj( 0 )
	, m_space( arg_space )
	{
	// Fill in the Header information, directly accessible via UVM

	RecordBase::m_alloc_ptr->m_record = this ;

	strncpy( RecordBase::m_alloc_ptr->m_label
	, arg_label.c_str()
	, SharedAllocationHeader::maximum_label_length
	);
	}

	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
	SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
	, const std::string & arg_label
	, const size_t arg_alloc_size
	, const SharedAllocationRecord< void , void >::function_type arg_dealloc
	)
	// Pass through allocated [ SharedAllocationHeader , user_memory ]
	// Pass through deallocation function
	: SharedAllocationRecord< void , void >
	( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
	, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
	, sizeof(SharedAllocationHeader) + arg_alloc_size
	, arg_dealloc
	)
	, m_space( arg_space )
	{
	// Fill in the Header information, directly accessible via UVM

	RecordBase::m_alloc_ptr->m_record = this ;

	strncpy( RecordBase::m_alloc_ptr->m_label
	, arg_label.c_str()
	, SharedAllocationHeader::maximum_label_length
	);
	}

	SharedAllocationRecord< Kokkos::CudaSpace , void > *
	SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
	{
	using Header = SharedAllocationHeader ;
	using RecordBase = SharedAllocationRecord< void , void > ;
	using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ;

	#if 0
	// Copy the header from the allocation
	SharedAllocationHeader head ;

	SharedAllocationHeader const * const head_cuda = Header::get_header( alloc_ptr );

	Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );

	RecordCuda * const record = static_cast< RecordCuda * >( head.m_record );

	if ( record->m_alloc_ptr != head_cuda ) {
	Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
	}

	#else

	// Iterate the list to search for the record among all allocations
	// requires obtaining the root of the list and then locking the list.

	RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );

	if ( record == 0 ) {
	Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
	}

	#endif

	return record ;
	}

	SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr )
	{
	using Header = SharedAllocationHeader ;
	using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

	Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;

	if ( h->m_record->m_alloc_ptr != h ) {
	Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
	}

	return static_cast< RecordCuda * >( h->m_record );
	}

	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr )
	{
	using Header = SharedAllocationHeader ;
	using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ;

	Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;

	if ( h->m_record->m_alloc_ptr != h ) {
	Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
	}

	return static_cast< RecordCuda * >( h->m_record );
	}

	// Iterate records to print orphaned memory ...
	void
	SharedAllocationRecord< Kokkos::CudaSpace , void >::
	print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
	{
	SharedAllocationRecord< void , void > * r = & s_root_record ;

	char buffer[256] ;

	SharedAllocationHeader head ;

	if ( detail ) {
	do {
	if ( r->m_alloc_ptr ) {
	Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
	}
	else {
	head.m_label[0] = 0 ;
	}

	snprintf( buffer , 256 , "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
	, reinterpret_cast<unsigned long>( r )
	, reinterpret_cast<unsigned long>( r->m_prev )
	, reinterpret_cast<unsigned long>( r->m_next )
	, reinterpret_cast<unsigned long>( r->m_alloc_ptr )
	, r->m_alloc_size
	, r->m_count
	, reinterpret_cast<unsigned long>( r->m_dealloc )
	, head.m_label
	);
	std::cout << buffer ;
	r = r->m_next ;
	} while ( r != & s_root_record );
	}
	else {
	do {
	if ( r->m_alloc_ptr ) {

	Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );

	snprintf( buffer , 256 , "Cuda [ 0x%.12lx + %ld ] %s\n"
	, reinterpret_cast< unsigned long >( r->data() )
	, r->size()
	, head.m_label
	);
	}
	else {
	snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
	}
	std::cout << buffer ;
	r = r->m_next ;
	} while ( r != & s_root_record );
	}
	}

	void
	SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
	print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
	{
	SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
	}

	void
	SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
	print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
	{
	SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
	}

	} // namespace Impl
	} // namespace Experimental
	} // namespace Kokkos

	/--------------------------------------------------------------------------/
	/--------------------------------------------------------------------------/

	namespace Kokkos {
	namespace {
	__global__ void init_lock_array_kernel() {
	unsigned i = blockIdx.x*blockDim.x + threadIdx.x;

	if(i<CUDA_SPACE_ATOMIC_MASK+1)
	kokkos_impl_cuda_atomic_lock_array[i] = 0;
	}
	}

	namespace Impl {
	int* lock_array_cuda_space_ptr(bool deallocate) {
	static int* ptr = NULL;
	if(deallocate) {
	cudaFree(ptr);
	ptr = NULL;
	}

	if(ptr==NULL && !deallocate)
	cudaMalloc(&ptr,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
	return ptr;
	}

	void init_lock_array_cuda_space() {
	int is_initialized = 0;
	if(! is_initialized) {
	int* lock_array_ptr = lock_array_cuda_space_ptr();
	cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
	init_lock_array_kernel<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>();
	}
	}

	}
	}
	#endif // KOKKOS_HAVE_CUDA

Kokkos_CudaSpace.cppNo OneTemporaryActions

File Metadata

Kokkos_CudaSpace.cppView Options

Event Timeline

Kokkos_CudaSpace.cpp
No OneTemporary
Actions

Kokkos_CudaSpace.cpp
View Options