Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F79096592
Kokkos_Serial.hpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Aug 24, 17:56
Size
38 KB
Mime Type
text/x-c++
Expires
Mon, Aug 26, 17:56 (2 d)
Engine
blob
Format
Raw Data
Handle
20150666
Attached To
rLAMMPS lammps
Kokkos_Serial.hpp
View Options
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Serial.hpp
/// \brief Declaration and definition of Kokkos::Serial device.
#ifndef KOKKOS_SERIAL_HPP
#define KOKKOS_SERIAL_HPP
#include <cstddef>
#include <iosfwd>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <KokkosExp_MDRangePolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
namespace
Kokkos
{
/// \class Serial
/// \brief Kokkos execution space that runs kernels without any parallelism.
///
/// Every query on this class is hard-wired to a single worker: the thread
/// pool size and the concurrency are 1, the pool rank is 0, and
/// in_parallel() always reports false.
class Serial {
public:
  //! \name Type declarations that all Kokkos devices must provide.
  //@{

  //! Tag this class as an execution space.
  using execution_space = Serial ;

  //! Size type of this device, taken from HostSpace.
  using size_type = HostSpace::size_type ;

  //! Preferred memory space of this device.
  using memory_space = HostSpace ;

  //! Preferred device_type: this execution space paired with its memory space.
  using device_type = Kokkos::Device< execution_space , memory_space > ;

  //! Preferred array layout of this device.
  using array_layout = LayoutRight ;

  //! Scratch memory space.
  using scratch_memory_space = ScratchMemorySpace< Kokkos::Serial > ;

  //@}

  /// \brief Report whether the caller is inside a thread-parallel function.
  ///
  /// Always returns false (converted to int) for this execution space.
  static inline int in_parallel() { return false ; }

  /** \brief Set the device in a "sleep" state.
   *
   *  Only declared in this header.  Documented contract: returns true if
   *  the device is in the "sleep" state, false if the device is actively
   *  working and could not enter that state.
   */
  static bool sleep();

  /** \brief Wake the device from the "sleep" state so it is ready for work.
   *
   *  Only declared in this header.  Documented contract: returns true if
   *  the device is in the "ready" state, false if it is actively working.
   */
  static bool wake();

  /// \brief Wait until all dispatched functors complete.
  ///
  /// The body is empty for this execution space.
  static void fence() {}

  /// \brief Initialize the execution space.
  ///
  /// All four arguments are accepted and deliberately ignored.  The
  /// function initializes the host-space lock array and, when profiling is
  /// enabled, the profiling interface.
  static void initialize( unsigned threads_count = 1 ,
                          unsigned use_numa_count = 0 ,
                          unsigned use_cores_per_numa = 0 ,
                          bool allow_asynchronous_threadpool = false )
  {
    // Mark the arguments as intentionally unused.
    static_cast<void>( threads_count );
    static_cast<void>( use_numa_count );
    static_cast<void>( use_cores_per_numa );
    static_cast<void>( allow_asynchronous_threadpool );

    // Init the array of locks used for arbitrarily sized atomics
    Impl::init_lock_array_host_space();

#if (KOKKOS_ENABLE_PROFILING)
    Kokkos::Profiling::initialize();
#endif
  }

  //! Always reports the execution space as initialized (returns 1).
  static int is_initialized() { return 1 ; }

  /** \brief Return the maximum amount of concurrency, which is always 1. */
  static int concurrency() { return 1 ; }

  //! Finalize the device; only the profiling interface (when enabled) is finalized.
  static void finalize()
  {
#if (KOKKOS_ENABLE_PROFILING)
    Kokkos::Profiling::finalize();
#endif
  }

  //! Print configuration information to the given output stream (prints nothing here).
  static void print_configuration( std::ostream & , const bool /* detail */ = false ) {}

  //--------------------------------------------------------------------------

  //! Size of the thread pool; the argument is ignored and 1 is returned.
  static inline int thread_pool_size( int = 0 ) { return 1 ; }

  //! Rank of the calling thread in the pool; always 0.
  KOKKOS_INLINE_FUNCTION
  static int thread_pool_rank() { return 0 ; }

  //--------------------------------------------------------------------------

  //! Hardware thread id; forwards to thread_pool_rank().
  KOKKOS_INLINE_FUNCTION
  static unsigned hardware_thread_id() { return thread_pool_rank(); }

  //! Maximum number of hardware threads; forwards to thread_pool_size(0).
  static inline unsigned max_hardware_threads() { return thread_pool_size( 0 ); }

  //--------------------------------------------------------------------------

  /// \brief Resize the scratch memory used for reductions and team-shared data.
  ///
  /// Only declared in this header; callers in this file cast the returned
  /// pointer to the reduction's pointer type.
  static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size );

  //--------------------------------------------------------------------------
};
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
namespace
Impl
{
/// Specialization for the pair ( Kokkos::Serial::memory_space ,
/// Kokkos::Serial::scratch_memory_space ): access is flagged as permitted and
/// both verification hooks do nothing.
template<>
struct VerifyExecutionCanAccessMemorySpace
  < Kokkos::Serial::memory_space
  , Kokkos::Serial::scratch_memory_space
  >
{
  // Access is always allowed for this combination.
  enum { value = true };

  // Nothing to check, so both overloads have empty bodies.
  static inline void verify() {}
  static inline void verify( const void * ) {}
};
namespace SerialImpl {

/// Record holding a scratch pointer and two unsigned end markers.
/// NOTE(review): presumably m_reduce_end / m_shared_end delimit the reduction
/// and shared regions of the scratch buffer handed out by
/// Serial::scratch_memory_resize -- confirm against the implementation file.
struct Sentinel {

  void *   m_scratch ;     // scratch buffer pointer
  unsigned m_reduce_end ;  // end marker for the reduce portion
  unsigned m_shared_end ;  // end marker for the shared portion

  // Construction and destruction are defined outside this header.
  Sentinel();
  ~Sentinel();

  // Accessor for the shared instance (defined outside this header).
  static Sentinel & singleton();
};

// Alignment helper; only declared here.
inline unsigned align( unsigned n );

}
}
// namespace Impl
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace
Kokkos
{
namespace
Impl
{
/// \brief Team-member handle for team policies executed on Kokkos::Serial.
///
/// A team here consists of one thread: team_size() is 1 and team_rank() is 0,
/// so the team-collective operations reduce to trivial single-thread forms.
class SerialTeamMember {
private:

  using scratch_memory_space = Kokkos::ScratchMemorySpace< Kokkos::Serial > ;

  const scratch_memory_space m_space ;
  const int                  m_league_rank ;
  const int                  m_league_size ;

  // Copy assignment is private and only declared in this header.
  SerialTeamMember & operator = ( const SerialTeamMember & );

public:

  //! Scratch memory space of the team.
  KOKKOS_INLINE_FUNCTION
  const scratch_memory_space & team_shmem() const
    { return m_space ; }

  //! Team-level scratch; the level argument is ignored.
  KOKKOS_INLINE_FUNCTION
  const scratch_memory_space & team_scratch( int ) const
    { return m_space ; }

  //! Thread-level scratch; the level argument is ignored.
  KOKKOS_INLINE_FUNCTION
  const scratch_memory_space & thread_scratch( int ) const
    { return m_space ; }

  //! Rank of this team within the league.
  KOKKOS_INLINE_FUNCTION
  int league_rank() const { return m_league_rank ; }

  //! Number of teams in the league.
  KOKKOS_INLINE_FUNCTION
  int league_size() const { return m_league_size ; }

  //! Rank of the calling thread within the team; always 0.
  KOKKOS_INLINE_FUNCTION
  int team_rank() const { return 0 ; }

  //! Number of threads in the team; always 1.
  KOKKOS_INLINE_FUNCTION
  int team_size() const { return 1 ; }

  //! Team barrier; empty body.
  KOKKOS_INLINE_FUNCTION
  void team_barrier() const {}

  //! Team broadcast; empty body, both arguments are ignored.
  template< class ValueType >
  KOKKOS_INLINE_FUNCTION
  void team_broadcast( const ValueType & , const int & ) const {}

  //! Team reduction; returns the input value unchanged, the join is unused.
  template< class ValueType , class JoinOp >
  KOKKOS_INLINE_FUNCTION
  ValueType team_reduce( const ValueType & value , const JoinOp & ) const
    { return value ; }

  /** \brief  Intra-team exclusive prefix sum with an optional global accumulator.
   *
   *  When global_accum is non-null, its value before the update is returned
   *  and value is added to it.  When it is null, Type(0) is returned and
   *  nothing is modified.
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION
  Type team_scan( const Type & value , Type * const global_accum ) const
  {
    if ( ! global_accum ) { return Type( 0 ); }

    const Type before = *global_accum ;
    *global_accum += value ;
    return before ;
  }

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
   *
   *  The argument is ignored and Type(0) is returned.
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION
  Type team_scan( const Type & ) const
    { return Type( 0 ); }

  //----------------------------------------
  // Execution space specific:

  //! Constructor taking league rank, league size and shared size; defined elsewhere.
  SerialTeamMember( int arg_league_rank
                  , int arg_league_size
                  , int arg_shared_size
                  );
};
}
// namespace Impl
/*
* < Kokkos::Serial , WorkArgTag >
* < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
*
*/
namespace
Impl
{
/// \brief Team execution policy for Kokkos::Serial.
///
/// Stores the league size, a chunk size (32 on construction) and per-level
/// team / thread scratch sizes for two scratch levels.  The team size is
/// always 1; team-size and vector-length requests are accepted and ignored.
template< class ... Properties >
class TeamPolicyInternal< Kokkos::Serial , Properties ... >
  : public PolicyTraits< Properties ... >
{
private:

  size_t m_team_scratch_size[ 2 ] ;
  size_t m_thread_scratch_size[ 2 ] ;
  int    m_league_size ;
  int    m_chunk_size ;

public:

  //! Tag this class as a kokkos execution policy
  using execution_policy = TeamPolicyInternal ;

  using traits = PolicyTraits< Properties ... > ;

  //! Execution space of this execution policy:
  using execution_space = Kokkos::Serial ;

  //! Member-wise copy assignment.
  TeamPolicyInternal & operator = ( const TeamPolicyInternal & p )
  {
    m_league_size = p.m_league_size ;
    for ( int level = 0 ; level < 2 ; ++level ) {
      m_team_scratch_size[ level ]   = p.m_team_scratch_size[ level ] ;
      m_thread_scratch_size[ level ] = p.m_thread_scratch_size[ level ] ;
    }
    m_chunk_size = p.m_chunk_size ;
    return *this ;
  }

  //----------------------------------------

  //! Maximum team size for any functor: 1.
  template< class FunctorType >
  static int team_size_max( const FunctorType & ) { return 1 ; }

  //! Recommended team size for any functor: 1.
  template< class FunctorType >
  static int team_size_recommended( const FunctorType & ) { return 1 ; }

  //! Recommended team size for any functor; the second argument is ignored: 1.
  template< class FunctorType >
  static int team_size_recommended( const FunctorType & , const int & ) { return 1 ; }

  //----------------------------------------

  //! Team size of this policy: always 1.
  inline int team_size() const { return 1 ; }

  //! League size requested at construction.
  inline int league_size() const { return m_league_size ; }

  //! Sum of team and thread scratch sizes at the given level; second argument ignored.
  inline size_t scratch_size( const int & level , int = 0 ) const
  {
    return m_team_scratch_size[ level ] + m_thread_scratch_size[ level ] ;
  }

  /** \brief  Specify league size, request team size */
  TeamPolicyInternal( execution_space &
                    , int league_size_request
                    , int /* team_size_request */
                    , int /* vector_length_request */ = 1 )
    : m_team_scratch_size { 0 , 0 }
    , m_thread_scratch_size { 0 , 0 }
    , m_league_size( league_size_request )
    , m_chunk_size( 32 )
    {}

  /** \brief  Specify league size, automatic team size */
  TeamPolicyInternal( execution_space &
                    , int league_size_request
                    , const Kokkos::AUTO_t & /* team_size_request */
                    , int /* vector_length_request */ = 1 )
    : m_team_scratch_size { 0 , 0 }
    , m_thread_scratch_size { 0 , 0 }
    , m_league_size( league_size_request )
    , m_chunk_size( 32 )
    {}

  /** \brief  Specify league size, request team size (no execution space instance) */
  TeamPolicyInternal( int league_size_request
                    , int /* team_size_request */
                    , int /* vector_length_request */ = 1 )
    : m_team_scratch_size { 0 , 0 }
    , m_thread_scratch_size { 0 , 0 }
    , m_league_size( league_size_request )
    , m_chunk_size( 32 )
    {}

  /** \brief  Specify league size, automatic team size (no execution space instance) */
  TeamPolicyInternal( int league_size_request
                    , const Kokkos::AUTO_t & /* team_size_request */
                    , int /* vector_length_request */ = 1 )
    : m_team_scratch_size { 0 , 0 }
    , m_thread_scratch_size { 0 , 0 }
    , m_league_size( league_size_request )
    , m_chunk_size( 32 )
    {}

  //! Currently stored chunk size.
  inline int chunk_size() const { return m_chunk_size ; }

  /** \brief Return a copy of this policy with chunk_size set to a discrete value */
  inline TeamPolicyInternal set_chunk_size( typename traits::index_type chunk_size_ ) const
  {
    TeamPolicyInternal updated( *this );
    updated.m_chunk_size = chunk_size_ ;
    return updated ;
  }

  /** \brief Return a copy with the per team scratch size set for one level of the scratch hierarchy */
  inline TeamPolicyInternal set_scratch_size( const int & level , const PerTeamValue & per_team ) const
  {
    TeamPolicyInternal updated( *this );
    updated.m_team_scratch_size[ level ] = per_team.value ;
    return updated ;
  }

  /** \brief Return a copy with the per thread scratch size set for one level of the scratch hierarchy */
  inline TeamPolicyInternal set_scratch_size( const int & level , const PerThreadValue & per_thread ) const
  {
    TeamPolicyInternal updated( *this );
    updated.m_thread_scratch_size[ level ] = per_thread.value ;
    return updated ;
  }

  /** \brief Return a copy with both per team and per thread scratch sizes set for one level */
  inline TeamPolicyInternal set_scratch_size( const int & level
                                            , const PerTeamValue & per_team
                                            , const PerThreadValue & per_thread ) const
  {
    TeamPolicyInternal updated( *this );
    updated.m_team_scratch_size[ level ]   = per_team.value ;
    updated.m_thread_scratch_size[ level ] = per_thread.value ;
    return updated ;
  }

  //! Team-member handle type passed to functors.
  using member_type = Impl::SerialTeamMember ;
};
}
/* namespace Impl */
}
/* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Parallel patterns for Kokkos::Serial with RangePolicy */
namespace
Kokkos
{
namespace
Impl
{
template
<
class
FunctorType
,
class
...
Traits
>
class
ParallelFor
<
FunctorType
,
Kokkos
::
RangePolicy
<
Traits
...
>
,
Kokkos
::
Serial
>
{
private
:
typedef
Kokkos
::
RangePolicy
<
Traits
...
>
Policy
;
const
FunctorType
m_functor
;
const
Policy
m_policy
;
template
<
class
TagType
>
typename
std
::
enable_if
<
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
()
const
{
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
i
);
}
}
template
<
class
TagType
>
typename
std
::
enable_if
<
!
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
()
const
{
const
TagType
t
{}
;
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
t
,
i
);
}
}
public
:
inline
void
execute
()
const
{
this
->
template
exec
<
typename
Policy
::
work_tag
>
();
}
inline
ParallelFor
(
const
FunctorType
&
arg_functor
,
const
Policy
&
arg_policy
)
:
m_functor
(
arg_functor
)
,
m_policy
(
arg_policy
)
{}
};
/*--------------------------------------------------------------------------*/
template
<
class
FunctorType
,
class
ReducerType
,
class
...
Traits
>
class
ParallelReduce
<
FunctorType
,
Kokkos
::
RangePolicy
<
Traits
...
>
,
ReducerType
,
Kokkos
::
Serial
>
{
private
:
typedef
Kokkos
::
RangePolicy
<
Traits
...
>
Policy
;
typedef
typename
Policy
::
work_tag
WorkTag
;
typedef
Kokkos
::
Impl
::
if_c
<
std
::
is_same
<
InvalidType
,
ReducerType
>::
value
,
FunctorType
,
ReducerType
>
ReducerConditional
;
typedef
typename
ReducerConditional
::
type
ReducerTypeFwd
;
typedef
Kokkos
::
Impl
::
FunctorValueTraits
<
ReducerTypeFwd
,
WorkTag
>
ValueTraits
;
typedef
Kokkos
::
Impl
::
FunctorValueInit
<
ReducerTypeFwd
,
WorkTag
>
ValueInit
;
typedef
typename
ValueTraits
::
pointer_type
pointer_type
;
typedef
typename
ValueTraits
::
reference_type
reference_type
;
const
FunctorType
m_functor
;
const
Policy
m_policy
;
const
ReducerType
m_reducer
;
const
pointer_type
m_result_ptr
;
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
reference_type
update
=
ValueInit
::
init
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
i
,
update
);
}
Kokkos
::
Impl
::
FunctorFinal
<
ReducerTypeFwd
,
TagType
>::
final
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
}
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
!
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
const
TagType
t
{}
;
reference_type
update
=
ValueInit
::
init
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
t
,
i
,
update
);
}
Kokkos
::
Impl
::
FunctorFinal
<
ReducerTypeFwd
,
TagType
>::
final
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
}
public
:
inline
void
execute
()
const
{
pointer_type
ptr
=
(
pointer_type
)
Kokkos
::
Serial
::
scratch_memory_resize
(
ValueTraits
::
value_size
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
)
,
0
);
this
->
template
exec
<
WorkTag
>
(
m_result_ptr
?
m_result_ptr
:
ptr
);
}
template
<
class
HostViewType
>
ParallelReduce
(
const
FunctorType
&
arg_functor
,
const
Policy
&
arg_policy
,
const
HostViewType
&
arg_result_view
,
typename
std
::
enable_if
<
Kokkos
::
is_view
<
HostViewType
>::
value
&&
!
Kokkos
::
is_reducer_type
<
ReducerType
>::
value
,
void
*>::
type
=
NULL
)
:
m_functor
(
arg_functor
)
,
m_policy
(
arg_policy
)
,
m_reducer
(
InvalidType
()
)
,
m_result_ptr
(
arg_result_view
.
ptr_on_device
()
)
{
static_assert
(
Kokkos
::
is_view
<
HostViewType
>::
value
,
"Kokkos::Serial reduce result must be a View"
);
static_assert
(
std
::
is_same
<
typename
HostViewType
::
memory_space
,
HostSpace
>::
value
,
"Kokkos::Serial reduce result must be a View in HostSpace"
);
}
inline
ParallelReduce
(
const
FunctorType
&
arg_functor
,
Policy
arg_policy
,
const
ReducerType
&
reducer
)
:
m_functor
(
arg_functor
)
,
m_policy
(
arg_policy
)
,
m_reducer
(
reducer
)
,
m_result_ptr
(
reducer
.
result_view
().
data
()
)
{
/*static_assert( std::is_same< typename ViewType::memory_space
, Kokkos::HostSpace >::value
, "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/
}
};
/*--------------------------------------------------------------------------*/
template
<
class
FunctorType
,
class
...
Traits
>
class
ParallelScan
<
FunctorType
,
Kokkos
::
RangePolicy
<
Traits
...
>
,
Kokkos
::
Serial
>
{
private
:
typedef
Kokkos
::
RangePolicy
<
Traits
...
>
Policy
;
typedef
typename
Policy
::
work_tag
WorkTag
;
typedef
Kokkos
::
Impl
::
FunctorValueTraits
<
FunctorType
,
WorkTag
>
ValueTraits
;
typedef
Kokkos
::
Impl
::
FunctorValueInit
<
FunctorType
,
WorkTag
>
ValueInit
;
typedef
typename
ValueTraits
::
pointer_type
pointer_type
;
typedef
typename
ValueTraits
::
reference_type
reference_type
;
const
FunctorType
m_functor
;
const
Policy
m_policy
;
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
reference_type
update
=
ValueInit
::
init
(
m_functor
,
ptr
);
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
i
,
update
,
true
);
}
}
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
!
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
const
TagType
t
{}
;
reference_type
update
=
ValueInit
::
init
(
m_functor
,
ptr
);
const
typename
Policy
::
member_type
e
=
m_policy
.
end
();
for
(
typename
Policy
::
member_type
i
=
m_policy
.
begin
()
;
i
<
e
;
++
i
)
{
m_functor
(
t
,
i
,
update
,
true
);
}
}
public
:
inline
void
execute
()
const
{
pointer_type
ptr
=
(
pointer_type
)
Kokkos
::
Serial
::
scratch_memory_resize
(
ValueTraits
::
value_size
(
m_functor
)
,
0
);
this
->
template
exec
<
WorkTag
>
(
ptr
);
}
inline
ParallelScan
(
const
FunctorType
&
arg_functor
,
const
Policy
&
arg_policy
)
:
m_functor
(
arg_functor
)
,
m_policy
(
arg_policy
)
{}
};
}
// namespace Impl
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Parallel patterns for Kokkos::Serial with TeamPolicy */
namespace
Kokkos
{
namespace
Impl
{
template
<
class
FunctorType
,
class
...
Properties
>
class
ParallelFor
<
FunctorType
,
Kokkos
::
TeamPolicy
<
Properties
...
>
,
Kokkos
::
Serial
>
{
private
:
typedef
TeamPolicyInternal
<
Kokkos
::
Serial
,
Properties
...
>
Policy
;
typedef
typename
Policy
::
member_type
Member
;
const
FunctorType
m_functor
;
const
int
m_league
;
const
int
m_shared
;
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
()
const
{
for
(
int
ileague
=
0
;
ileague
<
m_league
;
++
ileague
)
{
m_functor
(
Member
(
ileague
,
m_league
,
m_shared
)
);
}
}
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
!
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
()
const
{
const
TagType
t
{}
;
for
(
int
ileague
=
0
;
ileague
<
m_league
;
++
ileague
)
{
m_functor
(
t
,
Member
(
ileague
,
m_league
,
m_shared
)
);
}
}
public
:
inline
void
execute
()
const
{
Kokkos
::
Serial
::
scratch_memory_resize
(
0
,
m_shared
);
this
->
template
exec
<
typename
Policy
::
work_tag
>
();
}
ParallelFor
(
const
FunctorType
&
arg_functor
,
const
Policy
&
arg_policy
)
:
m_functor
(
arg_functor
)
,
m_league
(
arg_policy
.
league_size
()
)
,
m_shared
(
arg_policy
.
scratch_size
(
0
)
+
arg_policy
.
scratch_size
(
1
)
+
FunctorTeamShmemSize
<
FunctorType
>::
value
(
arg_functor
,
1
)
)
{
}
};
/*--------------------------------------------------------------------------*/
template
<
class
FunctorType
,
class
ReducerType
,
class
...
Properties
>
class
ParallelReduce
<
FunctorType
,
Kokkos
::
TeamPolicy
<
Properties
...
>
,
ReducerType
,
Kokkos
::
Serial
>
{
private
:
typedef
TeamPolicyInternal
<
Kokkos
::
Serial
,
Properties
...
>
Policy
;
typedef
typename
Policy
::
member_type
Member
;
typedef
typename
Policy
::
work_tag
WorkTag
;
typedef
Kokkos
::
Impl
::
if_c
<
std
::
is_same
<
InvalidType
,
ReducerType
>::
value
,
FunctorType
,
ReducerType
>
ReducerConditional
;
typedef
typename
ReducerConditional
::
type
ReducerTypeFwd
;
typedef
Kokkos
::
Impl
::
FunctorValueTraits
<
ReducerTypeFwd
,
WorkTag
>
ValueTraits
;
typedef
Kokkos
::
Impl
::
FunctorValueInit
<
ReducerTypeFwd
,
WorkTag
>
ValueInit
;
typedef
typename
ValueTraits
::
pointer_type
pointer_type
;
typedef
typename
ValueTraits
::
reference_type
reference_type
;
const
FunctorType
m_functor
;
const
int
m_league
;
const
ReducerType
m_reducer
;
pointer_type
m_result_ptr
;
const
int
m_shared
;
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
reference_type
update
=
ValueInit
::
init
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
for
(
int
ileague
=
0
;
ileague
<
m_league
;
++
ileague
)
{
m_functor
(
Member
(
ileague
,
m_league
,
m_shared
)
,
update
);
}
Kokkos
::
Impl
::
FunctorFinal
<
ReducerTypeFwd
,
TagType
>::
final
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
}
template
<
class
TagType
>
inline
typename
std
::
enable_if
<
!
std
::
is_same
<
TagType
,
void
>::
value
>::
type
exec
(
pointer_type
ptr
)
const
{
const
TagType
t
{}
;
reference_type
update
=
ValueInit
::
init
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
for
(
int
ileague
=
0
;
ileague
<
m_league
;
++
ileague
)
{
m_functor
(
t
,
Member
(
ileague
,
m_league
,
m_shared
)
,
update
);
}
Kokkos
::
Impl
::
FunctorFinal
<
ReducerTypeFwd
,
TagType
>::
final
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
,
ptr
);
}
public
:
inline
void
execute
()
const
{
pointer_type
ptr
=
(
pointer_type
)
Kokkos
::
Serial
::
scratch_memory_resize
(
ValueTraits
::
value_size
(
ReducerConditional
::
select
(
m_functor
,
m_reducer
)
)
,
m_shared
);
this
->
template
exec
<
WorkTag
>
(
m_result_ptr
?
m_result_ptr
:
ptr
);
}
template
<
class
ViewType
>
ParallelReduce
(
const
FunctorType
&
arg_functor
,
const
Policy
&
arg_policy
,
const
ViewType
&
arg_result
,
typename
std
::
enable_if
<
Kokkos
::
is_view
<
ViewType
>::
value
&&
!
Kokkos
::
is_reducer_type
<
ReducerType
>::
value
,
void
*>::
type
=
NULL
)
:
m_functor
(
arg_functor
)
,
m_league
(
arg_policy
.
league_size
()
)
,
m_reducer
(
InvalidType
()
)
,
m_result_ptr
(
arg_result
.
ptr_on_device
()
)
,
m_shared
(
arg_policy
.
scratch_size
(
0
)
+
arg_policy
.
scratch_size
(
1
)
+
FunctorTeamShmemSize
<
FunctorType
>::
value
(
m_functor
,
1
)
)
{
static_assert
(
Kokkos
::
is_view
<
ViewType
>::
value
,
"Reduction result on Kokkos::Serial must be a Kokkos::View"
);
static_assert
(
std
::
is_same
<
typename
ViewType
::
memory_space
,
Kokkos
::
HostSpace
>::
value
,
"Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace"
);
}
inline
ParallelReduce
(
const
FunctorType
&
arg_functor
,
Policy
arg_policy
,
const
ReducerType
&
reducer
)
:
m_functor
(
arg_functor
)
,
m_league
(
arg_policy
.
league_size
()
)
,
m_reducer
(
reducer
)
,
m_result_ptr
(
reducer
.
result_view
().
data
()
)
,
m_shared
(
arg_policy
.
scratch_size
(
0
)
+
arg_policy
.
scratch_size
(
1
)
+
FunctorTeamShmemSize
<
FunctorType
>::
value
(
arg_functor
,
arg_policy
.
team_size
()
)
)
{
/*static_assert( std::is_same< typename ViewType::memory_space
, Kokkos::HostSpace >::value
, "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/
}
};
}
// namespace Impl
}
// namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Nested parallel patterns for Kokkos::Serial with TeamPolicy */
namespace
Kokkos
{
namespace
Impl
{
/// \brief Loop bounds for a team-thread range on a Serial team member.
///
/// Holds the begin and end index, a fixed increment of 1, and a reference to
/// the team member the range was created for.
template< typename iType >
struct TeamThreadRangeBoundariesStruct< iType , SerialTeamMember >
{
  using index_type = iType ;

  const iType begin ;
  const iType end ;
  enum { increment = 1 };
  const SerialTeamMember & thread ;

  //! Range starting at 0 and ending at arg_count.
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const SerialTeamMember & arg_thread
                                 , const iType & arg_count )
    : begin( 0 )
    , end( arg_count )
    , thread( arg_thread )
    {}

  //! Range starting at arg_begin and ending at arg_end.
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const SerialTeamMember & arg_thread
                                 , const iType & arg_begin
                                 , const iType & arg_end )
    : begin( arg_begin )
    , end( arg_end )
    , thread( arg_thread )
    {}
};
/// \brief Loop bounds for a thread-vector range on a Serial team member.
///
/// The start is fixed at 0 and the increment at 1; only the end index is
/// stored.  The team member passed to the constructor is not retained.
template< typename iType >
struct ThreadVectorRangeBoundariesStruct< iType , SerialTeamMember >
{
  using index_type = iType ;

  enum { start = 0 };
  const iType end ;
  enum { increment = 1 };

  //! Range from 0 up to count; the team member argument is unused.
  KOKKOS_INLINE_FUNCTION
  ThreadVectorRangeBoundariesStruct( const SerialTeamMember & /* thread */
                                   , const iType & count )
    : end( count )
    {}
};
}
// namespace Impl
/// \brief Build team-thread loop bounds covering 0 .. count for a Serial team member.
template< typename iType >
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember >
TeamThreadRange( const Impl::SerialTeamMember & thread , const iType & count )
{
  using boundaries_type =
    Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember > ;

  return boundaries_type( thread , count );
}
/// \brief Build team-thread loop bounds covering begin .. end for a Serial team member.
template< typename iType >
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember >
TeamThreadRange( const Impl::SerialTeamMember & thread
               , const iType & begin
               , const iType & end )
{
  using boundaries_type =
    Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember > ;

  return boundaries_type( thread , begin , end );
}
/// \brief Build thread-vector loop bounds covering 0 .. count for a Serial team member.
template< typename iType >
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct< iType , Impl::SerialTeamMember >
ThreadVectorRange( const Impl::SerialTeamMember & thread , const iType & count )
{
  using boundaries_type =
    Impl::ThreadVectorRangeBoundariesStruct< iType , Impl::SerialTeamMember > ;

  return boundaries_type( thread , count );
}
/// \brief Wrap a Serial team member in a ThreadSingleStruct.
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct< Impl::SerialTeamMember >
PerTeam( const Impl::SerialTeamMember & thread )
{
  using single_type = Impl::ThreadSingleStruct< Impl::SerialTeamMember > ;

  return single_type( thread );
}
/// \brief Wrap a Serial team member in a VectorSingleStruct.
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct< Impl::SerialTeamMember >
PerThread( const Impl::SerialTeamMember & thread )
{
  using single_type = Impl::VectorSingleStruct< Impl::SerialTeamMember > ;

  return single_type( thread );
}
}
// namespace Kokkos
namespace
Kokkos
{
/** \brief  Inter-thread parallel_for over a team-thread range.
 *
 *  Calls lambda(iType i) for every index from loop_boundaries.begin up to,
 *  but not including, loop_boundaries.end, in increasing order and stepping
 *  by loop_boundaries.increment.
 *  This functionality requires C++11 support. */
template< typename iType , class Lambda >
KOKKOS_INLINE_FUNCTION
void parallel_for
  ( const Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember > & loop_boundaries
  , const Lambda & lambda
  )
{
  for ( iType idx = loop_boundaries.begin
      ; idx < loop_boundaries.end
      ; idx += loop_boundaries.increment )
  {
    lambda( idx );
  }
}
/** \brief  Inter-thread parallel_reduce (summation) over a team-thread range.
 *
 *  result is first reset to ValueType().  For every index in the range,
 *  lambda(iType i, ValueType & val) is called on a fresh ValueType() and the
 *  outcome is added to result with +=.  The total is finally passed through
 *  the team member's team_reduce with Impl::JoinAdd.
 *  This functionality requires C++11 support. */
template< typename iType , class Lambda , typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember > & loop_boundaries
  , const Lambda & lambda
  , ValueType & result
  )
{
  result = ValueType();

  for ( iType idx = loop_boundaries.begin
      ; idx < loop_boundaries.end
      ; idx += loop_boundaries.increment )
  {
    // Each index contributes through its own value-initialized temporary.
    ValueType contribution = ValueType();
    lambda( idx , contribution );
    result += contribution ;
  }

  result = loop_boundaries.thread.team_reduce( result , Impl::JoinAdd< ValueType >() );
}
/** \brief  Inter-thread parallel_reduce over a team-thread range with a user-supplied join.
 *
 *  Calls lambda(iType i, ValueType & val) for every index from
 *  loop_boundaries.begin up to, but not including, loop_boundaries.end, and
 *  combines the per-index values with join(ValueType & val, const ValueType & update).
 *  The combined value is passed through the team member's team_reduce and
 *  written back to init_result.
 *
 *  The input value of init_result seeds both the running result and every
 *  per-index temporary handed to the lambda.  It must therefore be the neutral
 *  element of the join operation (e.g. '0 for +' or '1 for *'); any other
 *  value is folded into the result once per index.
 *
 *  \param loop_boundaries  range to iterate and the team member it belongs to
 *  \param lambda           per-index body, updates its second argument
 *  \param join             binary combine operation
 *  \param init_result      in: neutral element of join; out: reduction result
 *
 *  This functionality requires C++11 support. */
template< typename iType , class Lambda , typename ValueType , class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce
  ( const Impl::TeamThreadRangeBoundariesStruct< iType , Impl::SerialTeamMember > & loop_boundaries
  , const Lambda & lambda
  , const JoinType & join
  , ValueType & init_result
  )
{
  ValueType result = init_result ;

  for ( iType i = loop_boundaries.begin
      ; i < loop_boundaries.end
      ; i += loop_boundaries.increment )
  {
    // Seed the temporary with the caller's neutral element, not ValueType():
    // a value-initialized temporary is only neutral for additive joins and
    // would corrupt e.g. product or min/max reductions.
    ValueType tmp = init_result ;
    lambda( i , tmp );
    join( result , tmp );
  }

  init_result = loop_boundaries.thread.team_reduce
    ( result , Impl::JoinLambdaAdapter< ValueType , JoinType >( join ) );
}
}
//namespace Kokkos
namespace
Kokkos
{
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread.
 * This specialization for Impl::SerialTeamMember runs a plain loop from
 * loop_boundaries.start up to (excluding) loop_boundaries.end in steps of
 * loop_boundaries.increment; the loop is annotated with "#pragma ivdep" when
 * KOKKOS_HAVE_PRAGMA_IVDEP is defined.
 * This functionality requires C++11 support.
 *
 * \param loop_boundaries  Thread-vector range supplying start, end and increment.
 * \param lambda           Callable invoked as lambda(i) once per visited index.
 */
template< typename iType, class Lambda >
KOKKOS_INLINE_FUNCTION
void parallel_for( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::SerialTeamMember > & loop_boundaries,
                   const Lambda & lambda )
{
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    lambda( lane );
  }
}
/** \brief Intra-thread vector parallel_reduce (summation). Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
 * val is performed and put into result. Any value held by result on entry is discarded:
 * it is reset to ValueType() before the loop. Each iteration hands the lambda a fresh
 * ValueType() and adds what the lambda leaves in it with operator+=.
 * This functionality requires C++11 support.
 *
 * \param loop_boundaries  Thread-vector range supplying start, end and increment.
 * \param lambda           Callable invoked as lambda(i, contribution).
 * \param result           Output: receives the sum of all contributions.
 */
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::SerialTeamMember > & loop_boundaries,
                      const Lambda & lambda,
                      ValueType & result )
{
  result = ValueType();

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( lane, contribution );
    result += contribution;
  }
}
/** \brief Intra-thread vector parallel_reduce with a user-supplied join. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
 * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
 * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
 * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
 * '1 for *'). This functionality requires C++11 support.
 *
 * \param loop_boundaries  Thread-vector range supplying start, end and increment.
 * \param lambda           Callable invoked as lambda(i, val); val starts at the neutral element.
 * \param join             Callable invoked as join(accumulated, val) after every lambda call.
 * \param init_result      In: neutral element of join. Out: the reduced value.
 */
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::SerialTeamMember > & loop_boundaries,
                      const Lambda & lambda,
                      const JoinType & join,
                      ValueType & init_result )
{
  // Snapshot the caller-provided neutral element once, so that every temporary
  // is seeded from the same value even if init_result is aliased elsewhere.
  const ValueType neutral = init_result;

  ValueType result = neutral;

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment ) {
    // Seed with the neutral element rather than ValueType(): a default-constructed
    // value is not neutral for joins such as product or min/max.
    ValueType tmp = neutral;
    lambda( i, tmp );
    join( result, tmp );
  }

  init_result = result;
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
 * for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
 * Depending on the target execution space the operator might be called twice: once with final=false
 * and once with final=true. When final==true val contains the prefix sum value. The contribution of this
 * "i" needs to be added to val no matter whether final==true or not.
 *
 * In this specialization for Impl::SerialTeamMember the operator is called exactly once per index,
 * always with final==true. The running value starts at value_type() (the functor's value type as
 * reported by Impl::FunctorValueTraits) and lives in a local variable that is threaded through all
 * calls; it is not handed back to the caller.
 * This functionality requires C++11 support.
 *
 * \param loop_boundaries  Thread-vector range supplying start, end and increment.
 * \param lambda           Callable invoked as lambda(i, running_value, true).
 */
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::SerialTeamMember > & loop_boundaries,
                    const FunctorType & lambda )
{
  typedef typename Kokkos::Impl::FunctorValueTraits< FunctorType, void >::value_type value_type;

  value_type running_value = value_type();

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    lambda( lane, running_value, true );
  }
}
}
// namespace Kokkos
namespace
Kokkos
{
/** \brief Per-vector-lane "single" without a broadcast value.
 *
 * In this specialization for Impl::SerialTeamMember the VectorSingleStruct
 * argument is unnamed and unused (it only selects the overload) and the
 * callable is invoked exactly once.
 *
 * \param lambda  Callable invoked as lambda().
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct< Impl::SerialTeamMember > & /* unused tag */,
             const FunctorType & lambda )
{
  lambda();
}
/** \brief Per-thread "single" without a broadcast value.
 *
 * In this specialization for Impl::SerialTeamMember the ThreadSingleStruct
 * argument is unnamed and unused (it only selects the overload) and the
 * callable is invoked exactly once.
 *
 * \param lambda  Callable invoked as lambda().
 */
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct< Impl::SerialTeamMember > & /* unused tag */,
             const FunctorType & lambda )
{
  lambda();
}
/** \brief Per-vector-lane "single" that passes a value to the callable.
 *
 * In this specialization for Impl::SerialTeamMember the VectorSingleStruct
 * argument is unnamed and unused (it only selects the overload) and the
 * callable is invoked exactly once, receiving val by reference.
 *
 * \param lambda  Callable invoked as lambda(val).
 * \param val     Value forwarded to the callable; the callable may modify it.
 */
template< class FunctorType, class ValueType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct< Impl::SerialTeamMember > & /* unused tag */,
             const FunctorType & lambda,
             ValueType & val )
{
  lambda( val );
}
/** \brief Per-thread "single" that passes a value to the callable.
 *
 * In this specialization for Impl::SerialTeamMember the ThreadSingleStruct
 * argument is unnamed and unused (it only selects the overload) and the
 * callable is invoked exactly once, receiving val by reference.
 *
 * \param lambda  Callable invoked as lambda(val).
 * \param val     Value forwarded to the callable; the callable may modify it.
 */
template< class FunctorType, class ValueType >
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct< Impl::SerialTeamMember > & /* unused tag */,
             const FunctorType & lambda,
             ValueType & val )
{
  lambda( val );
}
}
//----------------------------------------------------------------------------
#include <impl/Kokkos_Serial_Task.hpp>
#endif
// defined( KOKKOS_HAVE_SERIAL )
#endif
/* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
Event Timeline
Log In to Comment