* The cub::BlockDiscontinuity class provides [<em>collective</em>](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block.
*/
#pragma once
#include "../util_type.cuh"
#include "../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/**
* \brief The BlockDiscontinuity class provides [<em>collective</em>](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. ![](discont_logo.png)
* \ingroup BlockModule
*
* \par Overview
* A set of "head flags" (or "tail flags") is often used to indicate corresponding items
* that differ from their predecessors (or successors). For example, head flags are convenient
* for demarcating disjoint data segments as part of a segmented scan or reduction.
*
* \tparam T The data type to be flagged.
* \tparam BLOCK_THREADS The thread block size in threads.
*
* \par A Simple Example
* \blockcollective{BlockDiscontinuity}
* \par
* The code snippet below illustrates the head flagging of 512 integer items that
* are partitioned in a [<em>blocked arrangement</em>](index.html#sec5sec4) across 128 threads
* where each thread owns 4 consecutive items.
* \par
* \code
* #include <cub/cub.cuh>
*
* __global__ void ExampleKernel(...)
* {
* // Specialize BlockDiscontinuity for 128 threads on type int
* \brief Collective constructor for 1D thread blocks using a private static allocation of shared memory as temporary storage. Threads are identified using <tt>threadIdx.x</tt>.
*/
__device__ __forceinline__ BlockDiscontinuity()
    : temp_storage(PrivateStorage()),   // Bind to the allocation handed back by PrivateStorage()
      linear_tid(threadIdx.x)           // 1D thread block: the thread's rank is simply threadIdx.x
{
    // All state is established by the initializer list above.
}
/**
* \brief Collective constructor for 1D thread blocks using the specified memory allocation as temporary storage. Threads are identified using <tt>threadIdx.x</tt>.
*/
__device__ __forceinline__ BlockDiscontinuity(
    TempStorage &temp_storage)          ///< [in] Reference to memory allocation having layout type TempStorage
    // Inside the initializer's parentheses, "temp_storage" names the constructor
    // argument (it shadows the member), so the member is bound to the
    // result of calling Alias() on the caller-supplied allocation.
    : temp_storage(temp_storage.Alias()),
      linear_tid(threadIdx.x)           // 1D thread block: the thread's rank is simply threadIdx.x
{
    // All state is established by the initializer list above.
}
/**
* \brief Collective constructor using a private static allocation of shared memory as temporary storage. Each thread is identified using the supplied linear thread identifier.
*/
__device__ __forceinline__ BlockDiscontinuity(
    int linear_tid)                     ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., <tt>(threadIdx.y * blockDim.x) + threadIdx.x</tt> for 2D thread blocks)
    : temp_storage(PrivateStorage()),   // Bind to the allocation handed back by PrivateStorage()
      // Inside the parentheses, "linear_tid" names the constructor argument
      // (it shadows the member), so the caller's identifier is stored as-is.
      linear_tid(linear_tid)
{
    // All state is established by the initializer list above.
}
/**
* \brief Collective constructor using the specified memory allocation as temporary storage. Each thread is identified using the supplied linear thread identifier.
*/
__device__ __forceinline__ BlockDiscontinuity(
TempStorage &temp_storage, ///< [in] Reference to memory allocation having layout type TempStorage
int linear_tid) ///< [in] <b>[optional]</b> A suitable 1D thread-identifier for the calling thread (e.g., <tt>(threadIdx.y * blockDim.x) + threadIdx.x</tt> for 2D thread blocks)
* \brief Sets head flags indicating discontinuities between items partitioned across the thread block, for which the first item has no reference and is always flagged.
*
* The flag <tt>head_flags<sub><em>i</em></sub></tt> is set for item
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam FlagT <b>[inferred]</b> The flag type (must be an integer type)
* \tparam FlagOp <b>[inferred]</b> Binary predicate functor type having member <tt>bool operator()(const T &a, const T &b)</tt> or member <tt>bool operator()(const T &a, const T &b, unsigned int b_index)</tt>, and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data.
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam FlagT <b>[inferred]</b> The flag type (must be an integer type)
* \tparam FlagOp <b>[inferred]</b> Binary predicate functor type having member <tt>bool operator()(const T &a, const T &b)</tt> or member <tt>bool operator()(const T &a, const T &b, unsigned int b_index)</tt>, and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data.
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
FlagOp flag_op, ///< [in] Binary boolean flag predicate
T tile_predecessor_item) ///< [in] <b>[<em>thread</em><sub>0</sub> only]</b> Item with which to compare the first tile item (<tt>input<sub>0</sub></tt> from <em>thread</em><sub>0</sub>).
* \brief Sets tail flags indicating discontinuities between items partitioned across the thread block, for which the last item has no reference and is always flagged.
*
* The flag <tt>tail_flags<sub><em>i</em></sub></tt> is set for item
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam FlagT <b>[inferred]</b> The flag type (must be an integer type)
* \tparam FlagOp <b>[inferred]</b> Binary predicate functor type having member <tt>bool operator()(const T &a, const T &b)</tt> or member <tt>bool operator()(const T &a, const T &b, unsigned int b_index)</tt>, and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data.
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam FlagT <b>[inferred]</b> The flag type (must be an integer type)
* \tparam FlagOp <b>[inferred]</b> Binary predicate functor type having member <tt>bool operator()(const T &a, const T &b)</tt> or member <tt>bool operator()(const T &a, const T &b, unsigned int b_index)</tt>, and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data.
T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
FlagOp flag_op, ///< [in] Binary boolean flag predicate
T tile_successor_item) ///< [in] <b>[<em>thread</em><sub><tt>BLOCK_THREADS</tt>-1</sub> only]</b> Item with which to compare the last tile item (<tt>input</tt><sub><em>ITEMS_PER_THREAD</em>-1</sub> from <em>thread</em><sub><em>BLOCK_THREADS</em>-1</sub>).
{
// Share first item
temp_storage[linear_tid] = input[0];
__syncthreads();
// Set flag for last item
int successor_item = (linear_tid == BLOCK_THREADS - 1) ?