* cub::BlockHistogramTilesSort implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of a device-wide histogram, using local sorting
*/
#pragma once
#include <iterator>
#include "../../../block/block_radix_sort.cuh"
#include "../../../block/block_discontinuity.cuh"
#include "../../../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/**
* BlockHistogramTilesSort implements a stateful abstraction of CUDA thread blocks for histogramming multiple tiles as part of a device-wide histogram, using local sorting
int BINS, ///< Number of histogram bins per channel
int CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of active channels being histogrammed)
int ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed
typename InputIteratorRA, ///< The input iterator type (may be a simple pointer type). Must have a value type that can be cast as an integer in the range [0..BINS-1]
typename HistoCounter, ///< Integral type for counting sample occurrences per histogram bin