cub::BlockRadixRankMatchEarlyCounts#

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, WarpMatchAlgorithm MATCH_ALGORITHM = WARP_MATCH_ANY, int NUM_PARTS = 1>
struct BlockRadixRankMatchEarlyCounts#

A matching-based radix-rank variant that computes the number of keys for each digit value early, at the expense of doing more work.

This may be useful, e.g., for decoupled look-back, where it reduces the time other thread blocks need to wait for digit counts to become available.

Public Types

enum [anonymous]#

Values:

enumerator BLOCK_THREADS = BLOCK_DIM_X#
enumerator RADIX_DIGITS = 1 << RADIX_BITS#
enumerator BINS_PER_THREAD = (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS#
enumerator BINS_TRACKED_PER_THREAD = BINS_PER_THREAD#
enumerator FULL_BINS = BINS_PER_THREAD * BLOCK_THREADS == RADIX_DIGITS#
enumerator WARP_THREADS = detail::warp_threads#
enumerator PARTIAL_WARP_THREADS = BLOCK_THREADS % WARP_THREADS#
enumerator BLOCK_WARPS = BLOCK_THREADS / WARP_THREADS#
enumerator PARTIAL_WARP_ID = BLOCK_WARPS - 1#
enumerator WARP_MASK = ~0#
enumerator NUM_MATCH_MASKS = MATCH_ALGORITHM == WARP_MATCH_ATOMIC_OR ? BLOCK_WARPS : 0#
using BlockScan = cub::BlockScan<int, BLOCK_THREADS, INNER_SCAN_ALGORITHM>#

Public Functions

inline BlockRadixRankMatchEarlyCounts(TempStorage &temp_storage)#
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
inline void RankKeys(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
DigitExtractorT digit_extractor,
int (&exclusive_digit_prefix)[BINS_PER_THREAD],
CountsCallback callback,
)#

Rank keys.

For the lower RADIX_DIGITS threads, each thread is provided with the digit count for its corresponding digit.

template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
DigitExtractorT digit_extractor,
int (&exclusive_digit_prefix)[BINS_PER_THREAD],
)#
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
DigitExtractorT digit_extractor,
)#

Public Members

TempStorage &temp_storage#
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
struct BlockRadixRankMatchInternal#

Public Functions

inline ::cuda::std::uint32_t Digit(UnsignedBits key)#
inline int ThreadBin(int u)#
inline void ComputeHistogramsWarp(
UnsignedBits (&keys)[KEYS_PER_THREAD],
)#
inline void ComputeOffsetsWarpUpsweep(int (&bins)[BINS_PER_THREAD])#
inline void ComputeOffsetsWarpDownsweep(
int (&offsets)[BINS_PER_THREAD],
)#
inline void ComputeRanksItem(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
detail::constant_t<WARP_MATCH_ATOMIC_OR>,
)#
inline void ComputeRanksItem(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
detail::constant_t<WARP_MATCH_ANY>,
)#
inline void RankKeys(
UnsignedBits (&keys)[KEYS_PER_THREAD],
int (&ranks)[KEYS_PER_THREAD],
int (&exclusive_digit_prefix)[BINS_PER_THREAD],
)#
inline BlockRadixRankMatchInternal(
TempStorage &temp_storage,
DigitExtractorT digit_extractor,
CountsCallback callback,
)#

Public Members

TempStorage &s#
DigitExtractorT digit_extractor#
CountsCallback callback#
int warp#
int lane#
struct TempStorage#

Public Members

int warp_offsets[BLOCK_WARPS][RADIX_DIGITS]#
int warp_histograms[BLOCK_WARPS][RADIX_DIGITS][NUM_PARTS]#
union cub::BlockRadixRankMatchEarlyCounts::TempStorage::[anonymous] [anonymous]#
int match_masks[MATCH_MASKS_ALLOC_SIZE][RADIX_DIGITS]#
BlockScan::TempStorage prefix_tmp#