cub::BlockRadixRankMatchEarlyCounts

Defined in /home/runner/work/cccl/cccl/cub/cub/block/block_radix_rank.cuh

template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, WarpMatchAlgorithm MATCH_ALGORITHM = WARP_MATCH_ANY, int NUM_PARTS = 1>
struct BlockRadixRankMatchEarlyCounts

A matching-based radix rank that computes the count of keys for each digit value early, at the expense of doing more work.

This may be useful, e.g., for decoupled look-back, where it reduces the time that other thread blocks need to wait for digit counts to become available.

Public Types

enum [anonymous]

Values:

enumerator BLOCK_THREADS
enumerator RADIX_DIGITS
enumerator BINS_PER_THREAD
enumerator BINS_TRACKED_PER_THREAD
enumerator FULL_BINS
enumerator WARP_THREADS
enumerator PARTIAL_WARP_THREADS
enumerator BLOCK_WARPS
enumerator PARTIAL_WARP_ID
enumerator WARP_MASK
enumerator NUM_MATCH_MASKS
using BlockScan = cub::BlockScan<int, BLOCK_THREADS, INNER_SCAN_ALGORITHM>

Public Functions

inline BlockRadixRankMatchEarlyCounts(TempStorage &temp_storage)
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int (&exclusive_digit_prefix)[BINS_PER_THREAD], CountsCallback callback)

Rank keys.

Each of the lower RADIX_DIGITS threads is provided with the digit counts for its corresponding digit(s).

template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int (&exclusive_digit_prefix)[BINS_PER_THREAD])
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor)

Public Members

TempStorage &temp_storage
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
struct BlockRadixRankMatchInternal

Public Functions

inline ::cuda::std::uint32_t Digit(UnsignedBits key)
inline int ThreadBin(int u)
inline void ComputeHistogramsWarp(UnsignedBits (&keys)[KEYS_PER_THREAD])
inline void ComputeOffsetsWarpUpsweep(int (&bins)[BINS_PER_THREAD])
inline void ComputeOffsetsWarpDownsweep(int (&offsets)[BINS_PER_THREAD])
inline void ComputeRanksItem(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], Int2Type<WARP_MATCH_ATOMIC_OR>)
inline void ComputeRanksItem(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], Int2Type<WARP_MATCH_ANY>)
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], int (&exclusive_digit_prefix)[BINS_PER_THREAD])
inline BlockRadixRankMatchInternal(TempStorage &temp_storage, DigitExtractorT digit_extractor, CountsCallback callback)

Public Members

TempStorage &s
DigitExtractorT digit_extractor
CountsCallback callback
int warp
int lane
struct TempStorage

Public Members

int warp_offsets[BLOCK_WARPS][RADIX_DIGITS]
int warp_histograms[BLOCK_WARPS][RADIX_DIGITS][NUM_PARTS]
union cub::BlockRadixRankMatchEarlyCounts::TempStorage::[anonymous] [anonymous]
int match_masks[MATCH_MASKS_ALLOC_SIZE][RADIX_DIGITS]
BlockScan::TempStorage prefix_tmp