cub::BlockRadixRankMatchEarlyCounts#
-
template<int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, WarpMatchAlgorithm MATCH_ALGORITHM = WARP_MATCH_ANY, int NUM_PARTS = 1>
struct BlockRadixRankMatchEarlyCounts#
Radix-rank using matching which computes the counts of keys for each digit value early, at the expense of doing more work.
This may be useful, e.g., for decoupled look-back, where it reduces the time other thread blocks need to wait for digit counts to become available.
Public Types
-
enum [anonymous]#
Values:
-
enumerator BLOCK_THREADS = BLOCK_DIM_X#
-
enumerator RADIX_DIGITS = 1 << RADIX_BITS#
-
enumerator BINS_PER_THREAD = (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS#
-
enumerator BINS_TRACKED_PER_THREAD = BINS_PER_THREAD#
-
enumerator FULL_BINS = BINS_PER_THREAD * BLOCK_THREADS == RADIX_DIGITS#
-
enumerator WARP_THREADS = detail::warp_threads#
-
enumerator PARTIAL_WARP_THREADS = BLOCK_THREADS % WARP_THREADS#
-
enumerator BLOCK_WARPS = BLOCK_THREADS / WARP_THREADS#
-
enumerator PARTIAL_WARP_ID = BLOCK_WARPS - 1#
-
enumerator WARP_MASK = ~0#
-
enumerator NUM_MATCH_MASKS = MATCH_ALGORITHM == WARP_MATCH_ATOMIC_OR ? BLOCK_WARPS : 0#
-
enumerator BLOCK_THREADS = BLOCK_DIM_X#
-
using BlockScan = cub::BlockScan<int, BLOCK_THREADS, INNER_SCAN_ALGORITHM>#
Public Functions
-
inline BlockRadixRankMatchEarlyCounts(TempStorage &temp_storage)#
-
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int (&exclusive_digit_prefix)[BINS_PER_THREAD], CountsCallback callback)#
Rank keys.
For the lower RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread.
-
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor, int (&exclusive_digit_prefix)[BINS_PER_THREAD])#
-
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT>
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], DigitExtractorT digit_extractor)#
Public Members
-
TempStorage &temp_storage#
-
template<typename UnsignedBits, int KEYS_PER_THREAD, typename DigitExtractorT, typename CountsCallback>
struct BlockRadixRankMatchInternal#
Public Functions
-
inline ::cuda::std::uint32_t Digit(UnsignedBits key)#
-
inline int ThreadBin(int u)#
-
inline void ComputeHistogramsWarp(UnsignedBits (&keys)[KEYS_PER_THREAD])#
-
inline void ComputeOffsetsWarpUpsweep(int (&bins)[BINS_PER_THREAD])#
-
inline void ComputeOffsetsWarpDownsweep(int (&offsets)[BINS_PER_THREAD])#
-
inline void ComputeRanksItem(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], detail::constant_t<WARP_MATCH_ATOMIC_OR>)#
-
inline void ComputeRanksItem(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], detail::constant_t<WARP_MATCH_ANY>)#
-
inline void RankKeys(UnsignedBits (&keys)[KEYS_PER_THREAD], int (&ranks)[KEYS_PER_THREAD], int (&exclusive_digit_prefix)[BINS_PER_THREAD])#
-
inline BlockRadixRankMatchInternal(TempStorage &temp_storage, DigitExtractorT digit_extractor, CountsCallback callback)#
Public Members
-
TempStorage &s#
-
DigitExtractorT digit_extractor#
-
CountsCallback callback#
-
int warp#
-
int lane#
-
inline ::cuda::std::uint32_t Digit(UnsignedBits key)#
-
struct TempStorage#
Public Members
-
int warp_offsets[BLOCK_WARPS][RADIX_DIGITS]#
-
int warp_histograms[BLOCK_WARPS][RADIX_DIGITS][NUM_PARTS]#
-
union cub::BlockRadixRankMatchEarlyCounts::TempStorage::[anonymous] [anonymous]#
-
int match_masks[MATCH_MASKS_ALLOC_SIZE][RADIX_DIGITS]#
-
int warp_offsets[BLOCK_WARPS][RADIX_DIGITS]#
-
enum [anonymous]#