cub::DispatchReduceByKey#

template<typename KeysInputIteratorT, typename UniqueOutputIteratorT, typename ValuesInputIteratorT, typename AggregatesOutputIteratorT, typename NumRunsOutputIteratorT, typename EqualityOpT, typename ReductionOpT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::it_value_t<ValuesInputIteratorT>, cub::detail::it_value_t<ValuesInputIteratorT>>, typename PolicyHub = detail::reduce_by_key::policy_hub<ReductionOpT, AccumT, cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::it_value_t<KeysInputIteratorT>>>> struct DispatchReduceByKey#

Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey.

Template Parameters:

KeysInputIteratorT – Random-access input iterator type for keys
UniqueOutputIteratorT – Random-access output iterator type for keys
ValuesInputIteratorT – Random-access input iterator type for values
AggregatesOutputIteratorT – Random-access output iterator type for values
NumRunsOutputIteratorT – Output iterator type for recording the number of runs (segments) encountered
EqualityOpT – KeyT equality operator type
ReductionOpT – ValueT reduction operator type
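OffsetT – Signed integer type for global offsets
AccumT – Accumulator type used for the reduction; by default, the type deduced from applying ReductionOpT to the value type of ValuesInputIteratorT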
PolicyHub – Implementation detail; do not specify directly. The requirements on the content of this type are subject to breaking changes.

Public Types

using ValueInputT = cub::detail::it_value_t<ValuesInputIteratorT>#

using streaming_context_t = NullType#

using ScanTileStateT = ReduceByKeyScanTileState<AccumT, OffsetT>#

Public Functions

inline DispatchReduceByKey( void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream )#

template<typename ActivePolicyT, typename ScanInitKernelT, typename ReduceByKeyKernelT> inline cudaError_t Invoke( ScanInitKernelT init_kernel, ReduceByKeyKernelT reduce_by_key_kernel )#

template<typename ActivePolicyT> inline cudaError_t Invoke()#

Public Members

void *d_temp_storage#

size_t &temp_storage_bytes#

KeysInputIteratorT d_keys_in#

UniqueOutputIteratorT d_unique_out#

ValuesInputIteratorT d_values_in#

AggregatesOutputIteratorT d_aggregates_out#

NumRunsOutputIteratorT d_num_runs_out#

EqualityOpT equality_op#

ReductionOpT reduction_op#

OffsetT num_items#

cudaStream_t stream#

Public Static Functions

static inline cudaError_t Dispatch( void *d_temp_storage, size_t &temp_storage_bytes, KeysInputIteratorT d_keys_in, UniqueOutputIteratorT d_unique_out, ValuesInputIteratorT d_values_in, AggregatesOutputIteratorT d_aggregates_out, NumRunsOutputIteratorT d_num_runs_out, EqualityOpT equality_op, ReductionOpT reduction_op, OffsetT num_items, cudaStream_t stream )#

Internal dispatch routine.

Parameters:

d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of d_temp_storage allocation
d_keys_in – [in] Pointer to the input sequence of keys
d_unique_out – [out] Pointer to the output sequence of unique keys (one key per run)
d_values_in – [in] Pointer to the input sequence of corresponding values
d_aggregates_out – [out] Pointer to the output sequence of value aggregates (one aggregate per run)
d_num_runs_out – [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out)
equality_op – [in] KeyT equality operator
reduction_op – [in] ValueT reduction operator
num_items – [in] Total number of input items (i.e., the length of d_keys_in and d_values_in)
stream – [in] CUDA stream to launch kernels within. Default is stream 0.

Public Static Attributes

static constexpr int INIT_KERNEL_THREADS = 128#