cub::DispatchReduceByKey#
-
template<typename KeysInputIteratorT, typename UniqueOutputIteratorT, typename ValuesInputIteratorT, typename AggregatesOutputIteratorT, typename NumRunsOutputIteratorT, typename EqualityOpT, typename ReductionOpT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::it_value_t<ValuesInputIteratorT>, cub::detail::it_value_t<ValuesInputIteratorT>>, typename PolicyHub = detail::reduce_by_key::policy_hub<ReductionOpT, AccumT, cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::it_value_t<KeysInputIteratorT>>>>
struct DispatchReduceByKey#
Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey.
- Template Parameters:
KeysInputIteratorT – Random-access input iterator type for keys
UniqueOutputIteratorT – Random-access output iterator type for keys
ValuesInputIteratorT – Random-access input iterator type for values
AggregatesOutputIteratorT – Random-access output iterator type for values
NumRunsOutputIteratorT – Output iterator type for recording number of segments encountered
EqualityOpT – KeyT equality operator type
ReductionOpT – ValueT reduction operator type
OffsetT – Signed integer type for global offsets
PolicyHub – Implementation detail, do not specify directly, requirements on the content of this type are subject to breaking change.
Public Types
-
using ValueInputT = cub::detail::it_value_t<ValuesInputIteratorT>#
-
using streaming_context_t = NullType#
-
using ScanTileStateT = ReduceByKeyScanTileState<AccumT, OffsetT>#
Public Functions
- inline DispatchReduceByKey(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- KeysInputIteratorT d_keys_in,
- UniqueOutputIteratorT d_unique_out,
- ValuesInputIteratorT d_values_in,
- AggregatesOutputIteratorT d_aggregates_out,
- NumRunsOutputIteratorT d_num_runs_out,
- EqualityOpT equality_op,
- ReductionOpT reduction_op,
- OffsetT num_items,
- cudaStream_t stream)#
-
template<typename ActivePolicyT, typename ScanInitKernelT, typename ReduceByKeyKernelT>
inline cudaError_t Invoke(
- ScanInitKernelT init_kernel,
- ReduceByKeyKernelT reduce_by_key_kernel)#
-
template<typename ActivePolicyT>
inline cudaError_t Invoke()#
Public Members
-
void *d_temp_storage#
-
size_t &temp_storage_bytes#
-
KeysInputIteratorT d_keys_in#
-
UniqueOutputIteratorT d_unique_out#
-
ValuesInputIteratorT d_values_in#
-
AggregatesOutputIteratorT d_aggregates_out#
-
NumRunsOutputIteratorT d_num_runs_out#
-
EqualityOpT equality_op#
-
ReductionOpT reduction_op#
-
cudaStream_t stream#
Public Static Functions
- static inline cudaError_t Dispatch(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- KeysInputIteratorT d_keys_in,
- UniqueOutputIteratorT d_unique_out,
- ValuesInputIteratorT d_values_in,
- AggregatesOutputIteratorT d_aggregates_out,
- NumRunsOutputIteratorT d_num_runs_out,
- EqualityOpT equality_op,
- ReductionOpT reduction_op,
- OffsetT num_items,
- cudaStream_t stream)#
Internal dispatch routine.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of d_temp_storage allocation
d_keys_in – [in] Pointer to the input sequence of keys
d_unique_out – [out] Pointer to the output sequence of unique keys (one key per run)
d_values_in – [in] Pointer to the input sequence of corresponding values
d_aggregates_out – [out] Pointer to the output sequence of value aggregates (one aggregate per run)
d_num_runs_out – [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out)
equality_op – [in] KeyT equality operator
reduction_op – [in] ValueT reduction operator
num_items – [in] Total number of input key/value pairs (i.e., the length of d_keys_in and d_values_in)
stream – [in] CUDA stream to launch kernels within. Default is stream 0.
Public Static Attributes
-
static constexpr int INIT_KERNEL_THREADS = 128#