cub::DispatchScanByKey#
-
template<typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT, typename EqualityOp, typename ScanOpT, typename InitValueT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::it_value_t<ValuesInputIteratorT>, ::cuda::std::_If<::cuda::std::is_same_v<InitValueT, NullType>, cub::detail::it_value_t<ValuesInputIteratorT>, InitValueT>>, typename PolicyHub = detail::scan_by_key::policy_hub<KeysInputIteratorT, AccumT, cub::detail::it_value_t<ValuesInputIteratorT>, ScanOpT>>
struct DispatchScanByKey# Utility class for dispatching the appropriately-tuned kernels for DeviceScan.
- Template Parameters:
KeysInputIteratorT – Random-access input iterator type
ValuesInputIteratorT – Random-access input iterator type
ValuesOutputIteratorT – Random-access output iterator type
EqualityOp – Equality functor type
ScanOpT – Scan functor type
InitValueT – Type of the init_value element for ScanOpT (cub::NullType for inclusive scan)
OffsetT – Unsigned integer type for global offsets
Public Types
-
using KeyT = cub::detail::it_value_t<KeysInputIteratorT>#
-
using InputT = cub::detail::it_value_t<ValuesInputIteratorT>#
-
using ScanByKeyTileStateT = ReduceByKeyScanTileState<AccumT, int>#
Public Functions
- inline DispatchScanByKey(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- KeysInputIteratorT d_keys_in,
- ValuesInputIteratorT d_values_in,
- ValuesOutputIteratorT d_values_out,
- EqualityOp equality_op,
- ScanOpT scan_op,
- InitValueT init_value,
- OffsetT num_items,
- cudaStream_t stream,
- int ptx_version,
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of d_temp_storage allocation
d_keys_in – [in] Iterator to the input sequence of key items
d_values_in – [in] Iterator to the input sequence of value items
d_values_out – [out] Iterator to the output sequence of value items
equality_op – [in] Binary equality functor
scan_op – [in] Binary scan functor
init_value – [in] Initial value to seed the exclusive scan
num_items – [in] Total number of input items (i.e., the length of d_keys_in and d_values_in)
stream – [in] CUDA stream to launch kernels within.
-
template<typename ActivePolicyT, typename InitKernel, typename ScanKernel>
inline cudaError_t Invoke( - InitKernel init_kernel,
- ScanKernel scan_kernel,
-
template<typename ActivePolicyT>
inline cudaError_t Invoke()#
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage.
When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of d_temp_storage allocation.
-
KeysInputIteratorT d_keys_in#
Iterator to the input sequence of key items.
-
ValuesInputIteratorT d_values_in#
Iterator to the input sequence of value items.
-
ValuesOutputIteratorT d_values_out#
Iterator to the output sequence of value items.
-
EqualityOp equality_op#
Binary equality functor.
-
InitValueT init_value#
Initial value to seed the exclusive scan.
-
cudaStream_t stream#
CUDA stream to launch kernels within.
-
int ptx_version#
Public Static Functions
- static inline cudaError_t Dispatch(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- KeysInputIteratorT d_keys_in,
- ValuesInputIteratorT d_values_in,
- ValuesOutputIteratorT d_values_out,
- EqualityOp equality_op,
- ScanOpT scan_op,
- InitValueT init_value,
- OffsetT num_items,
- cudaStream_t stream,
Internal dispatch routine.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of d_temp_storage allocation
d_keys_in – [in] Iterator to the input sequence of key items
d_values_in – [in] Iterator to the input sequence of value items
d_values_out – [out] Iterator to the output sequence of value items
equality_op – [in] Binary equality functor
scan_op – [in] Binary scan functor
init_value – [in] Initial value to seed the exclusive scan
num_items – [in] Total number of input items (i.e., the length of d_keys_in and d_values_in)
stream – [in] CUDA stream to launch kernels within.
Public Static Attributes
-
static constexpr int INIT_KERNEL_THREADS = 128#