cub::DispatchScanByKey#

template<typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT, typename EqualityOp, typename ScanOpT, typename InitValueT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::it_value_t<ValuesInputIteratorT>, ::cuda::std::_If<::cuda::std::is_same_v<InitValueT, NullType>, cub::detail::it_value_t<ValuesInputIteratorT>, InitValueT>>, typename PolicyHub = detail::scan_by_key::policy_hub<KeysInputIteratorT, AccumT, cub::detail::it_value_t<ValuesInputIteratorT>, ScanOpT>>
struct DispatchScanByKey#

Utility class for dispatching the appropriately tuned kernels for the by-key scan algorithms of DeviceScan.

Template Parameters:
  • KeysInputIteratorT – Random-access input iterator type

  • ValuesInputIteratorT – Random-access input iterator type

  • ValuesOutputIteratorT – Random-access output iterator type

  • EqualityOp – Equality functor type

  • ScanOpT – Scan functor type

  • InitValueT – Type of the init_value element for ScanOpT (cub::NullType for inclusive scan)

  • OffsetT – Unsigned integer type for global offsets

Public Types

using KeyT = cub::detail::it_value_t<KeysInputIteratorT>#
using InputT = cub::detail::it_value_t<ValuesInputIteratorT>#
using ScanByKeyTileStateT = ReduceByKeyScanTileState<AccumT, int>#

Public Functions

inline DispatchScanByKey(
void *d_temp_storage,
size_t &temp_storage_bytes,
KeysInputIteratorT d_keys_in,
ValuesInputIteratorT d_values_in,
ValuesOutputIteratorT d_values_out,
EqualityOp equality_op,
ScanOpT scan_op,
InitValueT init_value,
OffsetT num_items,
cudaStream_t stream,
int ptx_version,
)#
Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_keys_in[in] Iterator to the input sequence of key items

  • d_values_in[in] Iterator to the input sequence of value items

  • d_values_out[out] Iterator to the output sequence of value items

  • equality_op[in] Binary equality functor

  • scan_op[in] Binary scan functor

  • init_value[in] Initial value to seed the exclusive scan

  • num_items[in] Total number of input items (i.e., the length of d_keys_in and d_values_in)

  • stream[in] CUDA stream to launch kernels within.

template<typename ActivePolicyT, typename InitKernel, typename ScanKernel>
inline cudaError_t Invoke(
InitKernel init_kernel,
ScanKernel scan_kernel,
)#
template<typename ActivePolicyT>
inline cudaError_t Invoke()#

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

KeysInputIteratorT d_keys_in#

Iterator to the input sequence of key items.

ValuesInputIteratorT d_values_in#

Iterator to the input sequence of value items.

ValuesOutputIteratorT d_values_out#

Iterator to the output sequence of value items.

EqualityOp equality_op#

Binary equality functor.

ScanOpT scan_op#

Binary scan functor.

InitValueT init_value#

Initial value to seed the exclusive scan.

OffsetT num_items#

Total number of input items (i.e., the length of d_keys_in and d_values_in)

cudaStream_t stream#

CUDA stream to launch kernels within.

int ptx_version#

Public Static Functions

static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
KeysInputIteratorT d_keys_in,
ValuesInputIteratorT d_values_in,
ValuesOutputIteratorT d_values_out,
EqualityOp equality_op,
ScanOpT scan_op,
InitValueT init_value,
OffsetT num_items,
cudaStream_t stream,
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_keys_in[in] Iterator to the input sequence of key items

  • d_values_in[in] Iterator to the input sequence of value items

  • d_values_out[out] Iterator to the output sequence of value items

  • equality_op[in] Binary equality functor

  • scan_op[in] Binary scan functor

  • init_value[in] Initial value to seed the exclusive scan

  • num_items[in] Total number of input items (i.e., the length of d_keys_in and d_values_in)

  • stream[in] CUDA stream to launch kernels within.

Public Static Attributes

static constexpr int INIT_KERNEL_THREADS = 128#