cub::DispatchUniqueByKey#

template<typename KeyInputIteratorT, typename ValueInputIteratorT, typename KeyOutputIteratorT, typename ValueOutputIteratorT, typename NumSelectedIteratorT, typename EqualityOpT, typename OffsetT, typename PolicyHub = detail::unique_by_key::policy_hub<detail::it_value_t<KeyInputIteratorT>, detail::it_value_t<ValueInputIteratorT>>, typename KernelSource = detail::unique_by_key::DeviceUniqueByKeyKernelSource<typename PolicyHub::MaxPolicy, KeyInputIteratorT, ValueInputIteratorT, KeyOutputIteratorT, ValueOutputIteratorT, NumSelectedIteratorT, ScanTileState<OffsetT>, EqualityOpT, OffsetT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY, typename VSMemHelperT = detail::unique_by_key::VSMemHelper, typename KeyT = detail::it_value_t<KeyInputIteratorT>, typename ValueT = detail::it_value_t<ValueInputIteratorT>>
struct DispatchUniqueByKey#

Utility class for dispatching the appropriately-tuned kernels for DeviceSelect::UniqueByKey.

Template Parameters:
  • KeyInputIteratorT – Random-access input iterator type for keys

  • ValueInputIteratorT – Random-access input iterator type for values

  • KeyOutputIteratorT – Random-access output iterator type for keys

  • ValueOutputIteratorT – Random-access output iterator type for values

  • NumSelectedIteratorT – Output iterator type for recording the number of items selected

  • EqualityOpT – Equality operator type

  • OffsetT – Signed integer type for global offsets

Public Types

enum [anonymous]#

Values:

enumerator INIT_KERNEL_THREADS = 128#

Public Functions

inline DispatchUniqueByKey(
void *d_temp_storage,
size_t &temp_storage_bytes,
KeyInputIteratorT d_keys_in,
ValueInputIteratorT d_values_in,
KeyOutputIteratorT d_keys_out,
ValueOutputIteratorT d_values_out,
NumSelectedIteratorT d_num_selected_out,
EqualityOpT equality_op,
OffsetT num_items,
cudaStream_t stream,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
)#
Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • d_keys_in[in] Pointer to the input sequence of keys

  • d_values_in[in] Pointer to the input sequence of values

  • d_keys_out[out] Pointer to the output sequence of selected keys

  • d_values_out[out] Pointer to the output sequence of selected values

  • d_num_selected_out[out] Pointer to the total number of items selected (i.e., length of d_keys_out or d_values_out)

  • equality_op[in] Equality operator

  • num_items[in] Total number of input items (i.e., length of d_keys_in or d_values_in)

  • stream[in] [optional] CUDA stream to launch kernels within. Default is stream 0.

  • temp_storage_bytes[in,out] Reference to size in bytes of d_temp_storage allocation

template<typename ActivePolicyT, typename InitKernelT, typename UniqueByKeySweepKernelT>
inline cudaError_t Invoke(
InitKernelT init_kernel,
UniqueByKeySweepKernelT sweep_kernel,
ActivePolicyT policy = {},
)#
template<typename ActivePolicyT>
inline cudaError_t Invoke(
ActivePolicyT active_policy = {},
)#

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

KeyInputIteratorT d_keys_in#

Pointer to the input sequence of keys.

ValueInputIteratorT d_values_in#

Pointer to the input sequence of values.

KeyOutputIteratorT d_keys_out#

Pointer to the output sequence of selected keys.

ValueOutputIteratorT d_values_out#

Pointer to the output sequence of selected values.

NumSelectedIteratorT d_num_selected_out#

Pointer to the total number of items selected (i.e., length of d_keys_out or d_values_out)

EqualityOpT equality_op#

Equality operator.

OffsetT num_items#

Total number of input items (i.e., length of d_keys_in or d_values_in)

cudaStream_t stream#

[optional] CUDA stream to launch kernels within. Default is stream 0.

KernelSource kernel_source#
KernelLauncherFactory launcher_factory#

Public Static Functions

template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
KeyInputIteratorT d_keys_in,
ValueInputIteratorT d_values_in,
KeyOutputIteratorT d_keys_out,
ValueOutputIteratorT d_values_out,
NumSelectedIteratorT d_num_selected_out,
EqualityOpT equality_op,
OffsetT num_items,
cudaStream_t stream,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {},
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_keys_in[in] Pointer to the input sequence of keys

  • d_values_in[in] Pointer to the input sequence of values

  • d_keys_out[out] Pointer to the output sequence of selected keys

  • d_values_out[out] Pointer to the output sequence of selected values

  • d_num_selected_out[out] Pointer to the total number of items selected (i.e., length of d_keys_out or d_values_out)

  • equality_op[in] Equality operator

  • num_items[in] Total number of input items (i.e., length of d_keys_in or d_values_in)

  • stream[in] [optional] CUDA stream to launch kernels within. Default is stream 0.