cub::DispatchSelectIf#

template<typename InputIteratorT, typename FlagsInputIteratorT, typename SelectedOutputIteratorT, typename NumSelectedIteratorT, typename SelectOpT, typename EqualityOpT, typename OffsetT, SelectImpl SelectionOpt, typename PolicyHub = detail::select::policy_hub<detail::it_value_t<InputIteratorT>, detail::it_value_t<FlagsInputIteratorT>, ::cuda::std::conditional_t<SelectionOpt == SelectImpl::Partition, OffsetT, detail::select::per_partition_offset_t>, detail::select::is_partition_distinct_output_t<SelectedOutputIteratorT>::value, SelectionOpt>>
struct DispatchSelectIf#

Utility class for dispatching the appropriately-tuned kernels for DeviceSelect and DevicePartition.

Template Parameters:
  • InputIteratorT – Random-access input iterator type for reading input items

  • FlagsInputIteratorT – Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is used for selection)

  • SelectedOutputIteratorT – Random-access output iterator type for writing selected items

  • NumSelectedIteratorT – Output iterator type for recording the number of items selected

  • SelectOpT – Selection operator type (NullType if selection flags or discontinuity flagging is used for selection)

  • EqualityOpT – Equality operator type (NullType if selection functor or selection flags are used for selection)

  • OffsetT – Signed integer type for global offsets

  • SelectionOpt – SelectImpl value indicating whether to perform partitioning, plain selection, or selection where the memory for the input and output may alias each other.

Public Types

using per_partition_offset_t = detail::select::per_partition_offset_t#
using num_total_items_t = OffsetT#
using streaming_context_t = detail::select::streaming_context_t<num_total_items_t, may_require_streaming>#
using ScanTileStateT = ScanTileState<per_partition_offset_t>#

Public Functions

inline DispatchSelectIf(
void *d_temp_storage,
size_t &temp_storage_bytes,
InputIteratorT d_in,
FlagsInputIteratorT d_flags,
SelectedOutputIteratorT d_selected_out,
NumSelectedIteratorT d_num_selected_out,
SelectOpT select_op,
EqualityOpT equality_op,
OffsetT num_items,
cudaStream_t stream,
int ptx_version,
)#
Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_in – Pointer to the input sequence of data items

  • d_flags – Pointer to the input sequence of selection flags (if applicable)

  • d_selected_out – Pointer to the output sequence of selected data items

  • d_num_selected_out – Pointer to the total number of items selected (i.e., length of d_selected_out)

  • select_op – Selection operator

  • equality_op – Equality operator

  • num_items – Total number of input items (i.e., length of d_in)

  • stream – CUDA stream to launch kernels within. Default is stream 0.

template<typename ActivePolicyT, typename ScanInitKernelPtrT, typename SelectIfKernelPtrT>
inline cudaError_t Invoke(
ScanInitKernelPtrT scan_init_kernel,
SelectIfKernelPtrT select_if_kernel,
)#

Internal dispatch routine for computing a device-wide selection using the specified kernel functions.

template<typename ActivePolicyT>
inline cudaError_t Invoke()#

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

InputIteratorT d_in#

Pointer to the input sequence of data items.

FlagsInputIteratorT d_flags#

Pointer to the input sequence of selection flags (if applicable)

SelectedOutputIteratorT d_selected_out#

Pointer to the output sequence of selected data items.

NumSelectedIteratorT d_num_selected_out#

Pointer to the total number of items selected (i.e., length of d_selected_out)

SelectOpT select_op#

Selection operator.

EqualityOpT equality_op#

Equality operator.

OffsetT num_items#

Total number of input items (i.e., length of d_in)

cudaStream_t stream#

CUDA stream to launch kernels within. Default is stream 0.

int ptx_version#

Public Static Functions

static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
InputIteratorT d_in,
FlagsInputIteratorT d_flags,
SelectedOutputIteratorT d_selected_out,
NumSelectedIteratorT d_num_selected_out,
SelectOpT select_op,
EqualityOpT equality_op,
OffsetT num_items,
cudaStream_t stream,
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_in – Pointer to the input sequence of data items

  • d_flags – Pointer to the input sequence of selection flags (if applicable)

  • d_selected_out – Pointer to the output sequence of selected data items

  • d_num_selected_out – Pointer to the total number of items selected (i.e., length of d_selected_out)

  • select_op – Selection operator

  • equality_op – Equality operator

  • num_items – Total number of input items (i.e., length of d_in)

  • stream – CUDA stream to launch kernels within. Default is stream 0.

Public Static Attributes

static constexpr per_partition_offset_t partition_size = ::cuda::std::numeric_limits<per_partition_offset_t>::max()#
static constexpr bool may_require_streaming = (static_cast<::cuda::std::uint64_t>(partition_size) < static_cast<::cuda::std::uint64_t>(::cuda::std::numeric_limits<OffsetT>::max()))#
static constexpr int INIT_KERNEL_THREADS = 128#