cub::DispatchSelectIf#
-
template<typename InputIteratorT, typename FlagsInputIteratorT, typename SelectedOutputIteratorT, typename NumSelectedIteratorT, typename SelectOpT, typename EqualityOpT, typename OffsetT, SelectImpl SelectionOpt, typename PolicyHub = detail::select::policy_hub<detail::it_value_t<InputIteratorT>, detail::it_value_t<FlagsInputIteratorT>, ::cuda::std::conditional_t<SelectionOpt == SelectImpl::Partition, OffsetT, detail::select::per_partition_offset_t>, detail::select::is_partition_distinct_output_t<SelectedOutputIteratorT>::value, SelectionOpt>>
struct DispatchSelectIf# Utility class for dispatching the appropriately-tuned kernels for DeviceSelect and DevicePartition.
- Template Parameters:
InputIteratorT – Random-access input iterator type for reading input items
FlagsInputIteratorT – Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is used for selection)
SelectedOutputIteratorT – Random-access output iterator type for writing selected items
NumSelectedIteratorT – Output iterator type for recording the number of items selected
SelectOpT – Selection operator type (NullType if selection flags or discontinuity flagging is used for selection)
EqualityOpT – Equality operator type (NullType if selection functor or selection flags are used for selection)
OffsetT – Signed integer type for global offsets
SelectionOpt – SelectImpl value indicating whether to perform a partition, a plain selection, or a selection where the memory for the input and output may alias each other.
Public Types
-
using per_partition_offset_t = detail::select::per_partition_offset_t#
-
using streaming_context_t = detail::select::streaming_context_t<num_total_items_t, may_require_streaming>#
-
using ScanTileStateT = ScanTileState<per_partition_offset_t>#
Public Functions
- inline DispatchSelectIf(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- FlagsInputIteratorT d_flags,
- SelectedOutputIteratorT d_selected_out,
- NumSelectedIteratorT d_num_selected_out,
- SelectOpT select_op,
- EqualityOpT equality_op,
- OffsetT num_items,
- cudaStream_t stream,
- int ptx_version,
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_in – Pointer to the input sequence of data items
d_flags – Pointer to the input sequence of selection flags (if applicable)
d_selected_out – Pointer to the output sequence of selected data items
d_num_selected_out – Pointer to the total number of items selected (i.e., length of d_selected_out)
select_op – Selection operator
equality_op – Equality operator
num_items – Total number of input items (i.e., length of d_in)
stream – CUDA stream to launch kernels within. Default is stream 0.
-
template<typename ActivePolicyT, typename ScanInitKernelPtrT, typename SelectIfKernelPtrT>
inline cudaError_t Invoke( - ScanInitKernelPtrT scan_init_kernel,
- SelectIfKernelPtrT select_if_kernel,
Internal dispatch routine for computing a device-wide selection using the specified kernel functions.
-
template<typename ActivePolicyT>
inline cudaError_t Invoke()#
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage.
When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of d_temp_storage allocation.
-
InputIteratorT d_in#
Pointer to the input sequence of data items.
-
FlagsInputIteratorT d_flags#
Pointer to the input sequence of selection flags (if applicable)
-
SelectedOutputIteratorT d_selected_out#
Pointer to the output sequence of selected data items.
-
NumSelectedIteratorT d_num_selected_out#
Pointer to the total number of items selected (i.e., length of d_selected_out)
-
EqualityOpT equality_op#
Equality operator.
-
cudaStream_t stream#
CUDA stream to launch kernels within. Default is stream 0.
-
int ptx_version#
Public Static Functions
- static inline cudaError_t Dispatch(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- FlagsInputIteratorT d_flags,
- SelectedOutputIteratorT d_selected_out,
- NumSelectedIteratorT d_num_selected_out,
- SelectOpT select_op,
- EqualityOpT equality_op,
- OffsetT num_items,
- cudaStream_t stream,
Internal dispatch routine.
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_in – Pointer to the input sequence of data items
d_flags – Pointer to the input sequence of selection flags (if applicable)
d_selected_out – Pointer to the output sequence of selected data items
d_num_selected_out – Pointer to the total number of items selected (i.e., length of d_selected_out)
select_op – Selection operator
equality_op – Equality operator
num_items – Total number of input items (i.e., length of d_in)
stream – CUDA stream to launch kernels within. Default is stream 0.
Public Static Attributes
-
static constexpr per_partition_offset_t partition_size = ::cuda::std::numeric_limits<per_partition_offset_t>::max()#
-
static constexpr bool may_require_streaming = (static_cast<::cuda::std::uint64_t>(partition_size) < static_cast<::cuda::std::uint64_t>(::cuda::std::numeric_limits<OffsetT>::max()))#
-
static constexpr int INIT_KERNEL_THREADS = 128#