cub::DispatchScan#

template<typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename InitValueT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::it_value_t<InputIteratorT>, ::cuda::std::_If<::cuda::std::is_same_v<InitValueT, NullType>, cub::detail::it_value_t<InputIteratorT>, typename InitValueT::value_type>>, ForceInclusive EnforceInclusive = ForceInclusive::No, typename PolicyHub = detail::scan::policy_hub<detail::it_value_t<InputIteratorT>, detail::it_value_t<OutputIteratorT>, AccumT, OffsetT, ScanOpT>, typename KernelSource = detail::scan::DeviceScanKernelSource<typename PolicyHub::MaxPolicy, InputIteratorT, OutputIteratorT, ScanOpT, InitValueT, OffsetT, AccumT, EnforceInclusive>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchScan#

Utility class for dispatching the appropriately-tuned kernels for DeviceScan.

Template Parameters:
  • InputIteratorT – Random-access input iterator type for reading scan inputs (may be a simple pointer type)

  • OutputIteratorT – Random-access output iterator type for writing scan outputs (may be a simple pointer type)

  • ScanOpT – Binary scan functor type having member auto operator()(const T &a, const U &b)

  • InitValueT – The init_value element type for ScanOpT (cub::NullType for inclusive scans)

  • OffsetT – Unsigned integer type for global offsets

  • EnforceInclusive – Enum flag specifying whether to enforce an inclusive scan.

Public Functions

inline DispatchScan(
void *d_temp_storage,
size_t &temp_storage_bytes,
InputIteratorT d_in,
OutputIteratorT d_out,
OffsetT num_items,
ScanOpT scan_op,
InitValueT init_value,
cudaStream_t stream,
int ptx_version,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
)#
Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_in[in] Iterator to the input sequence of data items

  • d_out[out] Iterator to the output sequence of data items

  • num_items[in] Total number of input items (i.e., the length of d_in)

  • scan_op[in] Binary scan functor

  • init_value[in] Initial value to seed the exclusive scan

  • stream[in] [optional] CUDA stream to launch kernels within. Default is stream 0.

template<typename ActivePolicyT, typename InitKernelT, typename ScanKernelT>
inline cudaError_t Invoke(
InitKernelT init_kernel,
ScanKernelT scan_kernel,
ActivePolicyT policy = {},
)#
template<typename ActivePolicyT>
inline cudaError_t Invoke(
ActivePolicyT active_policy = {},
)#

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

InputIteratorT d_in#

Iterator to the input sequence of data items.

OutputIteratorT d_out#

Iterator to the output sequence of data items.

ScanOpT scan_op#

Binary scan functor.

InitValueT init_value#

Initial value to seed the exclusive scan.

OffsetT num_items#

Total number of input items (i.e., the length of d_in).

cudaStream_t stream#

CUDA stream to launch kernels within. Default is stream 0.

int ptx_version#
KernelSource kernel_source#
KernelLauncherFactory launcher_factory#

Public Static Functions

template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
InputIteratorT d_in,
OutputIteratorT d_out,
ScanOpT scan_op,
InitValueT init_value,
OffsetT num_items,
cudaStream_t stream,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {},
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_in[in] Iterator to the input sequence of data items

  • d_out[out] Iterator to the output sequence of data items

  • scan_op[in] Binary scan functor

  • init_value[in] Initial value to seed the exclusive scan

  • num_items[in] Total number of input items (i.e., the length of d_in)

  • stream[in] [optional] CUDA stream to launch kernels within. Default is stream 0.

Public Static Attributes

static constexpr int INIT_KERNEL_THREADS = 128#