cub::DispatchScan#
-
template<typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename InitValueT, typename OffsetT, typename AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::it_value_t<InputIteratorT>, ::cuda::std::_If<::cuda::std::is_same_v<InitValueT, NullType>, cub::detail::it_value_t<InputIteratorT>, typename InitValueT::value_type>>, ForceInclusive EnforceInclusive = ForceInclusive::No, typename PolicyHub = detail::scan::policy_hub<detail::it_value_t<InputIteratorT>, detail::it_value_t<OutputIteratorT>, AccumT, OffsetT, ScanOpT>, typename KernelSource = detail::scan::DeviceScanKernelSource<typename PolicyHub::MaxPolicy, InputIteratorT, OutputIteratorT, ScanOpT, InitValueT, OffsetT, AccumT, EnforceInclusive>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchScan#
Utility class for dispatching the appropriately-tuned kernels for DeviceScan.
- Template Parameters:
InputIteratorT – Random-access input iterator type for reading scan inputs (may be a simple pointer type)
OutputIteratorT – Random-access output iterator type for writing scan outputs (may be a simple pointer type)
ScanOpT – Binary scan functor type having member `auto operator()(const T &a, const U &b)`
InitValueT – The init_value element type for ScanOpT (cub::NullType for inclusive scans)
OffsetT – Unsigned integer type for global offsets
EnforceInclusive – Enum flag to specify whether to enforce inclusive scan.
Public Functions
- inline DispatchScan(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- OffsetT num_items,
- ScanOpT scan_op,
- InitValueT init_value,
- cudaStream_t stream,
- int ptx_version,
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When `nullptr`, the required allocation size is written to `temp_storage_bytes` and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of `d_temp_storage` allocation
d_in – [in] Iterator to the input sequence of data items
d_out – [out] Iterator to the output sequence of data items
num_items – [in] Total number of input items (i.e., the length of `d_in`)
scan_op – [in] Binary scan functor
init_value – [in] Initial value to seed the exclusive scan
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream 0.
-
template<typename ActivePolicyT, typename InitKernelT, typename ScanKernelT>
inline cudaError_t Invoke(
- InitKernelT init_kernel,
- ScanKernelT scan_kernel,
- ActivePolicyT policy = {},
-
template<typename ActivePolicyT>
inline cudaError_t Invoke(
- ActivePolicyT active_policy = {},
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage. When `nullptr`, the required allocation size is written to `temp_storage_bytes` and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of `d_temp_storage` allocation.
-
InputIteratorT d_in#
Iterator to the input sequence of data items.
-
OutputIteratorT d_out#
Iterator to the output sequence of data items.
-
InitValueT init_value#
Initial value to seed the exclusive scan.
-
cudaStream_t stream#
CUDA stream to launch kernels within. Default is stream 0.
-
int ptx_version#
-
KernelSource kernel_source#
-
KernelLauncherFactory launcher_factory#
Public Static Functions
-
template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- ScanOpT scan_op,
- InitValueT init_value,
- OffsetT num_items,
- cudaStream_t stream,
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
- MaxPolicyT max_policy = {},
Internal dispatch routine.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When `nullptr`, the required allocation size is written to `temp_storage_bytes` and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of `d_temp_storage` allocation
d_in – [in] Iterator to the input sequence of data items
d_out – [out] Iterator to the output sequence of data items
scan_op – [in] Binary scan functor
init_value – [in] Initial value to seed the exclusive scan
num_items – [in] Total number of input items (i.e., the length of `d_in`)
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream 0.
Public Static Attributes
-
static constexpr int INIT_KERNEL_THREADS = 128#