cub::DispatchReduce#
-
template<typename InputIteratorT, typename OutputIteratorT, typename OffsetT, typename ReductionOpT, typename InitT = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::it_value_t<InputIteratorT>>, typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::it_value_t<InputIteratorT>, InitT>, typename TransformOpT = ::cuda::std::identity, typename PolicyHub = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>, typename KernelSource = detail::reduce::DeviceReduceKernelSource<typename PolicyHub::MaxPolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, InitT, AccumT, TransformOpT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchReduce# Utility class for dispatching the appropriately-tuned kernels for device-wide reduction.
- Template Parameters:
InputIteratorT – Random-access input iterator type for reading input items (may be a simple pointer type)
OutputIteratorT – Output iterator type for recording the reduced aggregate (may be a simple pointer type)
OffsetT – Signed integer type for global offsets
ReductionOpT – Binary reduction functor type having member
auto operator()(const T &a, const U &b)
InitT – Initial value type
Public Functions
- inline DispatchReduce(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- OffsetT num_items,
- ReductionOpT reduction_op,
- InitT init,
- cudaStream_t stream,
- int ptx_version,
- TransformOpT transform_op = {},
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
Constructor.
-
template<typename ActivePolicyT, typename SingleTileKernelT>
inline cudaError_t InvokeSingleTile( - SingleTileKernelT single_tile_kernel,
- ActivePolicyT policy = {},
Invoke a single block to reduce in-core.
- Template Parameters:
ActivePolicyT – Umbrella policy active for the target device
SingleTileKernelT – Function type of cub::DeviceReduceSingleTileKernel
- Parameters:
single_tile_kernel – [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel
-
template<typename ActivePolicyT, typename ReduceKernelT, typename SingleTileKernelT>
inline cudaError_t InvokePasses( - ReduceKernelT reduce_kernel,
- SingleTileKernelT single_tile_kernel,
- ActivePolicyT active_policy = {},
Invoke two passes to reduce.
- Template Parameters:
ActivePolicyT – Umbrella policy active for the target device
ReduceKernelT – Function type of cub::DeviceReduceKernel
SingleTileKernelT – Function type of cub::DeviceReduceSingleTileKernel
- Parameters:
reduce_kernel – [in] Kernel function pointer to parameterization of cub::DeviceReduceKernel
single_tile_kernel – [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel
-
template<typename ActivePolicyT>
inline cudaError_t Invoke( - ActivePolicyT active_policy = {},
Invocation.
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage.
When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of d_temp_storage allocation.
-
InputIteratorT d_in#
Pointer to the input sequence of data items.
-
OutputIteratorT d_out#
Pointer to the output aggregate.
-
ReductionOpT reduction_op#
Binary reduction functor.
-
cudaStream_t stream#
CUDA stream to launch kernels within. Default is stream 0.
-
int ptx_version#
-
TransformOpT transform_op#
-
KernelSource kernel_source#
-
KernelLauncherFactory launcher_factory#
Public Static Functions
-
template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- OffsetT num_items,
- ReductionOpT reduction_op,
- InitT init,
- cudaStream_t stream,
- TransformOpT transform_op = {},
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
- MaxPolicyT max_policy = {},
Internal dispatch routine for computing a device-wide reduction.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of d_temp_storage allocation
d_in – [in] Pointer to the input sequence of data items
d_out – [out] Pointer to the output aggregate
num_items – [in] Total number of input items (i.e., length of d_in)
reduction_op – [in] Binary reduction functor
init – [in] The initial value of the reduction
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream 0.