cub::DispatchSegmentedReduce#
-
template<typename InputIteratorT, typename OutputIteratorT, typename BeginOffsetIteratorT, typename EndOffsetIteratorT, typename OffsetT, typename ReductionOpT, typename InitT = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::it_value_t<InputIteratorT>>, typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::it_value_t<InputIteratorT>, InitT>, typename PolicyHub = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>, typename KernelSource = detail::reduce::DeviceSegmentedReduceKernelSource<typename PolicyHub::MaxPolicy, InputIteratorT, OutputIteratorT, BeginOffsetIteratorT, EndOffsetIteratorT, OffsetT, ReductionOpT, InitT, AccumT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchSegmentedReduce#
Utility class for dispatching the appropriately-tuned kernels for device-wide segmented reduction.
- Template Parameters:
InputIteratorT – Random-access input iterator type for reading input items (may be a simple pointer type)
OutputIteratorT – Output iterator type for recording the reduced aggregate (may be a simple pointer type)
BeginOffsetIteratorT – Random-access input iterator type for reading segment beginning offsets (may be a simple pointer type)
EndOffsetIteratorT – Random-access input iterator type for reading segment ending offsets (may be a simple pointer type)
OffsetT – Signed integer type for global offsets
ReductionOpT – Binary reduction functor type having member
auto operator()(const T &a, const U &b)
InitT – Type of the initial value of the reduction (the type of init)
Public Functions
- inline DispatchSegmentedReduce(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- ::cuda::std::int64_t num_segments,
- BeginOffsetIteratorT d_begin_offsets,
- EndOffsetIteratorT d_end_offsets,
- ReductionOpT reduction_op,
- InitT init,
- cudaStream_t stream,
- int ptx_version,
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
Constructor.
-
template<typename ActivePolicyT, typename DeviceSegmentedReduceKernelT>
inline cudaError_t InvokePasses( - DeviceSegmentedReduceKernelT segmented_reduce_kernel,
- ActivePolicyT policy = {},
Invocation.
- Template Parameters:
ActivePolicyT – Umbrella policy active for the target device
DeviceSegmentedReduceKernelT – Function type of cub::DeviceSegmentedReduceKernel
- Parameters:
segmented_reduce_kernel – [in] Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel
-
template<typename ActivePolicyT>
inline cudaError_t Invoke( - ActivePolicyT policy = {},
Invocation.
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage.
When `nullptr`, the required allocation size is written to `temp_storage_bytes` and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of
d_temp_storage
allocation.
-
InputIteratorT d_in#
Pointer to the input sequence of data items.
-
OutputIteratorT d_out#
Pointer to the output aggregate.
-
::cuda::std::int64_t num_segments#
The number of segments that comprise the segmented reduction data.
-
BeginOffsetIteratorT d_begin_offsets#
Random-access input iterator to the sequence of beginning offsets of length `num_segments`, such that `d_begin_offsets[i]` is the first element of the ith data segment in `d_in`.
-
EndOffsetIteratorT d_end_offsets#
Random-access input iterator to the sequence of ending offsets of length `num_segments`, such that `d_end_offsets[i] - 1` is the last element of the ith data segment in `d_in`. If `d_end_offsets[i] - 1 <= d_begin_offsets[i]`, the ith segment is considered empty.
-
ReductionOpT reduction_op#
Binary reduction functor.
-
cudaStream_t stream#
CUDA stream to launch kernels within. Default is stream 0.
-
int ptx_version#
-
KernelSource kernel_source#
-
KernelLauncherFactory launcher_factory#
Public Static Functions
-
template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- InputIteratorT d_in,
- OutputIteratorT d_out,
- ::cuda::std::int64_t num_segments,
- BeginOffsetIteratorT d_begin_offsets,
- EndOffsetIteratorT d_end_offsets,
- ReductionOpT reduction_op,
- InitT init,
- cudaStream_t stream,
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
- MaxPolicyT max_policy = {},
Internal dispatch routine for computing a device-wide segmented reduction.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When `nullptr`, the required allocation size is written to `temp_storage_bytes` and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of the `d_temp_storage` allocation
d_in – [in] Pointer to the input sequence of data items
d_out – [out] Pointer to the output aggregate
num_segments – [in] The number of segments that comprise the segmented reduction data
d_begin_offsets – [in] Random-access input iterator to the sequence of beginning offsets of length `num_segments`, such that `d_begin_offsets[i]` is the first element of the ith data segment in `d_in`
d_end_offsets – [in] Random-access input iterator to the sequence of ending offsets of length `num_segments`, such that `d_end_offsets[i] - 1` is the last element of the ith data segment in `d_in`. If `d_end_offsets[i] - 1 <= d_begin_offsets[i]`, the ith segment is considered empty.
reduction_op – [in] Binary reduction functor
init – [in] The initial value of the reduction
stream – [in] [optional] CUDA stream to launch kernels within. Default is stream 0.