cub::DispatchHistogram
-
template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT, typename PolicyHub = void, typename KernelSource = detail::histogram::DeviceHistogramKernelSource<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, OffsetT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY, typename SampleT = cub::detail::it_value_t<SampleIteratorT>, typename TransformsT = detail::histogram::Transforms<LevelT, OffsetT, SampleT>>
struct DispatchHistogram
Utility class for dispatching the appropriately-tuned kernels for DeviceHistogram.
- Template Parameters:
NUM_CHANNELS – Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)
NUM_ACTIVE_CHANNELS – Number of channels actively being histogrammed
SampleIteratorT – Random-access input iterator type for reading input items (may be a simple pointer type)
CounterT – Integer type for counting sample occurrences per histogram bin
LevelT – Type for specifying bin level boundaries
OffsetT – Signed integer type for global offsets
PolicyHub – Implementation detail; do not specify directly. Requirements on the content of this type are subject to breaking change.
Public Static Functions
-
template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 0>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchRange(
    void *d_temp_storage,
    size_t &temp_storage_bytes,
    SampleIteratorT d_samples,
    ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
    ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
    ::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
    OffsetT num_row_pixels,
    OffsetT num_rows,
    OffsetT row_stride_samples,
    cudaStream_t stream,
    ::cuda::std::false_type,
    KernelSource kernel_source = {},
    KernelLauncherFactory launcher_factory = {},
    MaxPolicyT max_policy = {})
Dispatch routine for HistogramRange, specialized for sample types larger than 8-bit.
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_samples – The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.
num_output_levels – The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.
d_levels – The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
num_row_pixels – The number of multi-channel pixels per row in the region of interest
num_rows – The number of rows in the region of interest
row_stride_samples – The number of samples between starts of consecutive rows in the region of interest
stream – CUDA stream to launch kernels within. Default is stream 0.
-
template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 0>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchRange(
    void *d_temp_storage,
    size_t &temp_storage_bytes,
    SampleIteratorT d_samples,
    ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
    ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
    ::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
    OffsetT num_row_pixels,
    OffsetT num_rows,
    OffsetT row_stride_samples,
    cudaStream_t stream,
    ::cuda::std::true_type,
    KernelSource kernel_source = {},
    KernelLauncherFactory launcher_factory = {},
    MaxPolicyT max_policy = {})
Dispatch routine for HistogramRange, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels)
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_samples – The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.
num_output_levels – The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.
d_levels – The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.
num_row_pixels – The number of multi-channel pixels per row in the region of interest
num_rows – The number of rows in the region of interest
row_stride_samples – The number of samples between starts of consecutive rows in the region of interest
stream – CUDA stream to launch kernels within. Default is stream 0.
-
template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 1>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchEven(
    void *d_temp_storage,
    size_t &temp_storage_bytes,
    SampleIteratorT d_samples,
    ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
    ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
    ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
    ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
    OffsetT num_row_pixels,
    OffsetT num_rows,
    OffsetT row_stride_samples,
    cudaStream_t stream,
    ::cuda::std::false_type,
    KernelSource kernel_source = {},
    KernelLauncherFactory launcher_factory = {},
    MaxPolicyT max_policy = {})
- MaxPolicyT max_policy = {},
Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit.
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_samples – The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.
num_output_levels – The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.
lower_level – The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
upper_level – The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
num_row_pixels – The number of multi-channel pixels per row in the region of interest
num_rows – The number of rows in the region of interest
row_stride_samples – The number of samples between starts of consecutive rows in the region of interest
stream – CUDA stream to launch kernels within. Default is stream 0.
-
template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 1>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchEven(
    void *d_temp_storage,
    size_t &temp_storage_bytes,
    SampleIteratorT d_samples,
    ::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
    ::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
    ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
    ::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
    OffsetT num_row_pixels,
    OffsetT num_rows,
    OffsetT row_stride_samples,
    cudaStream_t stream,
    ::cuda::std::true_type,
    KernelSource kernel_source = {},
    KernelLauncherFactory launcher_factory = {},
    MaxPolicyT max_policy = {})
- MaxPolicyT max_policy = {},
Dispatch routine for HistogramEven, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels)
- Parameters:
d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.
temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation
d_samples – The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).
d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.
num_output_levels – The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.
lower_level – The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
upper_level – The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
num_row_pixels – The number of multi-channel pixels per row in the region of interest
num_rows – The number of rows in the region of interest
row_stride_samples – The number of samples between starts of consecutive rows in the region of interest
stream – CUDA stream to launch kernels within. Default is stream 0.