cub::DispatchHistogram

template<int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT, typename PolicyHub = void, typename KernelSource = detail::histogram::DeviceHistogramKernelSource<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, OffsetT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY, typename SampleT = cub::detail::it_value_t<SampleIteratorT>, typename TransformsT = detail::histogram::Transforms<LevelT, OffsetT, SampleT>>
struct DispatchHistogram

Utility class for dispatching the appropriately-tuned kernels for DeviceHistogram.

Template Parameters:
  • NUM_CHANNELS – Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed)

  • NUM_ACTIVE_CHANNELS – Number of channels actively being histogrammed

  • SampleIteratorT – Random-access input iterator type for reading input items (may be a simple pointer type)

  • CounterT – Integer type for counting sample occurrences per histogram bin

  • LevelT – Type for specifying bin level boundaries

  • OffsetT – Signed integer type for global offsets

  • PolicyHub – Implementation detail; do not specify directly. Requirements on the content of this type are subject to breaking changes.

Public Static Functions

template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 0>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchRange(
void *d_temp_storage,
size_t &temp_storage_bytes,
SampleIteratorT d_samples,
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
OffsetT num_row_pixels,
OffsetT num_rows,
OffsetT row_stride_samples,
cudaStream_t stream,
::cuda::std::false_type,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {}
)

Dispatch routine for HistogramRange, specialized for sample types larger than 8-bit.

Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_samples – The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).

  • d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.

  • num_output_levels – The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.

  • d_levels – The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.

  • num_row_pixels – The number of multi-channel pixels per row in the region of interest

  • num_rows – The number of rows in the region of interest

  • row_stride_samples – The number of samples between starts of consecutive rows in the region of interest

  • stream – CUDA stream to launch kernels within. Default is stream 0.

template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 0>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchRange(
void *d_temp_storage,
size_t &temp_storage_bytes,
SampleIteratorT d_samples,
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
OffsetT num_row_pixels,
OffsetT num_rows,
OffsetT row_stride_samples,
cudaStream_t stream,
::cuda::std::true_type,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {}
)

Dispatch routine for HistogramRange, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels).

Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_samples – The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).

  • d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.

  • num_output_levels – The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.

  • d_levels – The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive.

  • num_row_pixels – The number of multi-channel pixels per row in the region of interest

  • num_rows – The number of rows in the region of interest

  • row_stride_samples – The number of samples between starts of consecutive rows in the region of interest

  • stream – CUDA stream to launch kernels within. Default is stream 0.

template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 1>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchEven(
void *d_temp_storage,
size_t &temp_storage_bytes,
SampleIteratorT d_samples,
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
OffsetT num_row_pixels,
OffsetT num_rows,
OffsetT row_stride_samples,
cudaStream_t stream,
::cuda::std::false_type,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {}
)

Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit.

Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_samples – The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).

  • d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.

  • num_output_levels – The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.

  • lower_level – The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.

  • upper_level – The upper sample value bound (exclusive) for the highest histogram bin in each active channel.

  • num_row_pixels – The number of multi-channel pixels per row in the region of interest

  • num_rows – The number of rows in the region of interest

  • row_stride_samples – The number of samples between starts of consecutive rows in the region of interest

  • stream – CUDA stream to launch kernels within. Default is stream 0.

template<typename MaxPolicyT = typename ::cuda::std::_If<::cuda::std::is_void_v<PolicyHub>, detail::histogram::policy_hub<SampleT, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, 1>, PolicyHub>::MaxPolicy>
static inline cudaError_t DispatchEven(
void *d_temp_storage,
size_t &temp_storage_bytes,
SampleIteratorT d_samples,
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_output_histograms,
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_output_levels,
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
OffsetT num_row_pixels,
OffsetT num_rows,
OffsetT row_stride_samples,
cudaStream_t stream,
::cuda::std::true_type,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {}
)

Dispatch routine for HistogramEven, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels).

Parameters:
  • d_temp_storage – Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes – Reference to size in bytes of d_temp_storage allocation

  • d_samples – The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples).

  • d_output_histograms – The pointers to the histogram counter output arrays, one for each active channel. For channel i, the allocation length of d_output_histograms[i] should be num_output_levels[i] - 1.

  • num_output_levels – The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channel i is num_output_levels[i] - 1.

  • lower_level – The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.

  • upper_level – The upper sample value bound (exclusive) for the highest histogram bin in each active channel.

  • num_row_pixels – The number of multi-channel pixels per row in the region of interest

  • num_rows – The number of rows in the region of interest

  • row_stride_samples – The number of samples between starts of consecutive rows in the region of interest

  • stream – CUDA stream to launch kernels within. Default is stream 0.