cub::DispatchSegmentedRadixSort#

template<SortOrder Order, typename KeyT, typename ValueT, typename BeginOffsetIteratorT, typename EndOffsetIteratorT, typename SegmentSizeT, typename PolicyHub = detail::radix::policy_hub<KeyT, ValueT, SegmentSizeT>, typename DecomposerT = detail::identity_decomposer_t, typename KernelSource = detail::radix_sort::DeviceSegmentedRadixSortKernelSource<typename PolicyHub::MaxPolicy, Order, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, SegmentSizeT, DecomposerT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchSegmentedRadixSort#

Utility class for dispatching the appropriately-tuned kernels for segmented device-wide radix sort.

Template Parameters:
  • Order – Whether to sort in ascending or descending order (a SortOrder value)

  • KeyT – Key type

  • ValueT – Value type

  • BeginOffsetIteratorT – Random-access input iterator type for reading segment beginning offsets (may be a simple pointer type)

  • EndOffsetIteratorT – Random-access input iterator type for reading segment ending offsets (may be a simple pointer type)

  • SegmentSizeT – Integer type to index items within a segment

Public Functions

inline DispatchSegmentedRadixSort(
void *d_temp_storage,
size_t &temp_storage_bytes,
DoubleBuffer<KeyT> &d_keys,
DoubleBuffer<ValueT> &d_values,
::cuda::std::int64_t num_items,
::cuda::std::int64_t num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
int begin_bit,
int end_bit,
bool is_overwrite_okay,
cudaStream_t stream,
int ptx_version,
DecomposerT decomposer = {},
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
)#

Constructor.

template<typename PassConfigT>
inline cudaError_t InvokePass(
const KeyT *d_keys_in,
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
int &current_bit,
PassConfigT &pass_config,
)#

Invoke a segmented sorting pass at the current bit, using the kernel and launch configuration held in pass_config.

template<typename ActivePolicyT, typename SegmentedKernelT>
inline cudaError_t InvokePasses(
SegmentedKernelT segmented_kernel,
SegmentedKernelT alt_segmented_kernel,
ActivePolicyT policy = {},
)#

Invocation (run multiple digit passes)

Template Parameters:
  • ActivePolicyT – Umbrella policy active for the target device

  • SegmentedKernelT – Function type of cub::DeviceSegmentedRadixSortKernel

Parameters:
  • segmented_kernel[in] Kernel function pointer to a parameterization of cub::DeviceSegmentedRadixSortKernel

  • alt_segmented_kernel[in] Alternate kernel function pointer to a parameterization of cub::DeviceSegmentedRadixSortKernel

template<typename ActivePolicyT>
inline cudaError_t Invoke(
ActivePolicyT policy = {},
)#

Invocation.

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

DoubleBuffer<KeyT> &d_keys#

Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys.

DoubleBuffer<ValueT> &d_values#

Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values.

::cuda::std::int64_t num_items#

Number of items to sort.

::cuda::std::int64_t num_segments#

The number of segments that comprise the sorting data.

BeginOffsetIteratorT d_begin_offsets#

Random-access input iterator to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i] is the index of the first element of the ith data segment in d_keys_* and d_values_*.

EndOffsetIteratorT d_end_offsets#

Random-access input iterator to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 is the index of the last element of the ith data segment in d_keys_* and d_values_*.

If d_end_offsets[i]-1 < d_begin_offsets[i], the ith segment is considered empty.

int begin_bit#

The beginning (least-significant) bit index needed for key comparison.

int end_bit#

The past-the-end (most-significant) bit index needed for key comparison.

cudaStream_t stream#

CUDA stream to launch kernels within. Default is stream 0.

int ptx_version#

PTX version.

bool is_overwrite_okay#

Whether it is okay to overwrite source buffers.

DecomposerT decomposer#
KernelSource kernel_source#
KernelLauncherFactory launcher_factory#

Public Static Functions

template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
DoubleBuffer<KeyT> &d_keys,
DoubleBuffer<ValueT> &d_values,
::cuda::std::int64_t num_items,
::cuda::std::int64_t num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
int begin_bit,
int end_bit,
bool is_overwrite_okay,
cudaStream_t stream,
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {},
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_keys[inout] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys

  • d_values[inout] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values

  • num_items[in] Number of items to sort

  • num_segments[in] The number of segments that comprise the sorting data

  • d_begin_offsets[in] Random-access input iterator to the sequence of beginning offsets of length num_segments, such that d_begin_offsets[i] is the index of the first element of the ith data segment in d_keys_* and d_values_*

  • d_end_offsets[in] Random-access input iterator to the sequence of ending offsets of length num_segments, such that d_end_offsets[i]-1 is the index of the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 < d_begin_offsets[i], the ith segment is considered empty.

  • begin_bit[in] The beginning (least-significant) bit index needed for key comparison

  • end_bit[in] The past-the-end (most-significant) bit index needed for key comparison

  • is_overwrite_okay[in] Whether it is okay to overwrite source buffers

  • stream[in] CUDA stream to launch kernels within. Default is stream 0.

Public Static Attributes

static constexpr bool KEYS_ONLY = ::cuda::std::is_same_v<ValueT, NullType>#
template<typename SegmentedKernelT>
struct PassConfig#

PassConfig data structure.

Public Functions

template<typename SegmentedPolicyT>
inline cudaError_t InitPassConfig(
SegmentedKernelT segmented_kernel,
int radix_bits,
SegmentedPolicyT policy = {},
KernelLauncherFactory launcher_factory = {},
)#

Initialize pass configuration.

Public Members

SegmentedKernelT segmented_kernel#
detail::KernelConfig segmented_config#
int radix_bits#
int radix_digits#