cub::DispatchSegmentedRadixSort#
-
template<SortOrder Order, typename KeyT, typename ValueT, typename BeginOffsetIteratorT, typename EndOffsetIteratorT, typename SegmentSizeT, typename PolicyHub = detail::radix::policy_hub<KeyT, ValueT, SegmentSizeT>, typename DecomposerT = detail::identity_decomposer_t, typename KernelSource = detail::radix_sort::DeviceSegmentedRadixSortKernelSource<typename PolicyHub::MaxPolicy, Order, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, SegmentSizeT, DecomposerT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchSegmentedRadixSort# Utility class for dispatching the appropriately-tuned kernels for segmented device-wide radix sort.
- Template Parameters:
Order – Whether to sort in ascending or descending order (a SortOrder value)
KeyT – Key type
ValueT – Value type
BeginOffsetIteratorT – Random-access input iterator type for reading segment beginning offsets (may be a simple pointer type)
EndOffsetIteratorT – Random-access input iterator type for reading segment ending offsets (may be a simple pointer type)
SegmentSizeT – Integer type to index items within a segment
Public Functions
- inline DispatchSegmentedRadixSort(
- void *d_temp_storage,
- size_t &temp_storage_bytes,
- DoubleBuffer<KeyT> &d_keys,
- DoubleBuffer<ValueT> &d_values,
- ::cuda::std::int64_t num_items,
- ::cuda::std::int64_t num_segments,
- BeginOffsetIteratorT d_begin_offsets,
- EndOffsetIteratorT d_end_offsets,
- int begin_bit,
- int end_bit,
- bool is_overwrite_okay,
- cudaStream_t stream,
- int ptx_version,
- DecomposerT decomposer = {},
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
Constructor.
-
template<typename PassConfigT>
inline cudaError_t InvokePass( - const KeyT *d_keys_in,
- KeyT *d_keys_out,
- const ValueT *d_values_in,
- ValueT *d_values_out,
- int &current_bit,
- PassConfigT &pass_config,
Invoke a three-kernel sorting pass at the current bit.
-
template<typename ActivePolicyT, typename SegmentedKernelT>
inline cudaError_t InvokePasses( - SegmentedKernelT segmented_kernel,
- SegmentedKernelT alt_segmented_kernel,
- ActivePolicyT policy = {},
Invocation (run multiple digit passes)
- Template Parameters:
ActivePolicyT – Umbrella policy active for the target device
SegmentedKernelT – Function type of cub::DeviceSegmentedRadixSortKernel
- Parameters:
segmented_kernel – [in] Kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel
alt_segmented_kernel – [in] Alternate kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel
-
template<typename ActivePolicyT>
inline cudaError_t Invoke( - ActivePolicyT policy = {},
Invocation.
Public Members
-
void *d_temp_storage#
Device-accessible allocation of temporary storage.
When nullptr, the required allocation size is written to
temp_storage_bytes
and no work is done.
-
size_t &temp_storage_bytes#
Reference to size in bytes of
d_temp_storage
allocation.
-
DoubleBuffer<KeyT> &d_keys#
Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys.
-
DoubleBuffer<ValueT> &d_values#
Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values.
-
::cuda::std::int64_t num_items#
Number of items to sort.
-
::cuda::std::int64_t num_segments#
The number of segments that comprise the sorting data.
-
BeginOffsetIteratorT d_begin_offsets#
Random-access input iterator to the sequence of beginning offsets of length
num_segments
, such that d_begin_offsets[i]
is the first element of the ith data segment in d_keys_*
and d_values_*
-
EndOffsetIteratorT d_end_offsets#
Random-access input iterator to the sequence of ending offsets of length
num_segments
, such that d_end_offsets[i]-1
is the last element of the ith data segment in d_keys_*
and d_values_*
. If
d_end_offsets[i]-1
<= d_begin_offsets[i]
, the ith segment is considered empty.
-
int begin_bit#
The beginning (least-significant) bit index needed for key comparison.
-
int end_bit#
The past-the-end (most-significant) bit index needed for key comparison.
-
cudaStream_t stream#
CUDA stream to launch kernels within. Default is stream 0.
-
int ptx_version#
PTX version.
-
bool is_overwrite_okay#
Whether it is okay to overwrite source buffers.
-
DecomposerT decomposer#
-
KernelSource kernel_source#
-
KernelLauncherFactory launcher_factory#
Public Static Functions
-
template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch( - void *d_temp_storage,
- size_t &temp_storage_bytes,
- DoubleBuffer<KeyT> &d_keys,
- DoubleBuffer<ValueT> &d_values,
- ::cuda::std::int64_t num_items,
- ::cuda::std::int64_t num_segments,
- BeginOffsetIteratorT d_begin_offsets,
- EndOffsetIteratorT d_end_offsets,
- int begin_bit,
- int end_bit,
- bool is_overwrite_okay,
- cudaStream_t stream,
- KernelSource kernel_source = {},
- KernelLauncherFactory launcher_factory = {},
- MaxPolicyT max_policy = {},
Internal dispatch routine.
- Parameters:
d_temp_storage – [in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to
temp_storage_bytes
and no work is done.
temp_storage_bytes – [inout] Reference to size in bytes of
d_temp_storage
allocation
d_keys – [inout] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys
d_values – [inout] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values
num_items – [in] Number of items to sort
num_segments – [in] The number of segments that comprise the sorting data
d_begin_offsets – [in] Random-access input iterator to the sequence of beginning offsets of length
num_segments
, such that d_begin_offsets[i]
is the first element of the ith data segment in d_keys_*
and d_values_*
d_end_offsets – [in] Random-access input iterator to the sequence of ending offsets of length
num_segments
, such that d_end_offsets[i]-1
is the last element of the ith data segment in d_keys_*
and d_values_*
. If d_end_offsets[i]-1
<= d_begin_offsets[i]
, the ith segment is considered empty.
begin_bit – [in] The beginning (least-significant) bit index needed for key comparison
end_bit – [in] The past-the-end (most-significant) bit index needed for key comparison
is_overwrite_okay – [in] Whether it is okay to overwrite source buffers
stream – [in] CUDA stream to launch kernels within. Default is stream 0.
Public Static Attributes
-
template<typename SegmentedKernelT>
struct PassConfig# PassConfig data structure.
Public Functions
-
template<typename SegmentedPolicyT>
inline cudaError_t InitPassConfig( - SegmentedKernelT segmented_kernel,
- int radix_bits,
- SegmentedPolicyT policy = {},
- KernelLauncherFactory launcher_factory = {},
Initialize pass configuration.
Public Members
-
SegmentedKernelT segmented_kernel#
-
detail::KernelConfig segmented_config#
-
int radix_bits#
-
int radix_digits#
-
template<typename SegmentedPolicyT>