cub::DispatchRadixSort#

template<SortOrder Order, typename KeyT, typename ValueT, typename OffsetT, typename DecomposerT = detail::identity_decomposer_t, typename PolicyHub = detail::radix::policy_hub<KeyT, ValueT, OffsetT>, typename KernelSource = detail::radix_sort::DeviceRadixSortKernelSource<typename PolicyHub::MaxPolicy, Order, KeyT, ValueT, OffsetT, DecomposerT>, typename KernelLauncherFactory = CUB_DETAIL_DEFAULT_KERNEL_LAUNCHER_FACTORY>
struct DispatchRadixSort#

Utility class for dispatching the appropriately-tuned kernels for device-wide radix sort.

Template Parameters:
  • Order – Whether to sort in ascending or descending order (a SortOrder value)

  • KeyT – Key type

  • ValueT – Value type

  • OffsetT – Signed integer type for global offsets

  • DecomposerT – Implementation detail; do not specify directly. Requirements on the content of this type are subject to breaking changes.

Public Functions

inline DispatchRadixSort(
void *d_temp_storage,
size_t &temp_storage_bytes,
DoubleBuffer<KeyT> &d_keys,
DoubleBuffer<ValueT> &d_values,
OffsetT num_items,
int begin_bit,
int end_bit,
bool is_overwrite_okay,
cudaStream_t stream,
int ptx_version,
DecomposerT decomposer = {},
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
)#

Constructor.

template<typename ActivePolicyT, typename SingleTileKernelT>
inline cudaError_t InvokeSingleTile(
SingleTileKernelT single_tile_kernel,
ActivePolicyT policy = {},
)#

Invoke a single block to sort in-core.

Template Parameters:
  • ActivePolicyT – Umbrella policy active for the target device

  • SingleTileKernelT – Function type of cub::DeviceRadixSortSingleTileKernel

Parameters:

  • single_tile_kernel[in] Kernel function pointer to parameterization of cub::DeviceRadixSortSingleTileKernel

template<typename PassConfigT>
inline cudaError_t InvokePass(
const KeyT *d_keys_in,
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
OffsetT *d_spine,
int,
int &current_bit,
PassConfigT &pass_config,
)#

Invoke a three-kernel sorting pass at the current bit.

template<typename ActivePolicyT>
inline cudaError_t InvokeOnesweep(
ActivePolicyT policy = {},
)#
template<typename ActivePolicyT, typename UpsweepKernelT, typename ScanKernelT, typename DownsweepKernelT>
inline cudaError_t InvokePasses(
UpsweepKernelT upsweep_kernel,
UpsweepKernelT alt_upsweep_kernel,
ScanKernelT scan_kernel,
DownsweepKernelT downsweep_kernel,
DownsweepKernelT alt_downsweep_kernel,
ActivePolicyT policy = {},
)#

Invocation (run multiple digit passes).

Template Parameters:
  • ActivePolicyT – Umbrella policy active for the target device

  • UpsweepKernelT – Function type of cub::DeviceRadixSortUpsweepKernel

  • ScanKernelT – Function type of cub::SpineScanKernel

  • DownsweepKernelT – Function type of cub::DeviceRadixSortDownsweepKernel

Parameters:
  • upsweep_kernel[in] Kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel

  • alt_upsweep_kernel[in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel

  • scan_kernel[in] Kernel function pointer to parameterization of cub::SpineScanKernel

  • downsweep_kernel[in] Kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel

  • alt_downsweep_kernel[in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel

inline cudaError_t InvokeCopy()#
template<typename ActivePolicyT>
inline cudaError_t Invoke(
ActivePolicyT policy = {},
)#

Invocation.

Public Members

void *d_temp_storage#

Device-accessible allocation of temporary storage.

size_t &temp_storage_bytes#

Reference to size in bytes of d_temp_storage allocation.

DoubleBuffer<KeyT> &d_keys#

Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys.

DoubleBuffer<ValueT> &d_values#

Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values.

OffsetT num_items#

Number of items to sort.

int begin_bit#

The beginning (least-significant) bit index needed for key comparison.

int end_bit#

The past-the-end (most-significant) bit index needed for key comparison.

cudaStream_t stream#

CUDA stream to launch kernels within. Default is stream 0.

int ptx_version#

PTX version.

bool is_overwrite_okay#

Whether it is okay to overwrite source buffers.

DecomposerT decomposer#
KernelSource kernel_source#
KernelLauncherFactory launcher_factory#

Public Static Functions

template<typename MaxPolicyT = typename PolicyHub::MaxPolicy>
static inline cudaError_t Dispatch(
void *d_temp_storage,
size_t &temp_storage_bytes,
DoubleBuffer<KeyT> &d_keys,
DoubleBuffer<ValueT> &d_values,
OffsetT num_items,
int begin_bit,
int end_bit,
bool is_overwrite_okay,
cudaStream_t stream,
DecomposerT decomposer = {},
KernelSource kernel_source = {},
KernelLauncherFactory launcher_factory = {},
MaxPolicyT max_policy = {},
)#

Internal dispatch routine.

Parameters:
  • d_temp_storage[in] Device-accessible allocation of temporary storage. When nullptr, the required allocation size is written to temp_storage_bytes and no work is done.

  • temp_storage_bytes[inout] Reference to size in bytes of d_temp_storage allocation

  • d_keys[inout] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys

  • d_values[inout] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values

  • num_items[in] Number of items to sort

  • begin_bit[in] The beginning (least-significant) bit index needed for key comparison

  • end_bit[in] The past-the-end (most-significant) bit index needed for key comparison

  • is_overwrite_okay[in] Whether it is okay to overwrite source buffers

  • stream[in] CUDA stream to launch kernels within. Default is stream 0.

Public Static Attributes

static constexpr bool KEYS_ONLY = ::cuda::std::is_same_v<ValueT, NullType>#
template<typename UpsweepKernelT, typename ScanKernelT, typename DownsweepKernelT>
struct PassConfig#

Pass configuration structure.

Public Functions

template<typename ActivePolicyT, typename UpsweepPolicyT, typename ScanPolicyT, typename DownsweepPolicyT>
inline cudaError_t InitPassConfig(
UpsweepKernelT upsweep_kernel,
ScanKernelT scan_kernel,
DownsweepKernelT downsweep_kernel,
int,
int sm_count,
OffsetT num_items,
ActivePolicyT policy = {},
UpsweepPolicyT upsweep_policy = {},
ScanPolicyT scan_policy = {},
DownsweepPolicyT downsweep_policy = {},
KernelLauncherFactory launcher_factory = {},
)#

Initialize pass configuration.

Public Members

UpsweepKernelT upsweep_kernel#
detail::KernelConfig upsweep_config#
ScanKernelT scan_kernel#
detail::KernelConfig scan_config#
DownsweepKernelT downsweep_kernel#
detail::KernelConfig downsweep_config#
int radix_bits#
int radix_digits#
int max_downsweep_grid_size#
GridEvenShare<OffsetT> even_share#