cub::AgentWarpReducePolicy#
-
template<int ThreadsPerBlock, int WarpThreads, int NominalItemsPerThread4B, typename ComputeT, int VectorLoadLength, CacheLoadModifier LoadModifier>
struct AgentWarpReducePolicy#
Parameterizable tuning policy type for AgentWarpReduce.
- Template Parameters:
ThreadsPerBlock – Threads per thread block
WarpThreads – Threads per warp
NominalItemsPerThread4B – Items per thread (per tile of input)
ComputeT – Dominant compute type
VectorLoadLength – Number of items per vectorized load
LoadModifier – Cache load modifier for reading input elements
Public Static Attributes
-
static constexpr int WARP_THREADS = WarpThreads#
Number of threads per warp.
-
static constexpr int VECTOR_LOAD_LENGTH = VectorLoadLength#
Number of items per vectorized load.
-
static constexpr int BLOCK_THREADS = ThreadsPerBlock#
Number of threads per block.
-
static constexpr int ITEMS_PER_THREAD = ::cuda::std::conditional_t<::cuda::std::is_same_v<ComputeT, void>, detail::NoScaling<0, NominalItemsPerThread4B>, detail::MemBoundScaling<0, NominalItemsPerThread4B, ComputeT>>::ITEMS_PER_THREAD#
Number of items per thread.
When ComputeT is void, the nominal value is used as-is (no scaling), which allows callers to pass the actual items_per_thread and opt out of the legacy 4B scaling.
-
static constexpr CacheLoadModifier LOAD_MODIFIER = LoadModifier#
Cache load modifier for reading input elements.
-
static constexpr int ITEMS_PER_TILE = ITEMS_PER_THREAD * WARP_THREADS#
Number of items per tile.
-
static constexpr int SEGMENTS_PER_BLOCK = BLOCK_THREADS / WARP_THREADS#
Number of segments per block.