CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Namespaces | |
kernel | |
thread | |
Classes | |
struct | BatchedReduction |
struct | BatchedReductionTraits |
struct | DefaultBlockSwizzle |
Functions | |
template<typename batched_reduction_ > | |
__global__ | __launch_bounds__ (batched_reduction_::Traits::kThreads, 1) void batched_reduction_kernel(typename batched_reduction_ |
__global__ cutlass::reduction::__launch_bounds__(batched_reduction_::Traits::kThreads, 1)