CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Classes | Functions
cutlass::gemm::kernel Namespace Reference

Namespaces

 detail
 

Classes

struct  DefaultGemm
 
struct  DefaultGemm< ElementA, layout::ColumnMajorInterleaved< InterleavedK >, kAlignmentA, ElementB, layout::RowMajorInterleaved< InterleavedK >, kAlignmentB, ElementC, layout::ColumnMajorInterleaved< InterleavedK >, int32_t, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, IsBetaZero >
 Partial specialization for Turing integer matrix multiply with interleaved layouts. More...
 
struct  DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 1 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator >
 Partial specialization for SIMT. More...
 
struct  DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm70, ThreadblockShape, WarpShape, GemmShape< 8, 8, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator >
 Partial specialization for Volta architecture. More...
 
struct  DefaultGemm< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementC, layout::RowMajor, ElementAccumulator, arch::OpClassTensorOp, arch::Sm75, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator >
 Partial specialization for Turing architecture. More...
 
struct  DefaultGemm< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementC, LayoutC, ElementAccumulator, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, EpilogueOutputOp, ThreadblockSwizzle, 2, SplitKSerial, Operator, false >
 Partial specialization for SIMT DP4A. More...
 
struct  DefaultGemmSplitKParallel
 
struct  DefaultGemv
 
struct  Gemm
 
struct  GemmBatched
 
struct  GemmSplitKParallel
 

Functions

template<typename Mma , typename Epilogue , typename ThreadblockSwizzle >
__global__ void GemmPipelined (cutlass::gemm::GemmCoord problem_size, cutlass::gemm::GemmCoord grid_tiled_shape, typename Mma::IteratorA::Params params_A, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::Params params_B, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::Params params_epilogue)
 
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false>
CUTLASS_DEVICE void GemvBatchedStridedDevice (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd)
 
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero>
__global__ void GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, ElementAlphaBeta beta, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_C, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldc, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd)
 
template<typename GemvKernel , typename ElementAlphaBeta >
__global__ void GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, ElementAlphaBeta alpha, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd)
 
template<typename GemvKernel >
__global__ void GemvBatchedStrided (cutlass::gemm::BatchedGemmCoord problem_size, typename GemvKernel::IteratorA::TensorRef ref_A, typename GemvKernel::IteratorA::TensorRef::LongIndex lda, typename GemvKernel::IteratorB::TensorRef ref_B, typename GemvKernel::IteratorB::TensorRef::LongIndex ldb, typename GemvKernel::IteratorCD::TensorRef ref_D, typename GemvKernel::IteratorCD::TensorRef::LongIndex ldd)
 

Function Documentation

template<typename Mma , typename Epilogue , typename ThreadblockSwizzle >
__global__ void cutlass::gemm::kernel::GemmPipelined ( cutlass::gemm::GemmCoord  problem_size,
cutlass::gemm::GemmCoord  grid_tiled_shape,
typename Mma::IteratorA::Params  params_A,
typename Mma::IteratorA::TensorRef  ref_A,
typename Mma::IteratorB::Params  params_B,
typename Mma::IteratorB::TensorRef  ref_B,
typename Epilogue::Params  params_epilogue 
)
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero>
__global__ void cutlass::gemm::kernel::GemvBatchedStrided ( cutlass::gemm::BatchedGemmCoord  problem_size,
ElementAlphaBeta  alpha,
ElementAlphaBeta  beta,
typename GemvKernel::IteratorA::TensorRef  ref_A,
typename GemvKernel::IteratorA::TensorRef::LongIndex  lda,
typename GemvKernel::IteratorB::TensorRef  ref_B,
typename GemvKernel::IteratorB::TensorRef::LongIndex  ldb,
typename GemvKernel::IteratorCD::TensorRef  ref_C,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldc,
typename GemvKernel::IteratorCD::TensorRef  ref_D,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldd 
)
template<typename GemvKernel , typename ElementAlphaBeta >
__global__ void cutlass::gemm::kernel::GemvBatchedStrided ( cutlass::gemm::BatchedGemmCoord  problem_size,
ElementAlphaBeta  alpha,
typename GemvKernel::IteratorA::TensorRef  ref_A,
typename GemvKernel::IteratorA::TensorRef::LongIndex  lda,
typename GemvKernel::IteratorB::TensorRef  ref_B,
typename GemvKernel::IteratorB::TensorRef::LongIndex  ldb,
typename GemvKernel::IteratorCD::TensorRef  ref_D,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldd 
)
template<typename GemvKernel >
__global__ void cutlass::gemm::kernel::GemvBatchedStrided ( cutlass::gemm::BatchedGemmCoord  problem_size,
typename GemvKernel::IteratorA::TensorRef  ref_A,
typename GemvKernel::IteratorA::TensorRef::LongIndex  lda,
typename GemvKernel::IteratorB::TensorRef  ref_B,
typename GemvKernel::IteratorB::TensorRef::LongIndex  ldb,
typename GemvKernel::IteratorCD::TensorRef  ref_D,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldd 
)
template<typename GemvKernel , typename ElementAlphaBeta , bool BetaIsZero = false>
CUTLASS_DEVICE void cutlass::gemm::kernel::GemvBatchedStridedDevice ( cutlass::gemm::BatchedGemmCoord  problem_size,
ElementAlphaBeta  alpha,
ElementAlphaBeta  beta,
typename GemvKernel::IteratorA::TensorRef  ref_A,
typename GemvKernel::IteratorA::TensorRef::LongIndex  lda,
typename GemvKernel::IteratorB::TensorRef  ref_B,
typename GemvKernel::IteratorB::TensorRef::LongIndex  ldb,
typename GemvKernel::IteratorCD::TensorRef  ref_C,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldc,
typename GemvKernel::IteratorCD::TensorRef  ref_D,
typename GemvKernel::IteratorCD::TensorRef::LongIndex  ldd 
)