CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <default_mma_core_sm75.h>
Static Public Attributes | |
static int const | kInterleavedK = InterleavedK |
static int const | kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value |
Number of threads per warp. More... | |
static int const | kThreads = WarpCount::kCount * kWarpSize |
Number of threads total. More... | |
static int const | kAccessSizeInBits = 128 |
Size of a threadblock-scoped access. More... | |
static int const | kElementsPerAccess |
static int const | kWarpThreadArrangementContiguous |
static int const | kWarpThreadArrangementStrided |
Partial specialization:
A: column-major-interleave32 B: row-major-interleave32 Operator: tensor op class
This uses the default warp-level operator given tile sizes
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementA = ElementA_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementB = ElementB_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::ElementC = ElementC_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::InstructionShape = InstructionShape_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::IteratorThreadMapA = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kM * kInterleavedK, Shape::kK / kInterleavedK>, kThreads, layout::PitchLinearShape<32, 1>, kElementsPerAccess> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::IteratorThreadMapB = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kN * kInterleavedK, Shape::kK / kInterleavedK>, kThreads, layout::PitchLinearShape<32, 1>, kElementsPerAccess> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutA = layout::ColumnMajorInterleaved<InterleavedK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutB = layout::RowMajorInterleaved<InterleavedK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::LayoutC = LayoutC_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::MmaPolicy = MmaPolicy<MmaTensorOp, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::MmaTensorOp = typename cutlass::gemm::warp::DefaultMmaTensorOp< WarpShape, InstructionShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Operator, WarpCount::kK, AccumulatorsInRowMajor>::Type |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::Operator = Operator_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::OperatorClass = arch::OpClassTensorOp |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::Shape = Shape_ |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 0, SmemThreadMapA> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 1, SmemThreadMapB> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemLayoutA = layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementA>::value, kInterleavedK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemLayoutB = layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementB>::value, kInterleavedK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemThreadMapA = transform::TransposePitchLinearThreadMap< IteratorThreadMapA, layout::PitchLinearShape<kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided>> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::SmemThreadMapB = transform::TransposePitchLinearThreadMap< IteratorThreadMapB, layout::PitchLinearShape<kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided>> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::WarpCount = GemmShape<Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK> |
using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >::WarpShape = WarpShape_ |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |