52 namespace threadblock {
81 ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
99 Shape::kM / WarpShape::kM,
100 Shape::kN / WarpShape::kN,
101 Shape::kK / WarpShape::kK
106 !(Shape::kM % WarpShape::kM) &&
107 !(Shape::kN % WarpShape::kN),
108 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
115 static int const kThreads = WarpCount::kCount * kWarpSize;
118 static int const kAccessSizeInBits = 128;
158 layout::PitchLinearShape<8, 4>,
186 cutlass::arch::OpMultiplyAdd
236 ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
254 Shape::kM / WarpShape::kM,
255 Shape::kN / WarpShape::kN,
256 Shape::kK / WarpShape::kK
261 !(Shape::kM % WarpShape::kM) &&
262 !(Shape::kN % WarpShape::kN),
263 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
270 static int const kThreads = WarpCount::kCount * kWarpSize;
273 static int const kAccessSizeInBits = 128;
311 layout::PitchLinearShape<4, 8>,
339 cutlass::arch::OpMultiplyAdd
391 LayoutC_, arch::OpClassTensorOp, 2, Operator_
409 Shape::kM / WarpShape::kM,
410 Shape::kN / WarpShape::kN,
411 Shape::kK / WarpShape::kK
416 !(Shape::kM % WarpShape::kM) &&
417 !(Shape::kN % WarpShape::kN),
418 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
425 static int const kThreads = WarpCount::kCount * kWarpSize;
428 static int const kAccessSizeInBits = 128;
494 cutlass::arch::OpMultiplyAdd
546 ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_
564 Shape::kM / WarpShape::kM,
565 Shape::kN / WarpShape::kN,
566 Shape::kK / WarpShape::kK
571 !(Shape::kM % WarpShape::kM) &&
572 !(Shape::kN % WarpShape::kN),
573 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size."
580 static int const kThreads = WarpCount::kCount * kWarpSize;
583 static int const kAccessSizeInBits = 128;
649 cutlass::arch::OpMultiplyAdd
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:247
Template mapping a row-major view of pitch-linear memory to VoltaTensorOpMultiplicandCongruous.
Definition: tensor_op_multiplicand_sm70.h:630
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:551
Templates implementing loading of tiles from pitch-linear rank=2 tensors.
Query the number of threads per warp.
Definition: gemm/warp/mma.h:43
Templates implementing warp-level matrix multiply-accumulate operations targeting Tensor Cores...
Definition: default_mma_core.h:90
Templates implementing how threads are mapped to a given tile.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:396
Definition: tensor_op_multiplicand_sm70.h:848
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:398
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_sm70.h:560
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:557
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:243
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:402
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:548
Defines a Shape template for matrix tiles.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_sm70.h:671
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:91
Defines the size of an element in bits.
Definition: numeric_types.h:42
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:400
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_sm70.h:208
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:88
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:83
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_sm70.h:553
Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the glo...
Structure to compute the matrix product targeting Volta Tensor Cores.
Definition: mma_tensor_op_sm70.h:77
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_sm70.h:361
Top-level include for all CUTLASS numeric types.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:86
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:84
Policy.
Definition: mma_tensor_op_policy.h:48
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:549
Definition: tensor_op_multiplicand_sm70.h:943
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:556
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_sm70.h:250
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:394
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass arch::OpClassTensorOp OperatorClass
Definition: default_mma_core_sm70.h:92
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_sm70.h:241
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_sm70.h:405
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_sm70.h:516
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_sm70.h:95
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:555
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:90
Matrix multiply-add operation.
Definition: arch/mma.h:92
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:401
Template mapping a column-major view of pitch-linear memory to VoltaTensorOpMultiplicandCongruous.
Definition: tensor_op_multiplicand_sm70.h:191
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_sm70.h:239
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:238
Basic include for CUTLASS.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_sm70.h:393
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_sm70.h:246
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_sm70.h:245