54 namespace threadblock {
61 template<
typename WarpShape>
63 return (WarpShape::kM > WarpShape::kN) ? 8 : 4;
68 return (size_in_bits >= 32 ?
69 threads / crosswise / (size_in_bits / 32) :
70 threads / crosswise * (32 / size_in_bits)
103 ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
115 static int const PartitionsK = Shape::kK / WarpShape::kK;
122 Shape::kM / WarpShape::kM,
123 Shape::kN / WarpShape::kN,
129 !(Shape::kM % WarpShape::kM) &&
130 !(Shape::kN % WarpShape::kN),
131 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 138 static int const kThreads = WarpCount::kCount * kWarpSize;
140 static int const kElementsPerAccess = 1;
190 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
191 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
192 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
193 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
194 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
195 "WarpShape must be divisible by ThreadTile shape.");
196 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
259 ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
264 using ElementA = ElementA_;
266 using ElementB = ElementB_;
271 static int const PartitionsK = Shape::kK / WarpShape::kK;
278 Shape::kM / WarpShape::kM,
279 Shape::kN / WarpShape::kN,
285 !(Shape::kM % WarpShape::kM) &&
286 !(Shape::kN % WarpShape::kN),
287 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 294 static int const kThreads = WarpCount::kCount * kWarpSize;
296 static int const kElementsPerAccess = 1;
321 MatrixShape<Shape::kM, Shape::kK>,
340 MatrixShape<Shape::kK, Shape::kN>,
352 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
353 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
354 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
355 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
356 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
357 "WarpShape must be divisible by ThreadTile shape.");
358 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
425 LayoutC_, arch::OpClassSimt, 2, Operator_
430 using ElementA = ElementA_;
432 using ElementB = ElementB_;
437 static int const PartitionsK = Shape::kK / WarpShape::kK;
444 Shape::kM / WarpShape::kM,
445 Shape::kN / WarpShape::kN,
451 !(Shape::kM % WarpShape::kM) &&
452 !(Shape::kN % WarpShape::kN),
453 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 460 static int const kThreads = WarpCount::kCount * kWarpSize;
462 static int const kElementsPerAccess = 1;
477 layout::PitchLinearShape<Shape::kK, Shape::kM>,
487 MatrixShape<Shape::kM, Shape::kK>,
496 layout::PitchLinearShape<Shape::kN, Shape::kK>,
503 MatrixShape<Shape::kK, Shape::kN>,
515 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
516 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
517 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
518 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
519 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
520 "WarpShape must be divisible by ThreadTile shape.");
521 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
587 ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_
592 using ElementA = ElementA_;
594 using ElementB = ElementB_;
599 static int const PartitionsK = Shape::kK / WarpShape::kK;
606 Shape::kM / WarpShape::kM,
607 Shape::kN / WarpShape::kN,
613 !(Shape::kM % WarpShape::kM) &&
614 !(Shape::kN % WarpShape::kN),
615 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 622 static int const kThreads = WarpCount::kCount * kWarpSize;
624 static int const kElementsPerAccess = 1;
639 layout::PitchLinearShape<Shape::kM, Shape::kK>,
646 MatrixShape<Shape::kM, Shape::kK>,
655 layout::PitchLinearShape<Shape::kK, Shape::kN>,
665 MatrixShape<Shape::kK, Shape::kN>,
677 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
678 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
679 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
680 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
681 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
682 "WarpShape must be divisible by ThreadTile shape.");
683 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
745 LayoutC_, arch::OpClassSimt, 2, Operator_
751 using ElementA = int8_t;
753 using ElementB = int8_t;
758 static int const PartitionsK = Shape::kK / WarpShape::kK;
765 Shape::kM / WarpShape::kM,
766 Shape::kN / WarpShape::kN,
772 !(Shape::kM % WarpShape::kM) &&
773 !(Shape::kN % WarpShape::kN),
774 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 781 static int const kThreads = WarpCount::kCount * kWarpSize;
796 layout::PitchLinearShape<Shape::kM, Shape::kK>,
803 MatrixShape<Shape::kM, Shape::kK>,
813 layout::PitchLinearShape<Shape::kN, Shape::kK>,
820 MatrixShape<Shape::kK, Shape::kN>,
832 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
833 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
834 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
835 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
836 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
837 "WarpShape must be divisible by ThreadTile shape.");
838 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
899 LayoutC_, arch::OpClassSimt, 2, Operator_
905 using ElementA = int8_t;
907 using ElementB = int8_t;
912 static int const PartitionsK = Shape::kK / WarpShape::kK;
919 Shape::kM / WarpShape::kM,
920 Shape::kN / WarpShape::kN,
926 !(Shape::kM % WarpShape::kM) &&
927 !(Shape::kN % WarpShape::kN),
928 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 935 static int const kThreads = WarpCount::kCount * kWarpSize;
950 layout::PitchLinearShape<Shape::kK, Shape::kM>,
960 MatrixShape<Shape::kM, Shape::kK>,
970 layout::PitchLinearShape<Shape::kK, Shape::kN>,
980 MatrixShape<Shape::kK, Shape::kN>,
992 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
993 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
994 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
995 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
996 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
997 "WarpShape must be divisible by ThreadTile shape.");
998 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
1053 typename WarpShape_,
1062 LayoutC_, arch::OpClassSimt, 2, Operator_
1068 using ElementA = int8_t;
1070 using ElementB = int8_t;
1075 static int const PartitionsK = Shape::kK / WarpShape::kK;
1082 Shape::kM / WarpShape::kM,
1083 Shape::kN / WarpShape::kN,
1089 !(Shape::kM % WarpShape::kM) &&
1090 !(Shape::kN % WarpShape::kN),
1091 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 1098 static int const kThreads = WarpCount::kCount * kWarpSize;
1113 layout::PitchLinearShape<Shape::kK, Shape::kM>,
1123 MatrixShape<Shape::kM, Shape::kK>,
1132 layout::PitchLinearShape<Shape::kN, Shape::kK>,
1139 MatrixShape<Shape::kK, Shape::kN>,
1151 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
1152 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
1153 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
1154 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
1155 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
1156 "WarpShape must be divisible by ThreadTile shape.");
1157 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
1212 typename WarpShape_,
1221 LayoutC_, arch::OpClassSimt, 2, Operator_
1227 using ElementA = int8_t;
1229 using ElementB = int8_t;
1234 static int const PartitionsK = Shape::kK / WarpShape::kK;
1241 Shape::kM / WarpShape::kM,
1242 Shape::kN / WarpShape::kN,
1248 !(Shape::kM % WarpShape::kM) &&
1249 !(Shape::kN % WarpShape::kN),
1250 "Threadblock-scoped GEMM should be divisible by warp-scoped GEMM size." 1257 static int const kThreads = WarpCount::kCount * kWarpSize;
1272 layout::PitchLinearShape<Shape::kM, Shape::kK>,
1279 MatrixShape<Shape::kM, Shape::kK>,
1289 layout::PitchLinearShape<Shape::kK, Shape::kN>,
1299 MatrixShape<Shape::kK, Shape::kN>,
1311 static const int WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>();
1312 static const int WarpNumThreadsN = kWarpSize / WarpNumThreadsM;
1313 static const int ThreadTileM = WarpShape::kM / WarpNumThreadsM;
1314 static const int ThreadTileN = WarpShape::kN / WarpNumThreadsN;
1315 static_assert(!(WarpShape::kM % WarpNumThreadsM) && !(WarpShape::kN % WarpNumThreadsN),
1316 "WarpShape must be divisible by ThreadTile shape.");
1317 static const int LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1;
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:756
Describes the lane policy used by warp-level matrix multiply operators targeting SIMT instructions...
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
Templates implementing loading of tiles from pitch-linear rank=2 tensors.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:1195
Definition: aligned_buffer.h:35
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementA ElementA_ ElementA
Definition: default_mma_core_simt.h:108
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:435
Query the number of threads per warp.
Definition: gemm/warp/mma.h:43
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:755
Definition: default_mma_core.h:90
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:719
Templates implementing how threads are mapped to a given tile.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:1225
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:1232
Structure to compute the matrix product targeting CUDA cores and SIMT math instructions.
Definition: mma_simt.h:74
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:749
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:1237
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:1078
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:1074
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:436
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:113
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:761
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:910
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:1073
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:902
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:114
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:270
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:748
Describes the arrangement and configuration of per-lane operations in warp-level matrix multiply...
Definition: mma_simt_policy.h:46
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:873
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:106
Defines a Shape template for matrix tiles.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:1231
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:105
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:1036
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:262
Defines the size of an element in bits.
Definition: numeric_types.h:42
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:1233
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:597
Defines basic properties needed by CTA-level GEMMs assuming expectations about data layout of the glo...
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:434
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:903
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:118
Top-level include for all CUTLASS numeric types.
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:602
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:428
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:1072
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:598
constexpr int simt_transpose_padding(int threads, int crosswise, int size_in_bits)
Computes padding in shared memory to perform efficient transpose without bank conflicts.
Definition: default_mma_core_simt.h:67
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:1066
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:261
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:589
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:1355
Templates implementing loading of tiles from pitch-linear rank=2 tensors.
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
constexpr int simt_get_warp_threads_m()
Definition: default_mma_core_simt.h:62
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:909
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:112
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:1065
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Used for partial specialization.
Definition: default_mma_core_simt.h:229
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:911
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:596
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:440
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingN, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:395
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:427
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementB ElementB_ ElementB
Definition: default_mma_core_simt.h:110
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC ElementC_ ElementC
Definition: default_mma_core_simt.h:268
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:274
Templates implementing warp-level matrix multiply-accumulate operations.
CUTLASS_HOST_DEVICE constexpr int const_min(int a, int b)
Definition: fast_math.h:219
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator Operator_ Operator
Default Operator.
Definition: default_mma_core_simt.h:915
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, 0 >, WarpCount::kK > MmaPolicy
Policy used to define MmaPipelined.
Definition: default_mma_core_simt.h:557
Basic include for CUTLASS.
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC LayoutC_ LayoutC
Definition: default_mma_core_simt.h:269
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass arch::OpClassSimt OperatorClass
Definition: default_mma_core_simt.h:757
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape Shape_ Shape
Definition: default_mma_core_simt.h:1224
cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape WarpShape_ WarpShape
Definition: default_mma_core_simt.h:590
Definition: layout/matrix.h:237