35 #if defined(CUTLASS_ARCH_WMMA_ENABLED) 48 #if ((__CUDACC_VER_MAJOR__ > 10) || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ >= 2)) 50 #define CUTLASS_ARCH_MMA_SM75_SUPPORTED 1 52 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 750)) 53 #define CUTLASS_ARCH_MMA_SM75_ENABLED 71 gemm::GemmShape<16, 8, 8>,
105 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 107 unsigned const *A =
reinterpret_cast<unsigned const *
>(&a);
108 unsigned const *B =
reinterpret_cast<unsigned const *
>(&b);
109 unsigned const *C =
reinterpret_cast<unsigned const *
>(&c);
110 unsigned *D =
reinterpret_cast<unsigned *
>(&d);
113 "mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0,%1}, {%2,%3}, {%4}, {%5,%6};\n" 114 :
"=r"(D[0]),
"=r"(D[1])
115 :
"r"(A[0]),
"r"(A[1]),
"r"(B[0]),
"r"(C[0]),
"r"(C[1]));
132 gemm::GemmShape<16, 8, 8>,
163 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 165 unsigned const *A =
reinterpret_cast<unsigned const *
>(&a);
166 unsigned const *B =
reinterpret_cast<unsigned const *
>(&b);
167 float const *C =
reinterpret_cast<float const *
>(&c);
168 float *D =
reinterpret_cast<float *
>(&d);
170 asm volatile(
"mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0,%1,%2,%3}, {%4,%5}, {%6}, {%7,%8,%9,%10};\n" 171 :
"=f"(D[0]),
"=f"(D[1]),
"=f"(D[2]),
"=f"(D[3])
173 "r"(A[0]),
"r"(A[1]),
175 "f"(C[0]),
"f"(C[1]),
"f"(C[2]),
"f"(C[3])
193 gemm::GemmShape<8, 8, 16>,
228 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 230 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
231 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
233 int const *C =
reinterpret_cast<int const *
>(&c);
234 int *D =
reinterpret_cast<int *
>(&d);
236 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.s8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 237 :
"=r"(D[0]),
"=r"(D[1])
238 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
249 gemm::GemmShape<8, 8, 16>,
284 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 286 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
287 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
289 int const *C =
reinterpret_cast<int const *
>(&c);
290 int *D =
reinterpret_cast<int *
>(&d);
292 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.u8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 293 :
"=r"(D[0]),
"=r"(D[1])
294 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
305 gemm::GemmShape<8, 8, 16>,
340 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 342 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
343 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
345 int const *C =
reinterpret_cast<int const *
>(&c);
346 int *D =
reinterpret_cast<int *
>(&d);
348 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s8.u8 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 349 :
"=r"(D[0]),
"=r"(D[1])
350 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
362 gemm::GemmShape<8, 8, 16>,
397 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 399 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
400 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
402 int const *C =
reinterpret_cast<int const *
>(&c);
403 int *D =
reinterpret_cast<int *
>(&d);
405 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.u8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 406 :
"=r"(D[0]),
"=r"(D[1])
407 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
424 gemm::GemmShape<8,8,16>,
432 OpMultiplyAddSaturate> {
459 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 461 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
462 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
464 int const *C =
reinterpret_cast<int const *
>(&c);
465 int *D =
reinterpret_cast<int *
>(&d);
467 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.s8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 468 :
"=r"(D[0]),
"=r"(D[1])
469 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
480 gemm::GemmShape<8,8,16>,
488 OpMultiplyAddSaturate> {
515 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 517 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
518 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
520 int const *C =
reinterpret_cast<int const *
>(&c);
521 int *D =
reinterpret_cast<int *
>(&d);
523 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.u8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 524 :
"=r"(D[0]),
"=r"(D[1])
525 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
536 gemm::GemmShape<8,8,16>,
544 OpMultiplyAddSaturate> {
571 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 573 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
574 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
576 int const *C =
reinterpret_cast<int const *
>(&c);
577 int *D =
reinterpret_cast<int *
>(&d);
579 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.s8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 580 :
"=r"(D[0]),
"=r"(D[1])
581 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
592 gemm::GemmShape<8,8,16>,
600 OpMultiplyAddSaturate> {
627 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 629 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
630 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
632 int const *C =
reinterpret_cast<int const *
>(&c);
633 int *D =
reinterpret_cast<int *
>(&d);
635 asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.u8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 636 :
"=r"(D[0]),
"=r"(D[1])
637 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
654 gemm::GemmShape<8,8,32>,
689 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 691 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
692 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
694 int const *C =
reinterpret_cast<int const *
>(&c);
695 int *D =
reinterpret_cast<int *
>(&d);
697 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.s4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 698 :
"=r"(D[0]),
"=r"(D[1])
699 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
710 gemm::GemmShape<8,8,32>,
745 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 747 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
748 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
750 int const *C =
reinterpret_cast<int const *
>(&c);
751 int *D =
reinterpret_cast<int *
>(&d);
753 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.u4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 754 :
"=r"(D[0]),
"=r"(D[1])
755 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
766 gemm::GemmShape<8,8,32>,
801 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 803 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
804 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
806 int const *C =
reinterpret_cast<int const *
>(&c);
807 int *D =
reinterpret_cast<int *
>(&d);
809 asm volatile(
"_mma.m8n8k32.row.col.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 810 :
"=r"(D[0]),
"=r"(D[1])
811 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
822 gemm::GemmShape<8,8,32>,
857 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 859 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
860 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
862 int const *C =
reinterpret_cast<int const *
>(&c);
863 int *D =
reinterpret_cast<int *
>(&d);
865 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.u4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 866 :
"=r"(D[0]),
"=r"(D[1])
867 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
884 gemm::GemmShape<8,8,32>,
892 OpMultiplyAddSaturate> {
919 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 921 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
922 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
924 int const *C =
reinterpret_cast<int const *
>(&c);
925 int *D =
reinterpret_cast<int *
>(&d);
927 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.s4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 928 :
"=r"(D[0]),
"=r"(D[1])
929 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
940 gemm::GemmShape<8,8,32>,
948 OpMultiplyAddSaturate> {
975 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 977 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
978 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
980 int const *C =
reinterpret_cast<int const *
>(&c);
981 int *D =
reinterpret_cast<int *
>(&d);
983 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.u4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 984 :
"=r"(D[0]),
"=r"(D[1])
985 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
996 gemm::GemmShape<8,8,32>,
1004 OpMultiplyAddSaturate> {
1031 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 1033 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
1034 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
1036 int const *C =
reinterpret_cast<int const *
>(&c);
1037 int *D =
reinterpret_cast<int *
>(&d);
1039 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 1040 :
"=r"(D[0]),
"=r"(D[1])
1041 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
1052 gemm::GemmShape<8,8,32>,
1060 OpMultiplyAddSaturate> {
1087 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 1089 unsigned const & A =
reinterpret_cast<unsigned const &
>(a);
1090 unsigned const & B =
reinterpret_cast<unsigned const &
>(b);
1092 int const *C =
reinterpret_cast<int const *
>(&c);
1093 int *D =
reinterpret_cast<int *
>(&d);
1095 asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.u4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n" 1096 :
"=r"(D[0]),
"=r"(D[1])
1097 :
"r"(A),
"r"(B),
"r"(C[0]),
"r"(C[1]));
1114 gemm::GemmShape<8,8,128>,
1149 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED) 1151 #if defined(CUTLASS_ARCH_WMMA_ENABLED) 1152 using WmmaFragmentA = nvcuda::wmma::fragment<
1153 nvcuda::wmma::matrix_a,
1157 nvcuda::wmma::experimental::precision::b1,
1158 nvcuda::wmma::row_major>;
1160 using WmmaFragmentB = nvcuda::wmma::fragment<
1161 nvcuda::wmma::matrix_b,
1165 nvcuda::wmma::experimental::precision::b1,
1166 nvcuda::wmma::col_major>;
1168 using WmmaFragmentC = nvcuda::wmma::fragment<
1169 nvcuda::wmma::accumulator,
1175 WmmaFragmentA
const & A =
reinterpret_cast<WmmaFragmentA
const &
>(a);
1176 WmmaFragmentB
const & B =
reinterpret_cast<WmmaFragmentB
const &
>(b);
1178 WmmaFragmentC
const & C =
reinterpret_cast<WmmaFragmentC
const &
>(c);
1179 WmmaFragmentC & D =
reinterpret_cast<WmmaFragmentC &
>(d);
1181 nvcuda::wmma::bmma_sync(D, A, B, C, nvcuda::wmma::experimental::bmmaBitOpXOR,
1182 nvcuda::wmma::experimental::bmmaAccumulateOpPOPC);
1187 #endif // defined(CUTLASS_ARCH_WMMA_ENABLED) cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:794
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:217
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA uint8_t ElementA
Definition: mma_sm75.h:492
integer_subbyte< 4, false > uint4b_t
4-bit Unsigned integer type
Definition: integer_subbyte.h:158
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:734
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:1070
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:211
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:610
Definition: aligned_buffer.h:35
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:269
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:846
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:616
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:500
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:277
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:329
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:676
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:323
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:554
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:382
integer_subbyte< 1, false > uint1b_t
1-bit Unsigned integer type
Definition: integer_subbyte.h:152
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:1016
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:446
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB uint8_t ElementB
Definition: mma_sm75.h:378
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA int8_t ElementA
Definition: mma_sm75.h:205
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA int8_t ElementA
Definition: mma_sm75.h:548
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:267
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:968
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:1024
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:207
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< half_t, 2 > FragmentB
Definition: mma_sm75.h:150
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:319
4-bit signed integer type
Definition: integer_subbyte.h:42
IEEE half-precision floating-point type.
Definition: half.h:126
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:730
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:444
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:564
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:904
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes the 1-bit XOR-popcount multiply-accumulate (via bmma_sync with bmmaBitOpXOR and bmmaAccumulateOpPOPC).
Definition: mma_sm75.h:1142
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:780
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::ElementC int ElementC
Definition: mma_sm75.h:1134
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:842
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:964
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:380
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:560
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB int8_t ElementB
Definition: mma_sm75.h:496
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:912
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB uint8_t ElementB
Definition: mma_sm75.h:608
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1018
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:958
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:612
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB int8_t ElementB
Definition: mma_sm75.h:209
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:333
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA uint8_t ElementA
Definition: mma_sm75.h:261
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:498
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::ElementC float ElementC
Definition: mma_sm75.h:152
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB int8_t ElementB
Definition: mma_sm75.h:440
Templates exposing architecture support for multiply-add operations.
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:550
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB uint8_t ElementB
Definition: mma_sm75.h:321
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA uint8_t ElementA
Definition: mma_sm75.h:374
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:558
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:1014
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:1010
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:156
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:95
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:840
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:508
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:556
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:674
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:273
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:327
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:384
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::Operator OpXorPopc Operator
Definition: mma_sm75.h:1138
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< float, 4 > FragmentC
Definition: mma_sm75.h:154
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:606
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:682
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA int8_t ElementA
Definition: mma_sm75.h:436
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:732
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< half_t, 4 > FragmentC
Definition: mma_sm75.h:93
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:215
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:221
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB uint8_t ElementB
Definition: mma_sm75.h:552
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:438
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:1020
Top-level include for all CUTLASS numeric types.
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:1076
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< half_t, 4 > FragmentA
Definition: mma_sm75.h:146
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:1080
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:728
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB int8_t ElementB
Definition: mma_sm75.h:265
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:502
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA int8_t ElementA
Definition: mma_sm75.h:317
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:954
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1136
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:668
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:1072
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:908
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:724
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentA Array< uint1b_t, 32 > FragmentA
Definition: mma_sm75.h:1128
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:1066
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:962
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:376
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:836
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:844
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:906
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:678
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:620
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:738
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:160
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA uint8_t ElementA
Definition: mma_sm75.h:604
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:325
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:850
Defines layout functions used by TensorRef and derived classes.
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:452
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:448
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:442
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:390
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:263
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const
Computes multiply-add.
Definition: mma_sm75.h:98
Matrix multiply-add operation.
Definition: arch/mma.h:92
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:898
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:902
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:494
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:386
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentB Array< uint1b_t, 32 > FragmentB
Definition: mma_sm75.h:1132
Templates exposing architecture support for warp matrix multiply-add (WMMA) operations.
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< half_t, 2 > FragmentB
Definition: mma_sm75.h:89
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< half_t, 4 > FragmentA
Definition: mma_sm75.h:85
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1074
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:614
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:784
integer_subbyte< 4, true > int4b_t
4-bit signed integer type
Definition: integer_subbyte.h:155
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:504
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:672
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:271
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:786
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:788
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:790
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:213
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:960