59 #if defined(CUTLASS_ARCH_WMMA_SM72_ENABLED) 61 using ElementA = int8_t;
62 using LayoutA = LayoutA_;
63 using ElementB = int8_t;
64 using LayoutB = LayoutB_;
65 using ElementC = int32_t;
66 using LayoutC = LayoutC_;
67 using Operator = cutlass::arch::OpMultiplyAdd;
74 "Supported list of wmma operator shape for s8 multiplicands are: 16x16x16, 8x328x16, and 32x8x16");
78 using FragmentA = nvcuda::wmma::fragment<
79 nvcuda::wmma::matrix_a,
83 typename CutlassToWmmaDataType<ElementA>::Type,
84 typename CutlassToWmmaLayout<LayoutA>::Layout>;
86 using FragmentB = nvcuda::wmma::fragment<
87 nvcuda::wmma::matrix_b,
91 typename CutlassToWmmaDataType<ElementB>::Type,
92 typename CutlassToWmmaLayout<LayoutB>::Layout>;
94 using FragmentC = nvcuda::wmma::fragment<
95 nvcuda::wmma::accumulator,
99 typename CutlassToWmmaDataType<ElementC>::Type>;
107 FragmentC
const &C)
const {
109 nvcuda::wmma::mma_sync(D, A, B, C);
113 static_assert(
false,
"wmma.mma.sync interger type multiplicands is avialable only for SM72 and beyond");
139 #if defined(CUTLASS_ARCH_WMMA_SM72_ENABLED) 140 using Shape = Shape_;
141 using ElementA = uint8_t;
142 using LayoutA = LayoutA_;
143 using ElementB = uint8_t;
144 using LayoutB = LayoutB_;
145 using ElementC = int32_t;
146 using LayoutC = LayoutC_;
147 using Operator = cutlass::arch::OpMultiplyAdd;
154 "Supported list of wmma operator shape for u8 multiplicands are: 16x16x16, 8x328x16, and 32x8x16");
157 using FragmentA = nvcuda::wmma::fragment<
158 nvcuda::wmma::matrix_a,
162 typename CutlassToWmmaDataType<ElementA>::Type,
163 typename CutlassToWmmaLayout<LayoutA>::Layout>;
165 using FragmentB = nvcuda::wmma::fragment<
166 nvcuda::wmma::matrix_b,
170 typename CutlassToWmmaDataType<ElementB>::Type,
171 typename CutlassToWmmaLayout<LayoutB>::Layout>;
173 using FragmentC = nvcuda::wmma::fragment<
174 nvcuda::wmma::accumulator,
178 typename CutlassToWmmaDataType<ElementC>::Type>;
186 FragmentC
const &C)
const {
188 nvcuda::wmma::mma_sync(D, A, B, C);
192 static_assert(
false,
"wmma.mma.sync interger type multiplicands is avialable only for SM72 and beyond");
Definition: aligned_buffer.h:35
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
Defines layout functions used by TensorRef and derived classes.