41 template <
typename LayoutA,
typename LayoutB,
typename LayoutC>
43 gemm::GemmShape<1,1,4>,
58 Array<int8_t, 4>
const &a,
59 Array<int8_t, 4>
const &b,
60 Array<int, 1>
const &c
63 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 610)) 65 unsigned const &A =
reinterpret_cast<unsigned const &
>(a);
66 unsigned const &B =
reinterpret_cast<unsigned const &
>(b);
68 asm volatile(
"dp4a.s32.s32 %0, %1, %2, %3;" 70 :
"r"(A),
"r"(B),
"r"(c[0]));
77 for (
int k = 0; k < 4; ++k) {
88 template <
typename LayoutC>
90 gemm::GemmShape<1, 1, 2>,
105 Array<int16_t, 2>
const &a,
106 Array<int16_t, 2>
const &b,
107 Array<int, 1>
const &c
110 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 610)) 112 unsigned const &A =
reinterpret_cast<unsigned const &
>(a);
113 unsigned const &B =
reinterpret_cast<unsigned const &
>(b);
115 asm volatile(
"dp2a.s32.s32 %0, %1, %2, %3;" 117 :
"r"(A),
"r"(B),
"r"(c[0]));
122 for (
int k = 0; k < 2; ++k) {
Definition: aligned_buffer.h:35
cutlass::arch::Mma< gemm::GemmShape< 1, 1, 4 >, 1, int8_t, LayoutA, int8_t, LayoutB, int, LayoutC, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(Array< int, 1 > &d, Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, Array< int, 1 > const &c)
Definition: arch/mma_sm61.h:56
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
Defines layout functions used by TensorRef and derived classes.
Matrix multiply-add operation.
Definition: arch/mma.h:92
cutlass::arch::Mma< gemm::GemmShape< 1, 1, 2 >, 1, int16_t, layout::RowMajor, int16_t, layout::ColumnMajor, int, LayoutC, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(Array< int, 1 > &d, Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, Array< int, 1 > const &c)
Definition: arch/mma_sm61.h:103