CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Matrix multiply-add operation specialized for the entire warp.
#include <mma_sm70.h>
Public Types
using Shape = gemm::GemmShape< 16, 16, 4 >
using cutlass::arch::Mma< gemm::GemmShape< 16, 16, 4 >, 32, half_t, LayoutA, half_t, LayoutB, ElementC, LayoutC, Operator >::Shape = gemm::GemmShape<16, 16, 4> |