CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template that handles conventional layouts for IDP4A.
#include <mma_sm61.h>
Public Types | |
using | Shape = Shape_ |
Size of the Gemm problem - concept: gemm::GemmShape<> More... | |
using | ElementA = int8_t |
Data type of operand A. More... | |
using | LayoutA = layout::ColumnMajor |
Layout of A matrix (concept: layout::MapFunc) More... | |
using | ElementB = int8_t |
Data type of operand B. More... | |
using | LayoutB = layout::RowMajor |
Layout of B matrix (concept: layout::MapFunc) More... | |
using | ElementC = int32_t |
Element type of operand C. More... | |
using | LayoutC = LayoutC_ |
Layout of C matrix (concept: layout::MapFunc) More... | |
using | Operator = arch::OpMultiplyAdd |
Underlying mathematical operator. More... | |
using | FragmentA = Array< ElementA, Shape::kMK > |
A operand storage. More... | |
using | FragmentB = Array< ElementB, Shape::kKN > |
B operand storage. More... | |
using | FragmentC = Array< ElementC, Shape::kMN > |
C operand storage. More... | |
Public Member Functions | |
CUTLASS_HOST_DEVICE void | operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C) |
Computes a matrix product D = A * B + C. More... | |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementA = int8_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementB = int8_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementC = int32_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentA = Array<ElementA, Shape::kMK> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentB = Array<ElementB, Shape::kKN> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentC = Array<ElementC, Shape::kMN> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutA = layout::ColumnMajor |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutB = layout::RowMajor |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutC = LayoutC_ |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Operator = arch::OpMultiplyAdd |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Shape = Shape_ |
|
inline |
Uses a 1x1x4 IDP4A sequence for the bulk of the computation.