CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template that handles conventional layouts for IDP4A.
#include <mma_sm61.h>
Public Types | |
using | Shape = Shape_ |
Size of the Gemm problem - concept: gemm::GemmShape<> More... | |
using | ElementA = int8_t |
Data type of operand A. More... | |
using | LayoutA = layout::RowMajor |
Layout of A matrix (concept: layout::MapFunc) More... | |
using | ElementB = int8_t |
Data type of operand B. More... | |
using | LayoutB = layout::ColumnMajor |
Layout of B matrix (concept: layout::MapFunc) More... | |
using | ElementC = int32_t |
Data type of operand C. More... | |
using | LayoutC = LayoutC_ |
Layout of C matrix (concept: layout::MapFunc) More... | |
using | Operator = arch::OpMultiplyAdd |
Underlying mathematical operator. More... | |
using | FragmentA = Array< ElementA, Shape::kMK > |
A operand storage. More... | |
using | FragmentB = Array< ElementB, Shape::kKN > |
B operand storage. More... | |
using | FragmentC = Array< ElementC, Shape::kMN > |
C operand storage. More... | |
Public Member Functions | |
CUTLASS_HOST_DEVICE void | operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C) |
Computes a matrix product D = A * B + C. More... | |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA = int8_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB = int8_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC = int32_t |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA = Array<ElementA, Shape::kMK> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB = Array<ElementB, Shape::kKN> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC = Array<ElementC, Shape::kMN> |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutA = layout::RowMajor |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutB = layout::ColumnMajor |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC = LayoutC_ |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Operator = arch::OpMultiplyAdd |
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Shape = Shape_ |
|
inline |
Uses a 1x1x4 IDP4A sequence for the bulk of the computation.