CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool > Struct Template Reference

Template that handles conventional layouts for IDP4A.

#include <mma_sm61.h>

Public Types

using Shape = Shape_
 Size of the Gemm problem - concept: gemm::GemmShape<> More...
 
using ElementA = int8_t
 Data type of operand A. More...
 
using LayoutA = layout::RowMajor
 Layout of A matrix (concept: layout::MapFunc) More...
 
using ElementB = int8_t
 Data type of operand B. More...
 
using LayoutB = layout::ColumnMajor
 Layout of B matrix (concept: layout::MapFunc) More...
 
using ElementC = int32_t
 Element type of operand C. More...
 
using LayoutC = LayoutC_
 Layout of C matrix (concept: layout::MapFunc) More...
 
using Operator = arch::OpMultiplyAdd
 Underlying mathematical operator. More...
 
using FragmentA = Array< ElementA, Shape::kMK >
 A operand storage. More...
 
using FragmentB = Array< ElementB, Shape::kKN >
 B operand storage. More...
 
using FragmentC = Array< ElementC, Shape::kMN >
 C operand storage. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE void operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
 Computes a matrix product D = A * B + C. More...
 

Member Typedef Documentation

template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA = int8_t
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB = int8_t
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC = int32_t
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA = Array<ElementA, Shape::kMK>
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB = Array<ElementB, Shape::kKN>
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC = Array<ElementC, Shape::kMN>
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutA = layout::RowMajor
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutB = layout::ColumnMajor
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC = LayoutC_
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Operator = arch::OpMultiplyAdd
template<typename Shape_ , typename LayoutC_ >
using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::Shape = Shape_

Member Function Documentation

template<typename Shape_ , typename LayoutC_ >
CUTLASS_HOST_DEVICE void cutlass::gemm::thread::Mma< Shape_, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, bool >::operator() ( FragmentC &  D,
FragmentA const &  A,
FragmentB const &  B,
FragmentC const &  C 
)
inline

Uses a 1x1x4 IDP4A sequence for the bulk of the computation.


The documentation for this struct was generated from the following file: