CUTLASS: cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Template that handles conventional layouts for IDP4A.

#include <mma_sm61.h>

Public Types
using	Shape = Shape_
	Size of the Gemm problem - concept: gemm::GemmShape<> More...

using	ElementA = int8_t
	Data type of operand A. More...

using	LayoutA = layout::ColumnMajor
	Layout of A matrix (concept: layout::MapFunc) More...

using	ElementB = int8_t
	Data type of operand B. More...

using	LayoutB = layout::RowMajor
	Layout of B matrix (concept: layout::MapFunc) More...

using	ElementC = int32_t
	Element type of operand C. More...

using	LayoutC = LayoutC_
	Layout of C matrix (concept: layout::MapFunc) More...

using	Operator = arch::OpMultiplyAdd
	Underlying mathematical operator. More...

using	FragmentA = Array< ElementA, Shape::kMK >
	A operand storage. More...

using	FragmentB = Array< ElementB, Shape::kKN >
	B operand storage. More...

using	FragmentC = Array< ElementC, Shape::kMN >
	C operand storage. More...

Public Member Functions
CUTLASS_HOST_DEVICE void	operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
	Computes a matrix product D = A * B + C. More...

Member Typedef Documentation

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementA = int8_t

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementB = int8_t

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::ElementC = int32_t

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentA = Array<ElementA, Shape::kMK>

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentB = Array<ElementB, Shape::kKN>

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::FragmentC = Array<ElementC, Shape::kMN>

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutA = layout::ColumnMajor

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutB = layout::RowMajor

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::LayoutC = LayoutC_

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Operator = arch::OpMultiplyAdd

template<typename Shape_ , typename LayoutC_ >

using cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::Shape = Shape_

Member Function Documentation

template<typename Shape_ , typename LayoutC_ >

CUTLASS_HOST_DEVICE void cutlass::gemm::thread::Mma< Shape_, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, int32_t, LayoutC_, arch::OpMultiplyAdd, int8_t >::operator()	(	FragmentC &	D,
		FragmentA const &	A,
		FragmentB const &	B,
		FragmentC const &	C
	)

inline

Uses a 1x1x4 IDP4A sequence for the bulk of the computation.

The documentation for this struct was generated from the following file:

gemm/thread/mma_sm61.h