Partial specialization: A is row-major int8_t, B is column-major int8_t, operator class is SIMT. More...

#include <default_mma_core_simt.h>

Public Types
using	Shape = Shape_

using	WarpShape = WarpShape_

using	InstructionShape = GemmShape< 1, 1, 4 >

using	ElementA = int8_t

using	LayoutA = layout::RowMajor

using	ElementB = int8_t

using	LayoutB = layout::ColumnMajor

using	ElementC = ElementC_

using	LayoutC = LayoutC_

using	OperatorClass = arch::OpClassSimt

using	Operator = Operator_
	Default Operator. More...

using	WarpCount = GemmShape< Shape::kM/WarpShape::kM, Shape::kN/WarpShape::kN, PartitionsK >
	Number of warps present. More...

using	SmemLayoutA = layout::ColumnMajorInterleaved< 4 >

using	SmemLayoutB = layout::RowMajorInterleaved< 4 >

using	IteratorThreadMapA = transform::PitchLinear2DThreadTileStripminedThreadMap< layout::PitchLinearShape< Shape::kK, Shape::kM >, kThreads, layout::PitchLinearShape< 4, 4 > >
	ThreadMap of iterator A. More...

using	SmemThreadMapA = transform::TransposePitchLinearThreadMap2DThreadTile< IteratorThreadMapA >
	Transpose the ThreadMap of iterator A. More...

using	SmemIteratorA = transform::threadblock::RegularTileIterator2dThreadTile< MatrixShape< Shape::kM, Shape::kK >, ElementA, SmemLayoutA, 1, SmemThreadMapA >
	Shared memory iterator to A operand. More...

using	IteratorThreadMapB = transform::PitchLinear2DThreadTileStripminedThreadMap< layout::PitchLinearShape< Shape::kK, Shape::kN >, kThreads, layout::PitchLinearShape< 4, 4 > >
	ThreadMap of iterator B. More...

using	SmemThreadMapB = transform::TransposePitchLinearThreadMap2DThreadTile< IteratorThreadMapB >
	Transpose the ThreadMap of iterator B. More...

using	SmemIteratorB = transform::threadblock::RegularTileIterator2dThreadTile< MatrixShape< Shape::kK, Shape::kN >, ElementB, SmemLayoutB, 0, SmemThreadMapB >
	Shared memory iterator to B operand. More...

using	LaneMmaShape = cutlass::gemm::GemmShape< LaneM, LaneN, 4 >

using	Policy = cutlass::gemm::warp::MmaSimtPolicy< cutlass::MatrixShape< WarpNumThreadsM, WarpNumThreadsN >, cutlass::layout::ColumnMajorInterleaved< LaneLayout >, LaneMmaShape >

using	MmaWarpSimt = cutlass::gemm::warp::MmaSimt< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Policy, PartitionsK >

using	MmaPolicy = MmaPolicy< MmaWarpSimt, MatrixShape< kPaddingM, 0 >, MatrixShape< 0, kPaddingN >, WarpCount::kK >
	Policy used to define MmaPipelined. More...

Static Public Attributes
static int const	PartitionsK = Shape::kK / WarpShape::kK

static int const	kWarpSize = warp::WarpSize<arch::OpClassSimt>::value
	Number of threads per warp. More...

static int const	kThreads = WarpCount::kCount * kWarpSize
	Number of threads total. More...

static const int	WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>()

static const int	WarpNumThreadsN = kWarpSize / WarpNumThreadsM

static const int	ThreadTileM = WarpShape::kM / WarpNumThreadsM

static const int	ThreadTileN = WarpShape::kN / WarpNumThreadsN

static const int	LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1

static const int	numElementsA = 128 / sizeof_bits<ElementA>::value

static const int	numElementsB = 128 / sizeof_bits<ElementB>::value

static const int	LaneM = cutlass::const_min(4, ThreadTileM)

static const int	LaneN = cutlass::const_min(4, ThreadTileN)

static int const	kPaddingM = detail::simt_transpose_padding(kWarpSize, Shape::kK, sizeof_bits<ElementA>::value)

static int const	kPaddingN = detail::simt_transpose_padding(kWarpSize, Shape::kK, sizeof_bits<ElementB>::value)

Detailed Description