#include <default_mma_core_sm75.h>

Public Types
using	Shape = Shape_

using	WarpShape = WarpShape_

using	InstructionShape = InstructionShape_

using	ElementA = ElementA_

using	LayoutA = layout::ColumnMajorInterleaved< InterleavedK >

using	ElementB = ElementB_

using	LayoutB = layout::RowMajorInterleaved< InterleavedK >

using	ElementC = ElementC_

using	LayoutC = LayoutC_

using	OperatorClass = arch::OpClassTensorOp

using	WarpCount = GemmShape< Shape::kM/WarpShape::kM, Shape::kN/WarpShape::kN, Shape::kK/WarpShape::kK >
	Number of warps present. More...

using	Operator = Operator_
	Default Operator. More...

using	SmemLayoutA = layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits< ElementA >::value, kInterleavedK >

using	SmemLayoutB = layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits< ElementB >::value, kInterleavedK >

using	IteratorThreadMapA = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape< Shape::kM *kInterleavedK, Shape::kK/kInterleavedK >, kThreads, layout::PitchLinearShape< 32, 1 >, kElementsPerAccess >
	ThreadMap of iterator A. More...

using	SmemThreadMapA = transform::TransposePitchLinearThreadMap< IteratorThreadMapA, layout::PitchLinearShape< kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided >>
	Transpose the ThreadMap of iterator A. More...

using	SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape< Shape::kM, Shape::kK >, ElementA, SmemLayoutA, 0, SmemThreadMapA >
	Shared memory iterator to A operand. More...

using	IteratorThreadMapB = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape< Shape::kN *kInterleavedK, Shape::kK/kInterleavedK >, kThreads, layout::PitchLinearShape< 32, 1 >, kElementsPerAccess >
	ThreadMap of iterator B. More...

using	SmemThreadMapB = transform::TransposePitchLinearThreadMap< IteratorThreadMapB, layout::PitchLinearShape< kWarpThreadArrangementContiguous, kWarpThreadArrangementStrided >>
	Transpose the ThreadMap of iterator B. More...

using	SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape< Shape::kK, Shape::kN >, ElementB, SmemLayoutB, 1, SmemThreadMapB >
	Shared memory iterator to B operand. More...

using	MmaTensorOp = typename cutlass::gemm::warp::DefaultMmaTensorOp< WarpShape, InstructionShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Operator, WarpCount::kK, AccumulatorsInRowMajor >::Type

using	MmaPolicy = MmaPolicy< MmaTensorOp, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK >
	Policy used to define MmaPipelined. More...

Static Public Attributes
static int const	kInterleavedK = InterleavedK

static int const	kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value
	Number of threads per warp. More...

static int const	kThreads = WarpCount::kCount * kWarpSize
	Number of threads total. More...

static int const	kAccessSizeInBits = 128
	Size of a threadblock-scoped access. More...

static int const	kElementsPerAccess

static int const	kWarpThreadArrangementContiguous

static int const	kWarpThreadArrangementStrided

Detailed Description

template<typename Shape_, typename WarpShape_, typename InstructionShape_, typename ElementA_, typename ElementB_, typename ElementC_, typename LayoutC_, typename Operator_, bool AccumulatorsInRowMajor, int InterleavedK>
struct cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >

Partial specialization:

A: column-major-interleave32; B: row-major-interleave32; Operator: tensor op class

This uses the default warp-level operator for the given tile sizes.