CUTLASS: cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

#include <default_mma_core_sm50.h>

Public Types
using	Shape = Shape_

using	WarpShape = WarpShape_

using	InstructionShape = GemmShape< 1, 1, 1 >

using	ElementA = ElementA_

using	LayoutA = layout::ColumnMajor

using	ElementB = ElementB_

using	LayoutB = layout::RowMajor

using	ElementC = ElementC_

using	LayoutC = LayoutC_

using	OperatorClass = arch::OpClassSimt

using	WarpCount = GemmShape< Shape::kM/WarpShape::kM, Shape::kN/WarpShape::kN, Shape::kK/WarpShape::kK >
	Number of warps present. More...

using	SmemLayoutA = layout::ColumnMajor
	Shared memory layout for A operand. More...

using	SmemLayoutB = layout::RowMajor
	Shared memory layout for B operand. More...

using	IteratorThreadMapA = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape< Shape::kM, Shape::kK >, kThreads, 1 >
	ThreadMap of iterator A. More...

using	SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape< Shape::kM, Shape::kK >, ElementA, SmemLayoutA, 1, IteratorThreadMapA >
	Shared memory iterator to A operand. More...

using	IteratorThreadMapB = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape< Shape::kN, Shape::kK >, kThreads, 1 >
	ThreadMap of iterator B. More...

using	SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape< Shape::kK, Shape::kN >, ElementB, SmemLayoutB, 0, IteratorThreadMapB >
	Shared memory iterator to B operand. More...

using	WarpMma = cutlass::gemm::warp::MmaSimt< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, warp::MmaSimtPolicy< MatrixShape< 4, 8 >, layout::RowMajorInterleaved< 2 >, GemmShape< 128/sizeof_bits< ElementA >::value, 128/sizeof_bits< ElementB >::value, 1 > > >

using	MmaPolicy = MmaPolicy< WarpMma, MatrixShape< 0, 0 >, MatrixShape< 0, 0 >, WarpCount::kK >
	Policy used to define MmaPipelined. More...

Static Public Attributes
static int const	kWarpSize = warp::WarpSize<arch::OpClassSimt>::value
	Number of threads per warp. More...

static int const	kThreads = WarpCount::kCount * kWarpSize
	Number of threads total. More...

Detailed Description

template<typename Shape_, typename WarpShape_, typename ElementA_, typename ElementB_, typename ElementC_, typename LayoutC_, typename Operator_>
struct cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >

Partial specialization:

A: column-major; B: row-major; InstructionShape: 1-by-1-by-1; Operator: SIMT

This uses the default warp-level operator for the given tile sizes.

Member Typedef Documentation

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementA = ElementA_

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementB = ElementB_

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::ElementC = ElementC_

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::InstructionShape = GemmShape< 1, 1, 1 >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::IteratorThreadMapA = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape<Shape::kM, Shape::kK>, kThreads, 1 >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::IteratorThreadMapB = transform::PitchLinearStripminedThreadMap< layout::PitchLinearShape<Shape::kN, Shape::kK>, kThreads, 1 >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutA = layout::ColumnMajor

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutB = layout::RowMajor

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::LayoutC = LayoutC_

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::MmaPolicy = MmaPolicy< WarpMma, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::OperatorClass = arch::OpClassSimt

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::Shape = Shape_

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 1, IteratorThreadMapA >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 0, IteratorThreadMapB >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemLayoutA = layout::ColumnMajor

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::SmemLayoutB = layout::RowMajor

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpCount = GemmShape< Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpMma = cutlass::gemm::warp::MmaSimt< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, warp::MmaSimtPolicy< MatrixShape<4, 8>, layout::RowMajorInterleaved<2>, GemmShape< 128 / sizeof_bits<ElementA>::value, 128 / sizeof_bits<ElementB>::value, 1> > >

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::WarpShape = WarpShape_

Member Data Documentation

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

int const cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::kThreads = WarpCount::kCount * kWarpSize

static

template<typename Shape_ , typename WarpShape_ , typename ElementA_ , typename ElementB_ , typename ElementC_ , typename LayoutC_ , typename Operator_ >

int const cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >::kWarpSize = warp::WarpSize<arch::OpClassSimt>::value

static

The documentation for this struct was generated from the following file:

default_mma_core_sm50.h