CUTLASS: cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

#include <default_gemv_core.h>

Public Types
using	Shape = Shape_

using	ThreadShape = ThreadShape_

using	LayoutA = LayoutA_

using	LayoutB = LayoutB_

using	LayoutC = LayoutC_

using	ElementA = ElementA_

using	ElementB = ElementB_

using	ElementC = ElementC_

using	IteratorPolicyA = typename platform::conditional< platform::is_same< LayoutA, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kK, Shape::kM >, 1, ThreadShape::kK >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kK >, 1, ThreadShape::kM >>::type

using	IteratorA = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape< Shape::kM, Shape::kK >, ElementA, LayoutA, 1, IteratorPolicyA >

using	IteratorPolicyB = typename platform::conditional< platform::is_same< LayoutB, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kK >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kK, Shape::kN >, kThreadsPerN, ThreadShape::kK >>::type

using	IteratorB = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape< Shape::kK, Shape::kN >, ElementB, LayoutB, 0, IteratorPolicyB >

using	IteratorPolicyC = typename platform::conditional< platform::is_same< LayoutC, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kM >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kN >, kThreadsPerN, ThreadShape::kM >>::type

using	IteratorC = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape< Shape::kM, Shape::kN >, ElementC, LayoutC, 0, IteratorPolicyC >

using	MmaSimtOp = typename cutlass::gemm::thread::Mma< cutlass::gemm::GemmShape< ThreadShape::kM, ThreadShape::kN, Shape::kK >, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC >

using	Operator = MmaSimtOp

Static Public Attributes
static int const	kThreadsPerN = Shape::kN / ThreadShape::kN

Detailed Description

template<typename Shape_, typename ThreadShape_, typename ElementA_, typename LayoutA_, typename ElementB_, typename LayoutB_, typename ElementC_, typename LayoutC_>
struct cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >

Template defininng default vector-matrix multiply operators inferred from threadblock tile size, global memory data layout.

Member Typedef Documentation

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::ElementA = ElementA_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::ElementB = ElementB_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::ElementC = ElementC_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorA = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape<Shape::kM, Shape::kK>, ElementA, LayoutA, 1, IteratorPolicyA>

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorB = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape<Shape::kK, Shape::kN>, ElementB, LayoutB, 0, IteratorPolicyB>

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorC = cutlass::transform::threadblock::PredicatedTileIterator< cutlass::MatrixShape<Shape::kM, Shape::kN>, ElementC, LayoutC, 0, IteratorPolicyC>

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorPolicyA = typename platform::conditional< platform::is_same<LayoutA, layout::RowMajor>::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape<Shape::kK, Shape::kM>, 1, ThreadShape::kK>, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape<Shape::kM, Shape::kK>, 1, ThreadShape::kM>>::type

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorPolicyB = typename platform::conditional< platform::is_same<LayoutB, layout::RowMajor>::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape<Shape::kN, Shape::kK>, kThreadsPerN, ThreadShape::kN>, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape<Shape::kK, Shape::kN>, kThreadsPerN, ThreadShape::kK>>::type

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::IteratorPolicyC = typename platform::conditional< platform::is_same<LayoutC, layout::RowMajor>::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape<Shape::kN, Shape::kM>, kThreadsPerN, ThreadShape::kN>, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape<Shape::kM, Shape::kN>, kThreadsPerN, ThreadShape::kM>>::type

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::LayoutA = LayoutA_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::LayoutB = LayoutB_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::LayoutC = LayoutC_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::MmaSimtOp = typename cutlass::gemm::thread::Mma< cutlass::gemm::GemmShape<ThreadShape::kM, ThreadShape::kN, Shape::kK>, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC>

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::Operator = MmaSimtOp

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::Shape = Shape_

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

using cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::ThreadShape = ThreadShape_

Member Data Documentation

template<typename Shape_ , typename ThreadShape_ , typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename LayoutC_ >

int const cutlass::gemm::threadblock::DefaultGemvCore< Shape_, ThreadShape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_ >::kThreadsPerN = Shape::kN / ThreadShape::kN

static

The documentation for this struct was generated from the following file:

default_gemv_core.h