CUTLASS: cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Defines the optimal thread map for TensorOp accumulator layouts.

#include <default_thread_map_tensor_op.h>

Classes
struct	Detail

Public Types
using	ThreadblockShape = ThreadblockShape_

using	WarpShape = WarpShape_

using	Element = Element_

using	Type = InterleavedOutputTileThreadMap< layout::PitchLinearShape< Detail::WarpCount::kM, Detail::WarpCount::kN >, layout::PitchLinearShape< WarpShape::kM/Detail::kTensorOpRows, WarpShape::kN/InterleavedK >, Detail::kThreads, kElementsPerAccess, sizeof_bits< Element >::value >

Static Public Attributes
static int const	kPartitionsK = PartitionsK

static int const	kElementsPerAccess = ElementsPerAccess

static int const	kInterleavedK = InterleavedK

Member Typedef Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Element = Element_

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::ThreadblockShape = ThreadblockShape_

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Type = InterleavedOutputTileThreadMap< layout::PitchLinearShape<Detail::WarpCount::kM, Detail::WarpCount::kN>, layout::PitchLinearShape<WarpShape::kM / Detail::kTensorOpRows, WarpShape::kN / InterleavedK>, Detail::kThreads, kElementsPerAccess, sizeof_bits<Element>::value>

Thread map to be used by epilogue::PredicatedTileIterator. It satisfies the InterleavedOutputTileThreadMap concept.

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::WarpShape = WarpShape_

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::kElementsPerAccess = ElementsPerAccess

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::kInterleavedK = InterleavedK

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>

static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::kPartitionsK = PartitionsK

The documentation for this struct was generated from the following file:

default_thread_map_tensor_op.h