CUTLASS: cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Defines the optimal thread map for Wmma TensorOp accumulator layouts.

#include <default_thread_map_wmma_tensor_op.h>

Classes
struct	Detail

Public Types
using	ThreadblockShape = ThreadblockShape_

using	WarpShape = WarpShape_

using	InstructionShape = InstructionShape_

using	Element = Element_

using	Type = OutputTileOptimalThreadMap< OutputTileShape< ThreadblockShape::kN, Detail::kTensorOpRows, Detail::WarpCount::kM, 1, 1 >, OutputTileShape< 1, WarpShape::kM/Detail::kTensorOpRows, 1, 1, WarpShape::kM/Detail::kTensorOpRows >, Detail::kThreads, kElementsPerAccess, sizeof_bits< Element >::value >
	ThreadMap to be used by epilogue::PredicatedTileIterator; it satisfies the OutputTileThreadMap concept.

Static Public Attributes
static int const	kPartitionsK = PartitionsK

static int const	kElementsPerAccess = ElementsPerAccess

Member Typedef Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::Element = Element_

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::InstructionShape = InstructionShape_

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::ThreadblockShape = ThreadblockShape_

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::Type = OutputTileOptimalThreadMap < OutputTileShape<ThreadblockShape::kN, Detail::kTensorOpRows, Detail::WarpCount::kM, 1, 1>, OutputTileShape<1, WarpShape::kM / Detail::kTensorOpRows, 1, 1, WarpShape::kM / Detail::kTensorOpRows>, Detail::kThreads, kElementsPerAccess, sizeof_bits<Element>::value >

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::WarpShape = WarpShape_

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

static int const cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess

template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>

static int const cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::kPartitionsK = PartitionsK

The documentation for this struct was generated from the following file:

default_thread_map_wmma_tensor_op.h