CUTLASS: cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Defines the optimal thread map for Volta TensorOp accumulator layouts. This partial specialization applies when the accumulator element type is half_t.

#include <default_thread_map_volta_tensor_op.h>

Classes
struct	Detail

Public Types
using	ThreadblockShape = ThreadblockShape_

using	WarpShape = WarpShape_

using	ElementOutput = ElementOutput_

using	ElementAccumulator = half_t

using	Type = OutputTileOptimalThreadMap< typename Detail::Shape, typename Detail::Count, Detail::kThreads, kElementsPerAccess, sizeof_bits< ElementOutput >::value >
	ThreadMap to be used by epilogue::PredicatedTileIterator; it satisfies the OutputTileThreadMap concept.

Static Public Attributes
static int const	kPartitionsK = PartitionsK

static int const	kElementsPerAccess = ElementsPerAccess

Member Typedef Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::ElementAccumulator = half_t

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::ElementOutput = ElementOutput_

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::ThreadblockShape = ThreadblockShape_

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::Type = OutputTileOptimalThreadMap < typename Detail::Shape, typename Detail::Count, Detail::kThreads, kElementsPerAccess, sizeof_bits<ElementOutput>::value >

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::WarpShape = WarpShape_

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

static int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::kElementsPerAccess = ElementsPerAccess

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

static int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, half_t >::kPartitionsK = PartitionsK

The documentation for this struct was generated from the following file:

default_thread_map_volta_tensor_op.h