Defines the optimal thread map for Wmma TensorOp accumulator layouts.
#include <default_thread_map_wmma_tensor_op.h>
|
using | ThreadblockShape = ThreadblockShape_ |
|
using | WarpShape = WarpShape_ |
|
using | InstructionShape = InstructionShape_ |
|
using | Element = Element_ |
|
using | Type = OutputTileOptimalThreadMap< OutputTileShape< ThreadblockShape::kN, Detail::kTensorOpRows, Detail::WarpCount::kM, 1, 1 >, OutputTileShape< 1, WarpShape::kM/Detail::kTensorOpRows, 1, 1, WarpShape::kM/Detail::kTensorOpRows >, Detail::kThreads, kElementsPerAccess, sizeof_bits< Element >::value > |
| ThreadMap to be used by epilogue::PredicatedTileIterator satisfying concept OutputTileThreadMap.
|
|
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultThreadMapWmmaTensorOp< ThreadblockShape_, WarpShape_, InstructionShape_, PartitionsK, Element_, ElementsPerAccess >::Type = OutputTileOptimalThreadMap< OutputTileShape< ThreadblockShape::kN, Detail::kTensorOpRows, Detail::WarpCount::kM, 1, 1 >, OutputTileShape< 1, WarpShape::kM / Detail::kTensorOpRows, 1, 1, WarpShape::kM / Detail::kTensorOpRows >, Detail::kThreads, kElementsPerAccess, sizeof_bits< Element >::value >
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess>
The documentation for this struct was generated from the following file: