Defines the optimal thread map for TensorOp accumulator layouts.
#include <default_thread_map_tensor_op.h>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Type = InterleavedOutputTileThreadMap< layout::PitchLinearShape<Detail::WarpCount::kM, Detail::WarpCount::kN>, layout::PitchLinearShape<WarpShape::kM / Detail::kTensorOpRows, WarpShape::kN / InterleavedK>, Detail::kThreads, kElementsPerAccess, sizeof_bits<Element>::value>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
The documentation for this struct was generated from the following file: