Defines the optimal thread map for TensorOp accumulator layouts.
#include <default_thread_map_volta_tensor_op.h>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
The documentation for this struct was generated from the following file: default_thread_map_volta_tensor_op.h