#include <default_thread_map_volta_tensor_op.h>
|
using WarpCount = gemm::GemmShape< ThreadblockShape::kM/WarpShape::kM, ThreadblockShape::kN/WarpShape::kN, kPartitionsK >
Number of warps.
|
|
using Shape = cutlass::epilogue::threadblock::OutputTileShape< ThreadblockShape::kN, 4, 4, WarpCount::kM, 1 >
|
using Count = cutlass::epilogue::threadblock::OutputTileShape< 1, 2, kInterleavedTilesM, 1, WarpShape::kM/kTensorOpRows >
Number of iterations per subspace.
|
|
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
The documentation for this struct was generated from the following file: