Public Types
using	WarpCount = gemm::GemmShape< ThreadblockShape::kM/WarpShape::kM, ThreadblockShape::kN/WarpShape::kN, kPartitionsK >
	Number of warps.

using	Shape = cutlass::epilogue::threadblock::OutputTileShape< ThreadblockShape::kN, 4, 4, WarpCount::kM, 1 >

using	Count = cutlass::epilogue::threadblock::OutputTileShape< 1, 2, kInterleavedTilesM, 1, WarpShape::kM/kTensorOpRows >
	Number of iterations per subspace.

Static Public Attributes
static int const	kTensorOpRows = 16

static int const	kWarpSize = 32

static int const	kInterleavedTilesM = WarpShape::kM / 32

static int const	kThreads = WarpCount::kCount * kWarpSize
	Number of participating threads.

Member Typedef Documentation

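template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::Count = cutlass::epilogue::threadblock::OutputTileShape< 1, 2, kInterleavedTilesM, 1, WarpShape::kM / kTensorOpRows >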

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::Shape = cutlass::epilogue::threadblock::OutputTileShape< ThreadblockShape::kN, 4, 4, WarpCount::kM, 1 >

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::WarpCount = gemm::GemmShape< ThreadblockShape::kM / WarpShape::kM, ThreadblockShape::kN / WarpShape::kN, kPartitionsK >

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kInterleavedTilesM = WarpShape::kM / 32

static

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kTensorOpRows = 16

static

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kThreads = WarpCount::kCount * kWarpSize

static

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>

int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kWarpSize = 32

static

The documentation for this struct was generated from the following file: