CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail Struct Reference

#include <default_thread_map_volta_tensor_op.h>

Public Types

using WarpCount = gemm::GemmShape< ThreadblockShape::kM/WarpShape::kM, ThreadblockShape::kN/WarpShape::kN, kPartitionsK >
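 Number of warps along the M and N dimensions of the threadblock tile (ThreadblockShape / WarpShape), together with the number of K partitions. More...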
 
using Shape = cutlass::epilogue::threadblock::OutputTileShape< ThreadblockShape::kN, 4, 4, WarpCount::kM, 1 >
 
using Count = cutlass::epilogue::threadblock::OutputTileShape< 1, 2, kInterleavedTilesM, 1, WarpShape::kM/kTensorOpRows >
 Number of iterations per subspace. More...
 

Static Public Attributes

static int const kTensorOpRows = 16
 
static int const kWarpSize = 32
 
static int const kInterleavedTilesM = WarpShape::kM / 32
 
static int const kThreads = WarpCount::kCount * kWarpSize
 Number of participating threads, computed as WarpCount::kCount * kWarpSize. More...
 

Member Typedef Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::Count = cutlass::epilogue::threadblock::OutputTileShape< 1, 2, kInterleavedTilesM, 1, WarpShape::kM / kTensorOpRows >
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::Shape = cutlass::epilogue::threadblock::OutputTileShape< ThreadblockShape::kN, 4, 4, WarpCount::kM, 1 >
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::WarpCount = gemm::GemmShape< ThreadblockShape::kM / WarpShape::kM, ThreadblockShape::kN / WarpShape::kN, kPartitionsK >

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kInterleavedTilesM = WarpShape::kM / 32
static
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kTensorOpRows = 16
static
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kThreads = WarpCount::kCount * kWarpSize
static
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename ElementOutput_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, ElementOutput_, ElementsPerAccess, float >::Detail::kWarpSize = 32
static

The documentation for this struct was generated from the following file: