CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Detail Struct Reference

#include <default_thread_map_tensor_op.h>

Public Types

using WarpCount = gemm::GemmShape< ThreadblockShape::kM/WarpShape::kM, ThreadblockShape::kN/WarpShape::kN, kPartitionsK >
 Number of warps. More...
 

Static Public Attributes

static int const kTensorOpRows = 8
 Tensor operations fundamentally operate on 8 rows. More...
 
static int const kWarpSize = 32
 Number of threads per warp.
 
static int const kThreads = WarpCount::kCount * kWarpSize
 Number of participating threads. More...
 

Member Typedef Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
using cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Detail::WarpCount = gemm::GemmShape<ThreadblockShape::kM / WarpShape::kM, ThreadblockShape::kN / WarpShape::kN, kPartitionsK>

Member Data Documentation

template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Detail::kTensorOpRows = 8
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Detail::kThreads = WarpCount::kCount * kWarpSize
template<typename ThreadblockShape_ , typename WarpShape_ , int PartitionsK, typename Element_ , int ElementsPerAccess, int InterleavedK>
static int const cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< ThreadblockShape_, WarpShape_, PartitionsK, Element_, ElementsPerAccess, InterleavedK >::Detail::kWarpSize = 32

The documentation for this struct was generated from the following file: