40 namespace threadblock {
46 typename ThreadblockShape_,
71 !(ThreadblockShape::kM % WarpShape::kM) &&
72 !(ThreadblockShape::kM % WarpShape::kM),
"Divisibility");
76 ThreadblockShape::kM / WarpShape::kM,
77 ThreadblockShape::kN / WarpShape::kN,
102 template <
typename ThreadblockShape_,
typename WarpShape_,
int PartitionsK,
103 typename Element_,
int ElementsPerAccess,
int InterleavedK>
107 static int const kPartitionsK = PartitionsK;
109 static int const kElementsPerAccess = ElementsPerAccess;
110 static int const kInterleavedK = InterleavedK;
118 static int const kTensorOpRows = 8;
119 static int const kWarpSize = 32;
122 !(ThreadblockShape::kM % WarpShape::kM),
128 ThreadblockShape::kN / WarpShape::kN, kPartitionsK>;
143 WarpShape::kN / InterleavedK>,
Definition: default_thread_map_tensor_op.h:64
Definition: output_tile_thread_map.h:228
static int const kWarpSize
Definition: default_thread_map_tensor_op.h:68
Definition: aligned_buffer.h:35
Tuple defining point in output tile.
Definition: output_tile_thread_map.h:57
Epilogue for threadblock scoped GEMMs using Tensor Ops.
Definition: default_thread_map_tensor_op.h:116
static int const kPartitionsK
Definition: default_thread_map_tensor_op.h:56
Defines common types used for all GEMM-like operators.
Element_ Element
Definition: default_thread_map_tensor_op.h:108
static int const kCount
Definition: include/cutlass/gemm/gemm.h:67
Definition: output_tile_thread_map.h:442
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
static int const kThreads
Number of participating threads.
Definition: default_thread_map_tensor_op.h:82
Defines the optimal thread map for TensorOp accumulator layouts.
Definition: default_thread_map_tensor_op.h:104
Defines the size of an element in bits.
Definition: numeric_types.h:42
Defines the optimal thread map for TensorOp accumulator layouts.
Definition: default_thread_map_tensor_op.h:52
static int const kTensorOpRows
Tensor Operations fundamentally perform operations on 8 rows.
Definition: default_thread_map_tensor_op.h:67
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
ThreadblockShape_ ThreadblockShape
Definition: default_thread_map_tensor_op.h:105
Element_ Element
Definition: default_thread_map_tensor_op.h:57
WarpShape_ WarpShape
Definition: default_thread_map_tensor_op.h:106
WarpShape_ WarpShape
Definition: default_thread_map_tensor_op.h:55
ThreadblockShape_ ThreadblockShape
Definition: default_thread_map_tensor_op.h:54
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.
static int const kElementsPerAccess
Definition: default_thread_map_tensor_op.h:58