54 namespace threadblock {
61 typename WarpMmaOperator_,
63 typename AccumulatorFragmentIterator_,
64 typename WarpTileIterator_,
89 Shape::kM / WarpMmaOperator::Shape::kM,
90 Shape::kN / WarpMmaOperator::Shape::kN,
104 using Element =
typename WarpTileIterator::Element;
110 using Layout =
typename WarpTileIterator::Layout;
137 return storage.
data();
150 if (threadIdx.x == 0) {
188 shared_storage_(shared_storage),
189 warp_tile_iterator_(shared_storage.
reference(), lane_idx) {
204 warp_tile_iterator_.add_tile_offset(warp_offset);
static int const kM
Definition: include/cutlass/gemm/gemm.h:58
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
CUTLASS_DEVICE void debug_print()
Definition: epilogue_base.h:149
static int const kColumn
columns of a matrix
Definition: matrix_shape.h:44
WarpTileIterator warp_tile_iterator_
Stores a warp's fragment of accumulators to SMEM.
Definition: epilogue_base.h:176
SharedStorage & shared_storage_
Definition: epilogue_base.h:173
Templates implementing how threads are mapped to a given tile.
WarpMmaOperator_ WarpMmaOperator
Definition: epilogue_base.h:71
Shared storage allocation needed by the epilogue.
Definition: epilogue_base.h:97
CUTLASS_DEVICE Element * data()
Returns a pointer to the shared memory buffer.
Definition: epilogue_base.h:136
Defines common types used for all GEMM-like operators.
typename AccumulatorTile::Element ElementAccumulator
Accumulator element.
Definition: epilogue_base.h:84
typename WarpTileIterator::TensorRef TensorRef
Tensor reference to shared memory allocation.
Definition: epilogue_base.h:107
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
static int const kK
Definition: include/cutlass/gemm/gemm.h:60
Defines layout functions used by TensorRef and derived classes for common 4-D and 5-D tensor formats.
Defines a Shape template for matrix tiles.
static int const kPartitionsK
Definition: epilogue_base.h:72
typename WarpTileIterator::Element Element
Element type of shared memory.
Definition: epilogue_base.h:104
Defines a canonical coordinate for rank=4 tensors offering named indices.
AlignedBuffer< Element, StorageShape::kCount > storage
Definition: epilogue_base.h:128
static int const kRow
rows of a matrix
Definition: matrix_shape.h:43
AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared memory.
Top-level include for all CUTLASS numeric types.
Modifies semantics of cutlass::Array<> to provide guaranteed alignment.
Definition: aligned_buffer.h:45
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
CUTLASS_HOST_DEVICE pointer data()
Definition: aligned_buffer.h:84
typename WarpTileIterator::Layout Layout
Layout of shared memory allocation.
Definition: epilogue_base.h:110
AccumulatorFragmentIterator_ AccumulatorFragmentIterator
Definition: epilogue_base.h:73
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
Defines layout functions used for rank=1 vectors.
Shape_ Shape
Definition: epilogue_base.h:70
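Base class for epilogues defining warp-level types (MMA operator, accumulator fragment iterator, warp tile iterator) and the shared storage they use.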
Definition: epilogue_base.h:67
static CUTLASS_HOST_DEVICE RowMajor packed(MatrixCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: layout/matrix.h:93
Padding_ Padding
Definition: epilogue_base.h:75
CUTLASS_DEVICE EpilogueBase(SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
Constructor.
Definition: epilogue_base.h:182
WarpTileIterator_ WarpTileIterator
Definition: epilogue_base.h:74
typename AccumulatorFragmentIterator::AccumulatorTile AccumulatorTile
The complete warp-level accumulator tile.
Definition: epilogue_base.h:81
Basic include for CUTLASS.
Definition: matrix_coord.h:39
CUTLASS_DEVICE TensorRef reference()
Returns a tensor reference to the shared memory buffer.
Definition: epilogue_base.h:142
static int const kN
Definition: include/cutlass/gemm/gemm.h:59