CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ > Class Template Reference

Epilogue operator without split-K.

#include <epilogue.h>

Inheritance diagram for cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >:
Inheritance graph
[legend]
Collaboration diagram for cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >:
Collaboration graph
[legend]

Public Types

using Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ >
 
using Shape = Shape_
 
using WarpMmaOperator = WarpMmaOperator_
 
using OutputTileIterator = OutputTileIterator_
 
using AccumulatorFragmentIterator = AccumulatorFragmentIterator_
 
using WarpTileIterator = WarpTileIterator_
 
using SharedLoadIterator = SharedLoadIterator_
 
using OutputOp = OutputOp_
 
using Padding = Padding_
 
using Layout = layout::RowMajor
 Output layout is always row-major. More...
 
using LongIndex = typename Layout::LongIndex
 
using AccumulatorTile = typename Base::AccumulatorTile
 The complete warp-level accumulator tile. More...
 
using ElementAccumulator = typename WarpTileIterator::Element
 Accumulator element. More...
 
using ElementOutput = typename OutputTileIterator::Element
 Output element. More...
 
using TensorRef = typename OutputTileIterator::TensorRef
 Tensor reference to destination tensor. More...
 
using SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout >
 Tensor reference to sync tensor. More...
 
using ConstTensorRef = typename OutputTileIterator::ConstTensorRef
 Const tensor reference to source tensor. More...
 
using OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess >
 Array type used to output. More...
 
using AccumulatorAccessType = Array< typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess >
 Array type used by output functor. More...
 
using WarpCount = typename Base::WarpCount
 Number of warps. More...
 
- Public Types inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ >
using Shape = Shape_
 
using WarpMmaOperator = WarpMmaOperator_
 
using AccumulatorFragmentIterator = AccumulatorFragmentIterator_
 
using WarpTileIterator = WarpTileIterator_
 
using Padding = Padding_
 
using Layout = layout::RowMajor
 Output layout is always row-major. More...
 
using AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile
 The complete warp-level accumulator tile. More...
 
using ElementAccumulator = typename AccumulatorTile::Element
 Accumulator element. More...
 
using WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK >
 Number of warps. More...
 

Public Member Functions

CUTLASS_DEVICE Epilogue (typename Base::SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
 Constructor. More...
 
CUTLASS_DEVICE void operator() (OutputOp const &output_op, OutputTileIterator destination_iterator, AccumulatorTile const &accumulators, OutputTileIterator source_iterator)
 Streams the result to global memory. More...
 
- Public Member Functions inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ >
CUTLASS_DEVICE EpilogueBase (SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
 Constructor. More...
 

Static Public Attributes

static int const kPartitionsK = PartitionsK
 
static int const kElementsPerAccess = OutputTileIterator::kElementsPerAccess
 Output access size. More...
 
- Static Public Attributes inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ >
static int const kPartitionsK = PartitionsK
 

Additional Inherited Members

- Protected Attributes inherited from cutlass::epilogue::threadblock::EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ >
SharedStorage & shared_storage_
 
WarpTileIterator warp_tile_iterator_
 Stores a warp's fragment of accumulators to SMEM. More...
 

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorAccessType = Array<typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorFragmentIterator = AccumulatorFragmentIterator_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::AccumulatorTile = typename Base::AccumulatorTile
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ConstTensorRef = typename OutputTileIterator::ConstTensorRef
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ElementAccumulator = typename WarpTileIterator::Element
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::ElementOutput = typename OutputTileIterator::Element
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Layout = layout::RowMajor
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::LongIndex = typename Layout::LongIndex
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputOp = OutputOp_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputTileIterator = OutputTileIterator_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Padding = Padding_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Shape = Shape_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::SharedLoadIterator = SharedLoadIterator_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::SyncTensorRef = typename cutlass::TensorRef<int, cutlass::layout::PackedVectorLayout>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::TensorRef = typename OutputTileIterator::TensorRef
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpCount = typename Base::WarpCount
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpMmaOperator = WarpMmaOperator_
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::WarpTileIterator = WarpTileIterator_

Constructor & Destructor Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
CUTLASS_DEVICE cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Epilogue ( typename Base::SharedStorage &  shared_storage,
int  thread_idx,
int  warp_idx,
int  lane_idx 
)
inline
Parameters
shared_storage: Shared storage object
thread_idx: ID of a thread within the threadblock
warp_idx: ID of warp within threadblock
lane_idx: ID of thread within warp

Member Function Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::operator() ( OutputOp const &  output_op,
OutputTileIterator  destination_iterator,
AccumulatorTile const &  accumulators,
OutputTileIterator  source_iterator 
)
inline

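Streams the result to global memory.

Parameters
output_op: Output operator
destination_iterator: Tile iterator for destination
accumulators: Complete warp-level accumulator tile
source_iterator: Tile iterator for the source (the generated page carried the unattached note "Threadblock tile coordinate in GEMM (in units of threadblock tiles)" here; it appears to be a misplaced trailing comment, so verify against the header)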

Member Data Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess
static
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kPartitionsK = PartitionsK
static

The documentation for this class was generated from the following file: