Epilogue operator without split-K.
#include <interleaved_epilogue.h>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
CUTLASS_DEVICE cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::InterleavedEpilogue(SharedStorage & shared_storage, int thread_idx, int warp_idx, int lane_idx) [inline]
- Parameters
-
shared_storage | Shared storage object |
thread_idx | ID of a thread within the threadblock |
warp_idx | ID of warp within threadblock |
lane_idx | ID of thread within warp |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::operator()(OutputOp const & output_op, OutputTileIterator destination_iterator, AccumulatorTile const & accumulators, OutputTileIterator source_iterator) [inline]
Threadblock tile coordinate in GEMM (in units of threadblock tiles)
- Parameters
-
output_op | Output operator |
destination_iterator | Tile iterator for destination |
accumulators | Complete warp-level accumulator tile |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>
The documentation for this class was generated from the following file: