|
using | Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_ > |
|
using | Shape = Shape_ |
|
using | WarpMmaOperator = WarpMmaOperator_ |
|
using | OutputTileIterator = OutputTileIterator_ |
|
using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ |
|
using | WarpTileIterator = WarpTileIterator_ |
|
using | SharedLoadIterator = SharedLoadIterator_ |
|
using | OutputOp = OutputOp_ |
|
using | Padding = Padding_ |
|
using | Layout = layout::RowMajor |
| Output layout is always row-major. More...
|
|
using | LongIndex = typename Layout::LongIndex |
|
using | AccumulatorTile = typename Base::AccumulatorTile |
| The complete warp-level accumulator tile. More...
|
|
using | ElementAccumulator = typename WarpTileIterator::Element |
| Accumulator element. More...
|
|
using | ElementOutput = typename OutputTileIterator::Element |
| Output element. More...
|
|
using | TensorRef = typename OutputTileIterator::TensorRef |
| Tensor reference to destination tensor. More...
|
|
using | SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout > |
| Tensor reference to sync tensor. More...
|
|
using | ConstTensorRef = typename OutputTileIterator::ConstTensorRef |
| Const tensor reference to source tensor. More...
|
|
using | OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess > |
| Array type used to output. More...
|
|
using | AccumulatorAccessType = Array< typename WarpTileIterator::Element, OutputTileIterator::kElementsPerAccess > |
| Array type used by output functor. More...
|
|
using | WarpCount = typename Base::WarpCount |
| Number of warps. More...
|
|
using | Shape = Shape_ |
|
using | WarpMmaOperator = WarpMmaOperator_ |
|
using | AccumulatorFragmentIterator = AccumulatorFragmentIterator_ |
|
using | WarpTileIterator = WarpTileIterator_ |
|
using | Padding = Padding_ |
|
using | Layout = layout::RowMajor |
| Output layout is always row-major. More...
|
|
using | AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile |
| The complete warp-level accumulator tile. More...
|
|
using | ElementAccumulator = typename AccumulatorTile::Element |
| Accumulator element. More...
|
|
using | WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK > |
| Number of warps. More...
|
|
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Base = EpilogueBase< Shape_, WarpMmaOperator_, PartitionsK, AccumulatorFragmentIterator_, WarpTileIterator_, Padding_> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess> |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
using cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::TensorRef = typename OutputTileIterator::TensorRef |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
CUTLASS_DEVICE cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::Epilogue |
( |
typename Base::SharedStorage & |
shared_storage, |
|
|
int |
thread_idx, |
|
|
int |
warp_idx, |
|
|
int |
lane_idx |
|
) |
| |
|
inline |
- Parameters
-
shared_storage | Shared storage object |
thread_idx | ID of a thread within the threadblock |
warp_idx | ID of warp within threadblock |
lane_idx | ID of thread within warp |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::operator() |
( |
OutputOp const & |
output_op, |
|
|
OutputTileIterator |
destination_iterator, |
|
|
AccumulatorTile const & |
accumulators, |
|
|
OutputTileIterator |
source_iterator |
|
) |
| |
|
inline |
Note on source_iterator (from a trailing comment in the declaration): Threadblock tile coordinate in GEMM (in units of threadblock tiles)
- Parameters
-
output_op | Output operator |
destination_iterator | Tile iterator for destination |
accumulators | Complete warp-level accumulator tile |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess |
|
static |
template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename WarpTileIterator_ , typename SharedLoadIterator_ , typename OutputOp_ , typename Padding_ >
int const cutlass::epilogue::threadblock::Epilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, WarpTileIterator_, SharedLoadIterator_, OutputOp_, Padding_ >::kPartitionsK = PartitionsK |
|
static |