CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <default_epilogue_tensor_op.h>
Public Types | |
using | Shape = Shape_ |
using | WarpMmaTensorOp = WarpMmaTensorOp_ |
using | OutputOp = OutputOp_ |
using | ElementOutput = typename OutputOp::ElementOutput |
using | LayoutC = typename WarpMmaTensorOp::LayoutC |
using | ElementAccumulator = typename WarpMmaTensorOp::ElementC |
using | OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK >::Type |
using | OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK > |
using | AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC > |
using | Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero > |
Static Public Attributes | |
static int const | kPartitionsK = PartitionsK |
static int const | kElementsPerAccess = ElementsPerAccess |
Defines sensible defaults for epilogues for TensorOps which uses intereleaved output layout. For this case, shared memory is not needed.
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC> |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementAccumulator = typename WarpMmaTensorOp::ElementC |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementOutput = typename OutputOp::ElementOutput |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero> |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::LayoutC = typename WarpMmaTensorOp::LayoutC |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputOp = OutputOp_ |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK> |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileThreadMap = typename cutlass::epilogue::threadblock:: DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK>::Type |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Shape = Shape_ |
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::WarpMmaTensorOp = WarpMmaTensorOp_ |
|
static |
|
static |