CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK > Struct Template Reference

#include <default_epilogue_tensor_op.h>

Public Types

using Shape = Shape_
 
using WarpMmaTensorOp = WarpMmaTensorOp_
 
using OutputOp = OutputOp_
 
using ElementOutput = typename OutputOp::ElementOutput
 
using LayoutC = typename WarpMmaTensorOp::LayoutC
 
using ElementAccumulator = typename WarpMmaTensorOp::ElementC
 
using OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK >::Type
 
using OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK >
 
using AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC >
 
using Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero >
 

Static Public Attributes

static int const kPartitionsK = PartitionsK
 
static int const kElementsPerAccess = ElementsPerAccess
 

Detailed Description

template<typename Shape_, typename WarpMmaTensorOp_, int PartitionsK, typename OutputOp_, int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
struct cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >

Defines sensible defaults for epilogues for TensorOps which uses intereleaved output layout. For this case, shared memory is not needed.

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC>
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementAccumulator = typename WarpMmaTensorOp::ElementC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementOutput = typename OutputOp::ElementOutput
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero>
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::LayoutC = typename WarpMmaTensorOp::LayoutC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputOp = OutputOp_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK>
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileThreadMap = typename cutlass::epilogue::threadblock:: DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK>::Type
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Shape = Shape_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::WarpMmaTensorOp = WarpMmaTensorOp_

Member Data Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
int const cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::kElementsPerAccess = ElementsPerAccess
static
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
int const cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::kPartitionsK = PartitionsK
static

The documentation for this struct was generated from the following file: