CUTLASS: cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

#include <default_epilogue_tensor_op.h>

Public Types
using	Shape = Shape_

using	WarpMmaTensorOp = WarpMmaTensorOp_

using	OutputOp = OutputOp_

using	ElementOutput = typename OutputOp::ElementOutput

using	LayoutC = typename WarpMmaTensorOp::LayoutC

using	ElementAccumulator = typename WarpMmaTensorOp::ElementC

using	OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK >::Type

using	OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK >

using	AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC >

using	Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero >

Static Public Attributes
static int const	kPartitionsK = PartitionsK

static int const	kElementsPerAccess = ElementsPerAccess

Detailed Description

template<typename Shape_, typename WarpMmaTensorOp_, int PartitionsK, typename OutputOp_, int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>
struct cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >

Defines sensible defaults for epilogues for TensorOps which use an interleaved output layout. In this case, shared memory is not needed.

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC>

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementAccumulator = typename WarpMmaTensorOp::ElementC

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::ElementOutput = typename OutputOp::ElementOutput

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Epilogue = cutlass::epilogue::threadblock::InterleavedEpilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, OutputOp, InterleavedK, IsBetaZero>

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::LayoutC = typename WarpMmaTensorOp::LayoutC

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputOp = OutputOp_

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileIterator = cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< OutputTileThreadMap, ElementOutput, InterleavedK>

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultInterleavedThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, InterleavedK >::Type

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::Shape = Shape_

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

using cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::WarpMmaTensorOp = WarpMmaTensorOp_

Member Data Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

int const cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::kElementsPerAccess = ElementsPerAccess

static

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess, int InterleavedK, bool IsBetaZero = false, bool isSplitK = false>

int const cutlass::epilogue::threadblock::DefaultInterleavedEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess, InterleavedK, IsBetaZero, isSplitK >::kPartitionsK = PartitionsK

static

The documentation for this struct was generated from the following file:

default_epilogue_tensor_op.h