Epilogue operator without split-K.

#include <interleaved_epilogue.h>

Classes
struct	SharedStorage
	Shared storage allocation needed by the epilogue. More...

Public Types
using	Shape = Shape_

using	WarpMmaOperator = WarpMmaOperator_

using	AccumulatorFragmentIterator = AccumulatorFragmentIterator_

using	OutputTileIterator = OutputTileIterator_

using	OutputOp = OutputOp_

using	Layout = layout::ColumnMajorInterleaved< InterleavedK >
	Output layout is always column-major interleaved. More...

using	AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile
	The complete warp-level accumulator tile. More...

using	ElementAccumulator = typename AccumulatorTile::Element
	Accumulator element. More...

using	ElementOutput = typename OutputTileIterator::Element
	Output element. More...

using	TensorRef = typename OutputTileIterator::TensorRef
	Tensor reference to destination tensor. More...

using	SyncTensorRef = typename cutlass::TensorRef< int, cutlass::layout::PackedVectorLayout >
	Tensor reference to sync tensor. More...

using	ConstTensorRef = typename OutputTileIterator::ConstTensorRef
	Const tensor reference to source tensor. More...

using	OutputAccessType = Array< typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess >
	Array type used to output. More...

using	AccumulatorAccessType = Array< ElementAccumulator, OutputTileIterator::kElementsPerAccess >
	Array type used by output functor. More...

using	WarpCount = gemm::GemmShape< Shape::kM/WarpMmaOperator::Shape::kM, Shape::kN/WarpMmaOperator::Shape::kN, kPartitionsK >
	Number of warps. More...

Public Member Functions
CUTLASS_DEVICE	InterleavedEpilogue (SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
	Constructor. More...

CUTLASS_DEVICE void	operator() (OutputOp const &output_op, OutputTileIterator destination_iterator, AccumulatorTile const &accumulators, OutputTileIterator source_iterator)
	Streams the result to global memory. More...

Static Public Attributes
static int const	kPartitionsK = PartitionsK

static int const	kElementsPerAccess = OutputTileIterator::kElementsPerAccess
	Output access size. More...

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorAccessType = Array<ElementAccumulator, OutputTileIterator::kElementsPerAccess>

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorFragmentIterator = AccumulatorFragmentIterator_

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::AccumulatorTile = typename AccumulatorFragmentIterator::AccumulatorTile

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ConstTensorRef = typename OutputTileIterator::ConstTensorRef

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ElementAccumulator = typename AccumulatorTile::Element

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::ElementOutput = typename OutputTileIterator::Element

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::Layout = layout::ColumnMajorInterleaved<InterleavedK>

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputAccessType = Array<typename OutputTileIterator::Element, OutputTileIterator::kElementsPerAccess>

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputOp = OutputOp_

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::OutputTileIterator = OutputTileIterator_

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::Shape = Shape_

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::SyncTensorRef = typename cutlass::TensorRef<int, cutlass::layout::PackedVectorLayout>

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::TensorRef = typename OutputTileIterator::TensorRef

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::WarpCount = gemm::GemmShape<Shape::kM / WarpMmaOperator::Shape::kM, Shape::kN / WarpMmaOperator::Shape::kN, kPartitionsK>

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

using cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::WarpMmaOperator = WarpMmaOperator_

Constructor & Destructor Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

CUTLASS_DEVICE cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::InterleavedEpilogue	(	SharedStorage &	shared_storage,
		int	thread_idx,
		int	warp_idx,
		int	lane_idx
	)

inline

Parameters

shared_storage	Shared storage object
thread_idx	ID of a thread within the threadblock
warp_idx	ID of a warp within the threadblock
lane_idx	ID of a thread within the warp

Member Function Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::operator()	(	OutputOp const &	output_op,
		OutputTileIterator	destination_iterator,
		AccumulatorTile const &	accumulators,
		OutputTileIterator	source_iterator
	)

inline

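Streams the result to global memory.

Parameters

output_op	Output operator
destination_iterator	Tile iterator for destination
accumulators	Complete warp-level accumulator tile
source_iterator	Tile iterator for source (the trailing comment attached to this parameter reads "Threadblock tile coordinate in GEMM (in units of threadblock tiles)", which appears to be misplaced)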

Member Data Documentation

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

int const cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::kElementsPerAccess = OutputTileIterator::kElementsPerAccess

static

template<typename Shape_ , typename WarpMmaOperator_ , int PartitionsK, typename OutputTileIterator_ , typename AccumulatorFragmentIterator_ , typename OutputOp_ , int InterleavedK, bool IsBetaZero = false>

int const cutlass::epilogue::threadblock::InterleavedEpilogue< Shape_, WarpMmaOperator_, PartitionsK, OutputTileIterator_, AccumulatorFragmentIterator_, OutputOp_, InterleavedK, IsBetaZero >::kPartitionsK = PartitionsK

static

The documentation for this class was generated from the following file:

interleaved_epilogue.h

Classes

Public Types

Public Member Functions

Static Public Attributes

Member Typedef Documentation

Constructor & Destructor Documentation

Member Function Documentation

Member Data Documentation