CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess > Struct Template Reference

Defines sensible defaults for epilogues for TensorOps.

#include <default_epilogue_tensor_op.h>

Public Types

using Shape = Shape_
 
using WarpMmaTensorOp = WarpMmaTensorOp_
 
using OutputOp = OutputOp_
 
using ElementOutput = typename OutputOp::ElementOutput
 
using LayoutC = typename WarpMmaTensorOp::LayoutC
 
using ElementAccumulator = typename WarpMmaTensorOp::ElementC
 
using OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess >::Type
 
using OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
 
using AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC >
 
using WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, ElementAccumulator, LayoutC >
 
using SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator >
 
using Padding = cutlass::MatrixShape< 0, 64/sizeof_bits< ElementAccumulator >::value *4 >
 Hard-coded padding elements added: zero rows and 64 / sizeof_bits< ElementAccumulator >::value * 4 columns. More...
 
using Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
 

Static Public Attributes

static int const kPartitionsK = PartitionsK
 
static int const kElementsPerAccess = ElementsPerAccess
 

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, typename WarpMmaTensorOp::Policy::Operator::ElementC, typename WarpMmaTensorOp::Policy::Operator::FragmentC, LayoutC >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementAccumulator = typename WarpMmaTensorOp::ElementC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementOutput = typename OutputOp::ElementOutput
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::LayoutC = typename WarpMmaTensorOp::LayoutC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputOp = OutputOp_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess >::Type
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Padding = cutlass::MatrixShape<0, 64 / sizeof_bits<ElementAccumulator>::value * 4>
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Shape = Shape_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpMmaTensorOp = WarpMmaTensorOp_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp< typename WarpMmaTensorOp::Shape, typename WarpMmaTensorOp::Policy::Operator::Shape, ElementAccumulator, LayoutC >

Member Data Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
static int const cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
static int const cutlass::epilogue::threadblock::DefaultEpilogueTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kPartitionsK = PartitionsK

The documentation for this struct was generated from the following file: