CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess > Struct Template Reference

Defines sensible default types for threadblock-level epilogues of SIMT operations (SimtOps).

#include <default_epilogue_simt.h>

Public Types

using Shape = Shape_
 
using WarpMmaSimt = WarpMmaSimt_
 
using OutputOp = OutputOp_
 
using ElementOutput = typename OutputOp::ElementOutput
 
using LayoutC = typename WarpMmaSimt::LayoutC
 
using ElementAccumulator = typename WarpMmaSimt::ElementC
 
using OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapSimt< Shape, typename WarpMmaSimt::Shape, typename WarpMmaSimt::Policy, kPartitionsK, ElementOutput, kElementsPerAccess >::Type
 
using OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
 
using AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorSimt< typename WarpMmaSimt::Shape, typename WarpMmaSimt::ThreadMma, layout::RowMajor, typename WarpMmaSimt::Policy >
 
using WarpTileIterator = cutlass::epilogue::warp::TileIteratorSimt< typename WarpMmaSimt::Shape, typename WarpMmaSimt::ThreadMma, ElementAccumulator, layout::RowMajor, typename WarpMmaSimt::Policy >
 
using SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator >
 
using Padding = typename WarpTileIterator::Padding
 Hard-coded padding elements added, taken from WarpTileIterator::Padding.
 
using Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaSimt, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
 

Static Public Attributes

static int const kElementsPerAccess = ElementsPerAccess
 
static const int kPartitionsK = Shape::kK / WarpMmaSimt::Shape::kK
 

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorSimt< typename WarpMmaSimt::Shape, typename WarpMmaSimt::ThreadMma, layout::RowMajor, typename WarpMmaSimt::Policy >
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::ElementAccumulator = typename WarpMmaSimt::ElementC
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::ElementOutput = typename OutputOp::ElementOutput
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaSimt, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::LayoutC = typename WarpMmaSimt::LayoutC
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::OutputOp = OutputOp_
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapSimt< Shape, typename WarpMmaSimt::Shape, typename WarpMmaSimt::Policy, kPartitionsK, ElementOutput, kElementsPerAccess >::Type
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::Padding = typename WarpTileIterator::Padding
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::Shape = Shape_
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator >
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::WarpMmaSimt = WarpMmaSimt_
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::WarpTileIterator = cutlass::epilogue::warp::TileIteratorSimt< typename WarpMmaSimt::Shape, typename WarpMmaSimt::ThreadMma, ElementAccumulator, layout::RowMajor, typename WarpMmaSimt::Policy >

Member Data Documentation

template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess
static
template<typename Shape_ , typename WarpMmaSimt_ , typename OutputOp_ , int ElementsPerAccess>
const int cutlass::epilogue::threadblock::DefaultEpilogueSimt< Shape_, WarpMmaSimt_, OutputOp_, ElementsPerAccess >::kPartitionsK = Shape::kK / WarpMmaSimt::Shape::kK
static

The documentation for this struct was generated from the following file: