CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess > Struct Template Reference

Defines sensible defaults for epilogues for TensorOps.

#include <default_epilogue_volta_tensor_op.h>

Public Types

using Shape = Shape_
 
using WarpMmaTensorOp = WarpMmaTensorOp_
 
using OutputOp = OutputOp_
 
using ElementOutput = typename OutputOp::ElementOutput
 
using LayoutC = typename WarpMmaTensorOp::LayoutC
 
using ElementAccumulator = typename WarpMmaTensorOp::ElementC
 
using OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, ElementAccumulator >::Type
 
using OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
 
using AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape< 32, 32, 4 >, ElementAccumulator, LayoutC >
 
using WarpTileIterator = cutlass::epilogue::warp::TileIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape< 32, 32, 4 >, ElementAccumulator, LayoutC >
 
using SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator, kSharedMemAlignment >
 
using Padding = typename WarpTileIterator::Padding
 Hard-coded padding elements added. More...
 
using Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
 

Static Public Attributes

static int const kPartitionsK = PartitionsK
 
static int const kElementsPerAccess = ElementsPerAccess
 
static int const kSharedMemAlignment = sizeof_bits<ElementAccumulator>::value * WarpTileIterator::kElementsPerAccess / 8
 

Member Typedef Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::AccumulatorFragmentIterator = cutlass::epilogue::warp::FragmentIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape<32, 32, 4>, ElementAccumulator, LayoutC >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementAccumulator = typename WarpMmaTensorOp::ElementC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::ElementOutput = typename OutputOp::ElementOutput
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Epilogue = cutlass::epilogue::threadblock::Epilogue< Shape, WarpMmaTensorOp, kPartitionsK, OutputTileIterator, AccumulatorFragmentIterator, WarpTileIterator, SharedLoadIterator, OutputOp, Padding >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::LayoutC = typename WarpMmaTensorOp::LayoutC
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputOp = OutputOp_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileIterator = cutlass::epilogue::threadblock::PredicatedTileIterator< OutputTileThreadMap, ElementOutput >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::OutputTileThreadMap = typename cutlass::epilogue::threadblock::DefaultThreadMapVoltaTensorOp< Shape, typename WarpMmaTensorOp::Shape, kPartitionsK, ElementOutput, kElementsPerAccess, ElementAccumulator >::Type
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Padding = typename WarpTileIterator::Padding
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::Shape = Shape_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::SharedLoadIterator = cutlass::epilogue::threadblock::SharedLoadIterator< typename OutputTileThreadMap::CompactedThreadMap, ElementAccumulator, kSharedMemAlignment >
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpMmaTensorOp = WarpMmaTensorOp_
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
using cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::WarpTileIterator = cutlass::epilogue::warp::TileIteratorVoltaTensorOp< typename WarpMmaTensorOp::Shape, gemm::GemmShape<32, 32, 4>, ElementAccumulator, LayoutC >

Member Data Documentation

template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess
static
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kPartitionsK = PartitionsK
static
template<typename Shape_ , typename WarpMmaTensorOp_ , int PartitionsK, typename OutputOp_ , int ElementsPerAccess>
int const cutlass::epilogue::threadblock::DefaultEpilogueVoltaTensorOp< Shape_, WarpMmaTensorOp_, PartitionsK, OutputOp_, ElementsPerAccess >::kSharedMemAlignment = sizeof_bits<ElementAccumulator>::value * WarpTileIterator::kElementsPerAccess / 8
static

The documentation for this struct was generated from the following file: