CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | List of all members
cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ > Class Template Reference

Epilogue operator.

#include <direct_epilogue_tensor_op.h>

Classes

struct  Params
 Parameters structure for host-constructible state. More...
 
struct  SharedStorage
 Shared storage allocation needed by the epilogue. More...
 

Public Types

using Shape = Shape_
 
using Operator = Operator_
 
using WarpCount = gemm::GemmShape< Shape::kM/Operator::Shape::kM, Shape::kN/Operator::Shape::kN, PartitionsK >
 Number of warps spanning threadblock-scoped tile. More...
 
using FragmentC = typename Operator::FragmentC
 Accumulator tile is really the warp-scoped tile. More...
 
using Element = Element_
 Data type of output tensor. More...
 
using Layout = layout::RowMajor
 Output layout is always row-major. More...
 
using OutputOp = OutputOp_
 Function operator computing final output. More...
 
using ConvertOp = ConvertOp_
 Conversion operator to shared memory. More...
 
using TensorRef = TensorRef< Element, Layout::kRank, Layout >
 Reference to source and destination tensors. More...
 

Public Member Functions

CUTLASS_DEVICE DirectEpilogueTensorOp (Params const &params, SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
 Constructor. More...
 
CUTLASS_DEVICE void operator() (gemm::GemmCoord problem_size, gemm::GemmCoord tb_tile_coord, FragmentC const &accumulators)
 Streams the result to global memory. More...
 

Member Typedef Documentation

template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::ConvertOp = ConvertOp_
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Element = Element_
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::FragmentC = typename Operator::FragmentC
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Layout = layout::RowMajor
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Operator = Operator_
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::OutputOp = OutputOp_
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::Shape = Shape_
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::TensorRef = TensorRef<Element, Layout::kRank, Layout>
template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
using cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::WarpCount = gemm::GemmShape< Shape::kM / Operator::Shape::kM, Shape::kN / Operator::Shape::kN, PartitionsK >

Constructor & Destructor Documentation

template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
CUTLASS_DEVICE cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::DirectEpilogueTensorOp ( Params const &  params,
SharedStorage &  shared_storage,
int  thread_idx,
int  warp_idx,
int  lane_idx 
)
inline
Parameters
params: Host-constructible params object
shared_storage: Shared storage object
thread_idx: ID of a thread within the threadblock
warp_idx: ID of warp within threadblock
lane_idx: ID of thread within warp

Member Function Documentation

template<typename Shape_ , typename Operator_ , int PartitionsK, typename Element_ , typename OutputOp_ , typename ConvertOp_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::DirectEpilogueTensorOp< Shape_, Operator_, PartitionsK, Element_, OutputOp_, ConvertOp_ >::operator() ( gemm::GemmCoord  problem_size,
gemm::GemmCoord  tb_tile_coord,
FragmentC const &  accumulators 
)
inline

accumulators: Accumulator tile

Number of mma operations performed

Parameters
problem_size: Problem size of GEMM (units of ElementC)
tb_tile_coord: Threadblock tile coordinate in GEMM (in units of threadblock tiles)

The documentation for this class was generated from the following file: