CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ > Class Template Reference

#include <epilogue_workspace.h>

Classes

struct  Params
 Parameters structure. More...
 
struct  SharedStorage
 Shared storage allocation needed by the epilogue. More...
 

Public Types

using Shape = Shape_
 
using FragmentC = FragmentC_
 
using ElementC = typename FragmentC::value_type
 

Public Member Functions

CUTLASS_DEVICE EpilogueWorkspace (Params const &params, SharedStorage &, int warp_idx, int lane_idx)
 Constructor. More...
 
CUTLASS_DEVICE void operator() (cutlass::gemm::GemmCoord problem_size, cutlass::gemm::GemmCoord tb_tile_coord, FragmentC const &accum)
 Streams the result to global memory. More...
 

Static Public Attributes

static int const kWarpCount = WarpCount
 
static int const kAccessSizeInBits = 128
 Optimize for 128b accesses. More...
 
static int const kWarpSize = 32
 Warp size from the perspective of memory operations. More...
 
static int const kElementsPerAccess
 Vector length of accesses. More...
 
static int const kIterations = FragmentC::kElements / kElementsPerAccess
 Number of stores per thread. More...
 
static int const kWarpAccesses = kIterations * kWarpSize
 Total number of vectorized accesses in warp (in units of vector) More...
 
static int const kThreadblockAccesses = kWarpAccesses * kWarpCount
 Total number of vectorized accesses in threadblock tile (in units of vector) More...
 

Member Typedef Documentation

template<typename Shape_ , int WarpCount, typename FragmentC_ >
using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::ElementC = typename FragmentC::value_type
template<typename Shape_ , int WarpCount, typename FragmentC_ >
using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::FragmentC = FragmentC_
template<typename Shape_ , int WarpCount, typename FragmentC_ >
using cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::Shape = Shape_

Constructor & Destructor Documentation

template<typename Shape_ , int WarpCount, typename FragmentC_ >
CUTLASS_DEVICE cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::EpilogueWorkspace ( Params const &  params,
SharedStorage & ,
int  warp_idx,
int  lane_idx 
)
inline
Parameters
params: Host-constructable params object
warp_idx: ID of warp within threadblock
lane_idx: ID of thread within warp

Member Function Documentation

template<typename Shape_ , int WarpCount, typename FragmentC_ >
CUTLASS_DEVICE void cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::operator() ( cutlass::gemm::GemmCoord  problem_size,
cutlass::gemm::GemmCoord  tb_tile_coord,
FragmentC const &  accum 
)
inline

accum: Accumulator tile

Parameters
problem_size: Problem size of GEMM (units of ElementC)
tb_tile_coord: Threadblock tile coordinate in GEMM (in units of threadblock tiles)

Member Data Documentation

template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kAccessSizeInBits = 128
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kElementsPerAccess
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kIterations = FragmentC::kElements / kElementsPerAccess
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kThreadblockAccesses = kWarpAccesses * kWarpCount
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpAccesses = kIterations * kWarpSize
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpCount = WarpCount
static
template<typename Shape_ , int WarpCount, typename FragmentC_ >
int const cutlass::epilogue::EpilogueWorkspace< Shape_, WarpCount, FragmentC_ >::kWarpSize = 32
static

The documentation for this class was generated from the following file: