CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial > Struct Template Reference

#include <gemm.h>

Classes

struct  Params
 Parameters structure. More...
 
union  SharedStorage
 Shared memory storage structure. More...
 

Public Types

using Mma = Mma_
 
using Epilogue = Epilogue_
 
using OutputOp = typename Epilogue::OutputOp
 
using ThreadblockSwizzle = ThreadblockSwizzle_
 
using WarpCount = typename Mma::WarpCount
 Warp count (concept: GemmShape) More...
 

Public Member Functions

CUTLASS_HOST_DEVICE Gemm ()
 
CUTLASS_DEVICE void operator() (Params const &params, SharedStorage &shared_storage)
 Executes one GEMM. More...
 

Static Public Member Functions

static Status can_implement (cutlass::gemm::GemmCoord const &problem_size, typename Mma::IteratorA::TensorRef ref_A, typename Mma::IteratorB::TensorRef ref_B, typename Epilogue::OutputTileIterator::TensorRef ref_C, typename Epilogue::OutputTileIterator::TensorRef ref_D)
 Determines whether the kernel satisfies alignment requirements. More...
 

Static Public Attributes

static bool const kSplitKSerial = SplitKSerial
 
static int const kThreadCount = 32 * WarpCount::kCount
 

Member Typedef Documentation

template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Epilogue = Epilogue_
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Mma = Mma_
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::OutputOp = typename Epilogue::OutputOp
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::ThreadblockSwizzle = ThreadblockSwizzle_
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
using cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::WarpCount = typename Mma::WarpCount

Constructor & Destructor Documentation

template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
CUTLASS_HOST_DEVICE cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::Gemm ( )
inline

Member Function Documentation

template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
static Status cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::can_implement ( cutlass::gemm::GemmCoord const &  problem_size,
typename Mma::IteratorA::TensorRef  ref_A,
typename Mma::IteratorB::TensorRef  ref_B,
typename Epilogue::OutputTileIterator::TensorRef  ref_C,
typename Epilogue::OutputTileIterator::TensorRef  ref_D 
)
inline static
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
CUTLASS_DEVICE void cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::operator() ( Params const &  params,
SharedStorage &  shared_storage 
)
inline

Member Data Documentation

template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
bool const cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::kSplitKSerial = SplitKSerial
static
template<typename Mma_ , typename Epilogue_ , typename ThreadblockSwizzle_ , bool SplitKSerial>
int const cutlass::gemm::kernel::Gemm< Mma_, Epilogue_, ThreadblockSwizzle_, SplitKSerial >::kThreadCount = 32 * WarpCount::kCount
static

The documentation for this struct was generated from the following file: