CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Attributes | List of all members
cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator > Struct Template Reference

#include <default_gemm_configuration.h>

Public Types

using ThreadblockShape = GemmShape< 128, 256, 32 >
 
using WarpShape = GemmShape< 64, 64, 32 >
 
using InstructionShape = GemmShape< 16, 8, 8 >
 
using EpilogueOutputOp = epilogue::thread::LinearCombination< ElementC, 128/sizeof_bits< ElementC >::value, ElementAccumulator, ElementAccumulator >
 
using Operator = typename platform::conditional< (platform::is_same< ElementA, int8_t >::value||platform::is_same< ElementA, int4b_t >::value||platform::is_same< ElementA, uint8_t >::value||platform::is_same< ElementA, uint4b_t >::value), arch::OpMultiplyAddSaturate, arch::OpMultiplyAdd >::type
 

Static Public Attributes

static int const kAlignmentA = 128 / sizeof_bits<ElementA>::value
 
static int const kAlignmentB = 128 / sizeof_bits<ElementA>::value
 
static int const kStages = 2
 

Member Typedef Documentation

template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::EpilogueOutputOp = epilogue::thread::LinearCombination< ElementC, 128 / sizeof_bits<ElementC>::value, ElementAccumulator, ElementAccumulator >
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::InstructionShape = GemmShape<16, 8, 8>
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::Operator = typename platform::conditional< (platform::is_same<ElementA, int8_t>::value || platform::is_same<ElementA, int4b_t>::value || platform::is_same<ElementA, uint8_t>::value || platform::is_same<ElementA, uint4b_t>::value), arch::OpMultiplyAddSaturate, arch::OpMultiplyAdd>::type
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::ThreadblockShape = GemmShape<128, 256, 32>
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::WarpShape = GemmShape<64, 64, 32>

Member Data Documentation

template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::kAlignmentA = 128 / sizeof_bits<ElementA>::value
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::kAlignmentB = 128 / sizeof_bits<ElementA>::value
template<typename ElementA , typename ElementB , typename ElementC , typename ElementAccumulator >
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >::kStages = 2

The documentation for this struct was generated from the following file: