CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
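cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t > Template Specialization Reference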
#include <default_gemm_configuration.h>
Public Types
using | ThreadblockShape = GemmShape< 128, 256, 64 > |
using | WarpShape = GemmShape< 64, 64, 64 > |
using | InstructionShape = GemmShape< 8, 8, 16 > |
using | EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 128/sizeof_bits< ElementC >::value, int32_t, float > |
using | Operator = arch::OpMultiplyAddSaturate |
Static Public Attributes
static int const | kAlignmentA = 128 / sizeof_bits<uint8_t>::value |
static int const | kAlignmentB = 128 / sizeof_bits<uint8_t>::value |
static int const | kStages = 2 |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 128 / sizeof_bits<ElementC>::value, int32_t, float> |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::InstructionShape = GemmShape<8, 8, 16> |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::Operator = arch::OpMultiplyAddSaturate |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::ThreadblockShape = GemmShape<128, 256, 64> |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::WarpShape = GemmShape<64, 64, 64> |
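static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kAlignmentA = 128 / sizeof_bits<uint8_t>::value
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kAlignmentB = 128 / sizeof_bits<uint8_t>::value
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kStages = 2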