CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
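cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t > Struct Template Reference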
#include <default_gemm_configuration.h>
Public Types
using | ThreadblockShape = GemmShape< 128, 128, 32 > |
using | WarpShape = GemmShape< 32, 64, 32 > |
using | InstructionShape = GemmShape< 1, 1, 4 > |
using | EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 1, int32_t, float > |
using | Operator = arch::OpMultiplyAdd |
Static Public Attributes
static int const | kAlignmentA = 4 |
static int const | kAlignmentB = 4 |
static int const | kStages = 2 |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 1, int32_t, float > |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::InstructionShape = GemmShape<1, 1, 4> |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::Operator = arch::OpMultiplyAdd |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::ThreadblockShape = GemmShape<128, 128, 32> |
using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::WarpShape = GemmShape<32, 64, 32> |
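static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::kAlignmentA = 4 |
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::kAlignmentB = 4 |
static int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >::kStages = 2 |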