CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes
cutlass::gemm::device Namespace Reference

Classes

struct  DefaultGemmConfiguration
 
struct  DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, ElementA, ElementB, ElementC, ElementAccumulator >
 
struct  DefaultGemmConfiguration< arch::OpClassSimt, ArchTag, int8_t, int8_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm70, ElementA, ElementB, ElementC, ElementAccumulator >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, ElementA, ElementB, ElementC, ElementAccumulator >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int4b_t, int4b_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int4b_t, uint4b_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int8_t, int8_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, int8_t, uint8_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint4b_t, int4b_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint4b_t, uint4b_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, int8_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >
 
struct  DefaultGemmConfiguration< arch::OpClassWmmaTensorOp, ArchTag, ElementA, ElementB, ElementC, ElementAccumulator >
 
class  Gemm
 
class  Gemm< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, SplitKSerial, Operator_, IsBetaZero >
 Partial specialization for column-major output exchanges problem size and operand. More...
 
class  GemmBatched
 
class  GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >
 Partial specialization for column-major output exchanges problem size and operand. More...
 
class  GemmComplex
 
class  GemmComplex< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, TransformA, TransformB, SplitKSerial >
 Partial specialization for column-major output exchanges problem size and operand. More...
 
class  GemmSplitKParallel
 
class  GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >
 Partial specialization for column-major output. More...