CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Classes | Functions
cutlass::gemm::threadblock Namespace Reference

Namespaces

 detail
 

Classes

struct  DefaultGemvCore
 
struct  DefaultMma
 
struct  DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::ColumnMajorInterleaved< InterleavedK >, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, true >
 Specialization for column-major-interleaved output. More...
 
struct  DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, false >
 Specialization for row-major output (OperatorClass Simt) More...
 
struct  DefaultMma< ElementA, LayoutA, kAlignmentA, ElementB, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassTensorOp, ArchTag, ThreadblockShape, WarpShape, InstructionShape, 2, Operator, false >
 Specialization for row-major output (OperatorClass TensorOp) More...
 
struct  DefaultMma< int8_t, LayoutA, kAlignmentA, int8_t, LayoutB, kAlignmentB, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, ArchTag, ThreadblockShape, WarpShape, GemmShape< 1, 1, 4 >, 2, Operator, false >
 
struct  DefaultMmaCore
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_, >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 1 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 Partial specialization: More...
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::ColumnMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 Partial specialization: More...
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >
 Partial specialization: More...
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::ColumnMajorInterleaved< InterleavedK >, ElementB_, layout::RowMajorInterleaved< InterleavedK >, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_, AccumulatorsInRowMajor >
 
struct  DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >
 
struct  GemmBatchedIdentityThreadblockSwizzle
 Threadblock swizzling function for batched GEMMs. More...
 
struct  GemmHorizontalThreadblockSwizzle
 Threadblock swizzling function for GEMMs. More...
 
struct  GemmIdentityThreadblockSwizzle
 Threadblock swizzling function for GEMMs. More...
 
struct  GemmSplitKHorizontalThreadblockSwizzle
 Threadblock swizzling function for split-K GEMMs. More...
 
struct  GemmSplitKIdentityThreadblockSwizzle
 Threadblock swizzling function for split-K GEMMs. More...
 
class  Gemv
 Structure to compute the matrix-vector product using SIMT math instructions. More...
 
struct  GemvBatchedStridedThreadblockDefaultSwizzle
 Threadblock swizzling function for batched GEMVs. More...
 
class  MmaBase
 
class  MmaPipelined
 Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. More...
 
struct  MmaPolicy
 Policy object describing MmaTensorOp. More...
 
class  MmaSingleStage
 Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. More...
 

Functions

CUTLASS_DEVICE int RematerializeThreadIdxX ()
 Helper to rematerialize thread Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeThreadIdxY ()
 Helper to rematerialize thread Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeThreadIdxZ ()
 Helper to rematerialize thread Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockIdxX ()
 Helper to rematerialize block Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockIdxY ()
 Helper to rematerialize block Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockIdxZ ()
 Helper to rematerialize block Idx. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockDimX ()
 Helper to rematerialize block Dim. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockDimY ()
 Helper to rematerialize block Dim. Reduces register liveness. More...
 
CUTLASS_DEVICE int RematerializeBlockDimZ ()
 Helper to rematerialize block Dim. Reduces register liveness. More...
 

Function Documentation

CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockDimX ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockDimY ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockDimZ ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockIdxX ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockIdxY ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeBlockIdxZ ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeThreadIdxX ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeThreadIdxY ( )
CUTLASS_DEVICE int cutlass::gemm::threadblock::RematerializeThreadIdxZ ( )