CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes
cutlass::gemm::warp Namespace Reference

Classes

struct  DefaultMmaTensorOp
 Partial specialization for m-by-n-by-kgroup. More...
 
class  MmaComplexTensorOp
 
class  MmaComplexTensorOp< Shape_, complex< RealElementA >, LayoutA_, complex< RealElementB >, LayoutB_, complex< RealElementC >, LayoutC_, Policy_, TransformA, TransformB, Enable >
 Partial specialization for complex*complex+complex => complex using real-valued TensorOps. More...
 
class  MmaSimt
 Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. More...
 
struct  MmaSimtPolicy
 Describes the arrangement and configuration of per-lane operations in warp-level matrix multiply. More...
 
class  MmaSimtTileIterator
 
class  MmaSimtTileIterator< Shape_, Operand::kA, Element_, layout::ColumnMajor, Policy_, PartitionsK, PartitionGroupSize >
 
class  MmaSimtTileIterator< Shape_, Operand::kA, Element_, layout::ColumnMajorInterleaved< 4 >, Policy_, PartitionsK, PartitionGroupSize >
 
class  MmaSimtTileIterator< Shape_, Operand::kB, Element_, layout::RowMajor, Policy_, PartitionsK, PartitionGroupSize >
 
class  MmaSimtTileIterator< Shape_, Operand::kB, Element_, layout::RowMajorInterleaved< 4 >, Policy_, PartitionsK, PartitionGroupSize >
 
class  MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >
 
class  MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::RowMajor, Policy_ >
 
class  MmaTensorOp
 Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. More...
 
class  MmaTensorOpAccumulatorTileIterator
 
class  MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::ColumnMajor, InstructionShape_, OpDelta_ >
 
class  MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::ColumnMajorInterleaved< InterleavedN >, InstructionShape_, OpDelta_ >
 
class  MmaTensorOpAccumulatorTileIterator< Shape_, Element_, cutlass::layout::RowMajor, InstructionShape_, OpDelta_ >
 
class  MmaTensorOpMultiplicandTileIterator
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::TensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, 64 >, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
class  MmaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::TensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, Crosswise >, InstructionShape_, OpDelta_, 32, PartitionsK_ >
 
struct  MmaTensorOpPolicy
 Policy. More...
 
class  MmaVoltaTensorOp
 Structure to compute the matrix product targeting CUDA cores and SIMT math instructions. More...
 
class  MmaVoltaTensorOpAccumulatorTileIterator
 
class  MmaVoltaTensorOpMultiplicandTileIterator
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kA, Element_, cutlass::layout::ColumnMajorVoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kA, Element_, cutlass::layout::VoltaTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kB, Element_, cutlass::layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand::kB, Element_, cutlass::layout::VoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::ColumnMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::RowMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 >
 
class  MmaVoltaTensorOpMultiplicandTileIterator< Shape_, Operand_, Element_, cutlass::layout::VoltaTensorOpMultiplicandCrosswise< sizeof_bits< Element_ >::value, KBlock >, InstructionShape_, OpDelta_, 32 >
 
struct  WarpSize
 Query the number of threads per warp. More...