CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Public Attributes | List of all members
cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp > Struct Template Reference

Thread-level blocked general matrix product.

#include <gemm.h>

Public Types

using ElementA = typename TensorRefA::Element
 
using ElementB = typename TensorRefB::Element
 
using ElementC = typename TensorRefC::Element
 

Public Member Functions

CUTLASS_HOST_DEVICE Gemm (AccumulatorType initial_accum=AccumulatorType(0))
 Constructor. More...
 
CUTLASS_HOST_DEVICE Gemm& multiply_add (gemm::GemmCoord problem_size, TensorRefA tensor_a, TensorRefB tensor_b, MatrixCoord output_coord=MatrixCoord())
 Computes a matrix product. More...
 
CUTLASS_HOST_DEVICE Gemm& epilogue (gemm::GemmCoord problem_size, ScalarType alpha, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, MatrixCoord output_coord=MatrixCoord())
 Performs linear scaling of matrix product and updates output tensor. More...
 

Public Attributes

ElementA A_tile [OutputTile::kColumn]
 Tile for A operand. More...
 
ElementB B_tile [OutputTile::kRow]
 Tile for B operand. More...
 
AccumulatorType accum [OutputTile::kColumn][OutputTile::kRow]
 Tile for Accumulator. More...
 

Member Typedef Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementA = typename TensorRefA::Element
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementB = typename TensorRefB::Element
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementC = typename TensorRefC::Element

Constructor & Destructor Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
CUTLASS_HOST_DEVICE cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::Gemm ( AccumulatorType  initial_accum = AccumulatorType(0))
inline

Member Function Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::epilogue ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
ScalarType  beta,
TensorRefC  tensor_c,
TensorRefC  tensor_d,
MatrixCoord  output_coord = MatrixCoord() 
)
inline
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::multiply_add ( gemm::GemmCoord  problem_size,
TensorRefA  tensor_a,
TensorRefB  tensor_b,
MatrixCoord  output_coord = MatrixCoord() 
)
inline

Member Data Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
ElementA cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::A_tile[OutputTile::kColumn]
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
AccumulatorType cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::accum[OutputTile::kColumn][OutputTile::kRow]
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
ElementB cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::B_tile[OutputTile::kRow]

The documentation for this struct was generated from the following file: