CUTLASS: cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Thread-level blocked general matrix product.

#include <cutlass/util/reference/device/thread/gemm.h>

Public Types
using	ElementA = typename TensorRefA::Element

using	ElementB = typename TensorRefB::Element

using	ElementC = typename TensorRefC::Element

Public Member Functions
CUTLASS_HOST_DEVICE	Gemm (AccumulatorType initial_accum=AccumulatorType(0))
	Constructor.

CUTLASS_HOST_DEVICE Gemm &	multiply_add (gemm::GemmCoord problem_size, TensorRefA tensor_a, TensorRefB tensor_b, MatrixCoord output_coord=MatrixCoord())
	Computes a matrix product.

CUTLASS_HOST_DEVICE Gemm &	epilogue (gemm::GemmCoord problem_size, ScalarType alpha, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, MatrixCoord output_coord=MatrixCoord())
	Performs linear scaling of matrix product and updates output tensor.

Public Attributes
ElementA	A_tile [OutputTile::kColumn]
	Tile for A operand.

ElementB	B_tile [OutputTile::kRow]
	Tile for B operand.

AccumulatorType	accum [OutputTile::kColumn][OutputTile::kRow]
	Tile for Accumulator.

Member Typedef Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementA = typename TensorRefA::Element

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementB = typename TensorRefB::Element

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

using cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::ElementC = typename TensorRefC::Element

Constructor & Destructor Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

CUTLASS_HOST_DEVICE cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::Gemm ( AccumulatorType initial_accum = AccumulatorType(0) )

inline

Member Function Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::epilogue	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		ScalarType	beta,
		TensorRefC	tensor_c,
		TensorRefC	tensor_d,
		MatrixCoord	output_coord = MatrixCoord()
	)

inline

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::multiply_add	(	gemm::GemmCoord	problem_size,
		TensorRefA	tensor_a,
		TensorRefB	tensor_b,
		MatrixCoord	output_coord = MatrixCoord()
	)

inline

Member Data Documentation

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

ElementA cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::A_tile[OutputTile::kColumn]

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

AccumulatorType cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::accum[OutputTile::kColumn][OutputTile::kRow]

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>

ElementB cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::B_tile[OutputTile::kRow]

The documentation for this struct was generated from the following file:

tools/util/include/cutlass/util/reference/device/thread/gemm.h