Thread-level blocked general matrix product.
#include <gemm.h>
using ElementA = typename TensorRefA::Element
using ElementB = typename TensorRefB::Element
using ElementC = typename TensorRefC::Element
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::epilogue(
    gemm::GemmCoord problem_size,
    ScalarType alpha,
    ScalarType beta,
    TensorRefC tensor_c,
    TensorRefC tensor_d,
    MatrixCoord output_coord = MatrixCoord()
)  [inline]
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
CUTLASS_HOST_DEVICE Gemm& cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::multiply_add(
    gemm::GemmCoord problem_size,
    TensorRefA tensor_a,
    TensorRefB tensor_b,
    MatrixCoord output_coord = MatrixCoord()
)  [inline]
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
AccumulatorType cutlass::reference::device::thread::Gemm< TensorRefA, TensorRefB, TensorRefC, ScalarType, AccumulatorType, OutputTile, InnerProductOp, ConvertOp >::accum[OutputTile::kColumn][OutputTile::kRow]
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<typename TensorRefC::Element, ScalarType>>
The documentation for this struct was generated from the following file: