CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Reference implementation for GEMM in device-side code. More...
#include "cutlass/coord.h"
#include "cutlass/matrix_traits.h"
#include "cutlass/tensor_view.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/util/reference/device/thread/gemm.h"
Go to the source code of this file.
Namespaces
cutlass
cutlass::reference
cutlass::reference::device
cutlass::reference::device::kernel
Functions
template<typename TensorRefA, typename TensorRefB, typename TensorRefC, typename ScalarType, typename AccumulatorType, typename OutputTile, typename InnerProductOp, typename ConvertOp>
__global__ void cutlass::reference::device::kernel::Gemm(gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum)
template<typename TensorRefCollectionA, typename TensorRefCollectionB, typename TensorRefCollectionC, typename ScalarType, typename AccumulatorType, typename OutputTile, typename InnerProductOp, typename ConvertOp>
__global__ void cutlass::reference::device::kernel::BatchedGemm(gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum)