CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Namespaces | Functions
tools/util/include/cutlass/util/reference/device/gemm.h File Reference

Reference implementation for GEMM in device-side code. More...

#include "cutlass/coord.h"
#include "cutlass/numeric_types.h"
#include "cutlass/functional.h"
#include "cutlass/numeric_conversion.h"
#include "cutlass/matrix_traits.h"
#include "cutlass/tensor_view.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/util/reference/device/kernel/gemm.h"
Include dependency graph for tools/util/include/cutlass/util/reference/device/gemm.h:

Go to the source code of this file.

Classes

struct  cutlass::reference::device::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, InnerProductOp >
 
struct  cutlass::reference::device::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAdd >
 Partial specialization for multiply-add. More...
 
struct  cutlass::reference::device::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAddSaturate >
 Partial specialization for multiply-add-saturate. More...
 
struct  cutlass::reference::device::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpXorPopc >
 Partial specialization for XOR-popc. More...
 

Namespaces

 cutlass
 
 cutlass::reference
 
 cutlass::reference::device
 

Functions

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::device::compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, AccumulatorType initial_accum)
 
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::device::compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, AccumulatorType initial_accum)
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp >
void cutlass::reference::device::BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum)
 Computes a batch of GEMMs over a set of matrices of common dimension. More...
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void cutlass::reference::device::BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c)