CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Functions
cutlass::reference::device::kernel Namespace Reference

Namespaces

 detail
 Defines several helpers.
 

Functions

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum)
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum)
 
template<typename T >
__global__ void TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
 Kernel to initialize tensor to uniform random distribution. More...
 
template<typename T >
__global__ void TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
 Kernel to initialize tensor to Gaussian random distribution. More...
 
template<typename T >
__global__ void TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
 Kernel to initialize tensor to a linear function of its element coordinates. More...
 
template<typename T >
__global__ void TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
 Kernel to initialize tensor to an identity matrix. More...
 
template<typename Func , int Rank, typename Params >
__global__ void TensorForEach (Coord< Rank > size, Params params=Params())
 Kernel calls a functor for each element in a tensor's index space. More...
 
template<typename Func , int Rank, typename Params >
__global__ void TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end)
 Kernel calls a functor for each element along a tensor's diagonal. More...
 
template<typename Element , typename Func >
__global__ void BlockForEach (Element *ptr, size_t capacity, typename Func::Params params)
 
template<typename Element >
__global__ void BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity)
 
template<typename Element >
__global__ void BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor)
 

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void cutlass::reference::device::kernel::BatchedGemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRefCollectionA  tensor_collection_a,
TensorRefCollectionB  tensor_collection_b,
ScalarType  beta,
TensorRefCollectionC  tensor_collection_c,
AccumulatorType  initial_accum 
)

Computes a batch of general matrix products among matrices (tensors of rank=2) pointed to by TensorRefCollection objects.

template<typename Element >
__global__ void cutlass::reference::device::kernel::BlockCompareEqual ( int *  equal,
Element const *  ptr_A,
Element const *  ptr_B,
size_t  capacity 
)
template<typename Element >
__global__ void cutlass::reference::device::kernel::BlockCompareRelativelyEqual ( int *  equal,
Element const *  ptr_A,
Element const *  ptr_B,
size_t  capacity,
Element  epsilon,
Element  nonzero_floor 
)
template<typename Element , typename Func >
__global__ void cutlass::reference::device::kernel::BlockForEach ( Element *  ptr,
size_t  capacity,
typename Func::Params  params 
)
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void cutlass::reference::device::kernel::Gemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRefA  tensor_a,
TensorRefB  tensor_b,
ScalarType  beta,
TensorRefC  tensor_c,
TensorRefC  tensor_d,
AccumulatorType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Func , int Rank, typename Params >
__global__ void cutlass::reference::device::kernel::TensorDiagonalForEach ( Coord< Rank >  size,
Params  params,
int  start,
int  end 
)
template<typename Func , int Rank, typename Params >
__global__ void cutlass::reference::device::kernel::TensorForEach ( Coord< Rank >  size,
Params  params = Params() 
)
template<typename T >
__global__ void cutlass::reference::device::kernel::TensorInitializeGaussian ( Distribution  dist,
int64_t  seed,
int  dim_contiguous,
int  dim_strided,
T *  tensor,
int  ldm 
)
template<typename T >
__global__ void cutlass::reference::device::kernel::TensorInitializeIdentity ( Distribution  dist,
int64_t  seed,
int  dim_contiguous,
int  dim_strided,
T *  tensor,
int  ldm 
)
template<typename T >
__global__ void cutlass::reference::device::kernel::TensorInitializeLinear ( Distribution  dist,
int64_t  seed,
int  dim_contiguous,
int  dim_strided,
T *  tensor,
int  ldm 
)
template<typename T >
__global__ void cutlass::reference::device::kernel::TensorInitializeUniform ( Distribution  dist,
int64_t  seed,
int  dim_contiguous,
int  dim_strided,
T *  tensor,
int  ldm 
)