CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Classes | Functions
cutlass::reference::device Namespace Reference

Namespaces

 detail
 
 kernel
 
 thread
 

Classes

struct  BlockForEach
 
struct  Gemm
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAdd >
 Partial specialization for multiply-add. More...
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAddSaturate >
 Partial specialization for multiply-add-saturate. More...
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpXorPopc >
 Partial specialization for XOR-popc. More...
 
struct  TensorDiagonalForEach
 Launches a kernel calling a functor for each element along a tensor's diagonal. More...
 
struct  TensorForEach
 Launches a kernel calling a functor for each element in a tensor's index space. More...
 

Functions

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, AccumulatorType initial_accum)
 
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, AccumulatorType initial_accum)
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp >
void BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum)
 Computes a batch of GEMMs over a set of matrices of common dimension. More...
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c)
 
template<typename Element >
bool BlockCompareEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, int grid_size=0, int block_size=0)
 Performs a bit-level equality check between two blocks. More...
 
template<typename Element >
bool BlockCompareRelativelyEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor, int grid_size=0, int block_size=0)
 Performs a relative (tolerance-based) equality check between two blocks. More...
 
template<typename Element , typename Layout >
void TensorFillRandomGaussian (TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
 Fills a tensor with random values with a Gaussian distribution. More...
 
template<typename Element >
void BlockFillRandomGaussian (Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
 Fills a block with random values with a Gaussian distribution. More...
 
template<typename Element , typename Layout >
void TensorFillRandomUniform (TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
 Fills a tensor with random values with a uniform random distribution. More...
 
template<typename Element >
void BlockFillRandomUniform (Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
 Fills a block with random values with a uniform random distribution. More...
 
template<typename Element , typename Layout >
void TensorFillDiagonal (TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0))
 Fills a tensor everywhere with a unique value for its diagonal. More...
 
template<typename Element , typename Layout >
void TensorFill (TensorView< Element, Layout > view, Element val=Element(0))
 Fills a tensor with a uniform value. More...
 
template<typename Element , typename Layout >
void TensorFillIdentity (TensorView< Element, Layout > view)
 Fills a tensor's diagonal with 1 and 0 everywhere else. More...
 
template<typename Element , typename Layout >
void TensorUpdateDiagonal (TensorView< Element, Layout > view, Element diag=Element(1))
 Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorUpdateOffDiagonal (TensorView< Element, Layout > view, Element other=Element(1))
 Writes a uniform value to all elements in the tensor without modifying diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorFillLinear (TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0))
 Fills tensor with a linear combination of its coordinate and another vector. More...
 
template<typename Element >
void BlockFillSequential (Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
 Fills a block of data with sequential elements. More...
 
template<typename Element >
void BlockFillRandom (Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
 Fills a block of data with random elements drawn from the specified distribution. More...
 
template<typename Element , typename Layout >
void TensorCopyDiagonalIn (TensorView< Element, Layout > view, Element const *ptr)
 Copies a diagonal in from host memory without modifying off-diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorCopyDiagonalOut (Element *ptr, TensorView< Element, Layout > view)
 Copies the diagonal of a tensor into a dense buffer in host memory. More...
 

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp >
void cutlass::reference::device::BatchedGemm ( gemm::GemmCoord  problem_size,
int  batch_count,
ScalarType  alpha,
TensorRefCollectionA const &  tensor_a,
TensorRefCollectionB const &  tensor_b,
ScalarType  beta,
TensorRefCollectionC &  tensor_c,
AccumulatorType  initial_accum 
)
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void cutlass::reference::device::BatchedGemm ( gemm::GemmCoord  problem_size,
int  batch_count,
ScalarType  alpha,
TensorRefCollectionA const &  tensor_a,
TensorRefCollectionB const &  tensor_b,
ScalarType  beta,
TensorRefCollectionC &  tensor_c 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Element >
bool cutlass::reference::device::BlockCompareEqual ( Element const *  ptr_A,
Element const *  ptr_B,
size_t  capacity,
int  grid_size = 0,
int  block_size = 0 
)
template<typename Element >
bool cutlass::reference::device::BlockCompareRelativelyEqual ( Element const *  ptr_A,
Element const *  ptr_B,
size_t  capacity,
Element  epsilon,
Element  nonzero_floor,
int  grid_size = 0,
int  block_size = 0 
)
template<typename Element >
void cutlass::reference::device::BlockFillRandom ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
Distribution  dist 
)
template<typename Element >
void cutlass::reference::device::BlockFillRandomGaussian ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
Element  mean = Element(0),
Element  stddev = Element(1),
int  bits = -1 
)

Template Parameters
Element: Element type

Parameters
seed: seed for RNG
mean: Gaussian distribution's mean
stddev: Gaussian distribution's standard deviation
bits: If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data.
template<typename Element >
void cutlass::reference::device::BlockFillRandomUniform ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
Element  max = Element(1),
Element  min = Element(0),
int  bits = -1 
)

Parameters
seed: seed for RNG
max: upper bound of distribution
min: lower bound for distribution
bits: If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data.
template<typename Element >
void cutlass::reference::device::BlockFillSequential ( Element *  ptr,
int64_t  capacity,
Element  v = Element(1),
Element  s = Element(0) 
)
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::device::compute_gemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c,
TensorRef< ElementC, LayoutC >  tensor_d,
AccumulatorType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

Explicitly naming types needed by this template can be cumbersome, particularly for the accumulator type, so a function argument 'initial_accum' is exposed. Passing AccumulatorType(0) as the last function argument can be easier than naming all template arguments explicitly.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::device::compute_gemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c,
AccumulatorType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

This assumes the accumulator type is the same type as the scalars.

template<typename Element , typename Layout >
void cutlass::reference::device::TensorCopyDiagonalIn ( TensorView< Element, Layout >  view,
Element const *  ptr 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
ptr: dense buffer of elements
template<typename Element , typename Layout >
void cutlass::reference::device::TensorCopyDiagonalOut ( Element *  ptr,
TensorView< Element, Layout >  view 
)

Template Parameters
Layout: Layout function

Parameters
ptr: dense buffer of elements
view: source tensor
template<typename Element , typename Layout >
void cutlass::reference::device::TensorFill ( TensorView< Element, Layout >  view,
Element  val = Element(0) 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
val: value to uniformly fill it with
template<typename Element , typename Layout >
void cutlass::reference::device::TensorFillDiagonal ( TensorView< Element, Layout >  view,
Element  diag = Element(1),
Element  other = Element(0) 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
diag: value to write in the diagonal
other: value to write off the diagonal
template<typename Element , typename Layout >
void cutlass::reference::device::TensorFillIdentity ( TensorView< Element, Layout >  view)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor

template<typename Element , typename Layout >
void cutlass::reference::device::TensorFillLinear ( TensorView< Element, Layout >  view,
Array< Element, Layout::kRank > const &  v,
Element  s = Element(0) 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::device::TensorFillRandomGaussian ( TensorView< Element, Layout >  view,
uint64_t  seed,
Element  mean = Element(0),
Element  stddev = Element(1),
int  bits = -1 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
seed: seed for RNG
mean: Gaussian distribution's mean
stddev: Gaussian distribution's standard deviation
bits: If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data.
template<typename Element , typename Layout >
void cutlass::reference::device::TensorFillRandomUniform ( TensorView< Element, Layout >  view,
uint64_t  seed,
Element  max = Element(1),
Element  min = Element(0),
int  bits = -1 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
seed: seed for RNG
max: upper bound of distribution
min: lower bound for distribution
bits: If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data.
template<typename Element , typename Layout >
void cutlass::reference::device::TensorUpdateDiagonal ( TensorView< Element, Layout >  view,
Element  diag = Element(1) 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::device::TensorUpdateOffDiagonal ( TensorView< Element, Layout >  view,
Element  other = Element(1) 
)

Template Parameters
Layout: Layout function

Parameters
view: destination tensor