CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Classes | Functions
cutlass::reference::host Namespace Reference

Namespaces

 detail
 Defines several helpers.
 

Classes

struct  BlockForEach
 
struct  Gemm
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAdd >
 Partial specialization for multiply-add. More...
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAddSaturate >
 Partial specialization for multiply-add-saturate. More...
 
struct  Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpXorPopc >
 Partial specialization for XOR-popc. More...
 

Functions

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, ComputeType initial_accum)
 
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, ComputeType initial_accum)
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum)
 Computes a batch of GEMMs over a set of matrices of common dimension. More...
 
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c)
 
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename ConvertOp = NumericConverter<ElementC, ScalarType>, typename InnerProductOp = multiply_add<ComputeType>>
void GemmComplex (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, ComplexTransform transform_a, TensorRef< ElementB, LayoutB > tensor_b, ComplexTransform transform_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, ComputeType initial_accum)
 
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType >
void GemmComplex (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, ComplexTransform transform_a, TensorRef< ElementB, LayoutB > tensor_b, ComplexTransform transform_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c)
 
template<typename Element , typename Layout >
bool TensorEquals (TensorView< Element, Layout > const &lhs, TensorView< Element, Layout > const &rhs)
 Returns true if two tensor views are equal. More...
 
template<typename Element , typename Layout >
bool TensorNotEquals (TensorView< Element, Layout > const &lhs, TensorView< Element, Layout > const &rhs)
 Returns true if two tensor views are NOT equal. More...
 
template<typename Element , typename Layout >
bool TensorContains (TensorView< Element, Layout > const &view, Element value)
 Returns true if a value is present in a tensor. More...
 
template<typename Element , typename Layout >
std::pair< bool, Coord< Layout::kRank > > TensorFind (TensorView< Element, Layout > const &view, Element value)
 Returns a pair indicating whether a value exists in a tensor and, if so, the location of its first occurrence. More...
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void TensorCopy (TensorView< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src, F const &transform)
 Copies elements from one tensor view into another, satisfying bounds of each tensor. More...
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void TensorCopy (TensorView< DstElement, DstLayout > dst, TensorRef< SrcElement, SrcLayout > src, F const &transform)
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void TensorCopy (TensorRef< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src, F const &transform)
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void TensorCopy (TensorView< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src)
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void TensorCopy (TensorView< DstElement, DstLayout > dst, TensorRef< SrcElement, SrcLayout > src)
 
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void TensorCopy (TensorRef< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src)
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorAdd (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
 Adds two tensors and stores in the destination tensor: d = a + b. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void TensorAdd (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
 Adds a tensor in place: d = d .+ a. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorSub (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
 Subtracts two tensors and stores in the destination tensor: d = a - b. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorSub (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
 Subtracts a tensor in place: d = d .- a. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorMul (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
 Multiplies two tensors and stores in the destination tensor: d = a .* b. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void TensorMul (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
 Multiplies tensors in place: d = d .* a. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorDiv (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
 Divides two tensors and stores in the destination tensor: d = a ./ b. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void TensorDiv (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
 Divides tensors in place: d = d ./ a. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void TensorModulus (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
 Computes the element-wise modulus of two tensors and stores in the destination tensor: d = a .% b. More...
 
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void TensorModulus (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
 Computes the element-wise modulus of tensors in place: d = d .% a. More...
 
template<typename Element , typename Layout >
void TensorFill (TensorView< Element, Layout > dst, Element val=Element(0))
 Fills a tensor with a uniform value. More...
 
template<typename Element , typename Layout >
void TensorFillRandomGaussian (TensorView< Element, Layout > dst, uint64_t seed, double mean=0, double stddev=1, int bits=-1)
 Fills a tensor with random values with a Gaussian distribution. More...
 
template<typename Element >
void BlockFillRandomGaussian (Element *ptr, size_t capacity, uint64_t seed, double mean=0, double stddev=1, int bits=-1)
 Fills a block of data with random values with a Gaussian distribution. More...
 
template<typename Element , typename Layout >
void TensorFillRandomUniform (TensorView< Element, Layout > dst, uint64_t seed, double max=1, double min=0, int bits=-1)
 Fills a tensor with random values with a uniform random distribution. More...
 
template<typename Element >
void BlockFillRandomUniform (Element *ptr, size_t capacity, uint64_t seed, double max=1, double min=0, int bits=-1)
 Fills a block of data with random values with a uniform random distribution. More...
 
template<typename Element , typename Layout >
void TensorFillDiagonal (TensorView< Element, Layout > dst, Element diag=Element(1), Element other=Element(0))
 Fills a tensor everywhere with a unique value for its diagonal. More...
 
template<typename Element , typename Layout >
void TensorFillIdentity (TensorView< Element, Layout > dst)
 Helper to fill a tensor's diagonal with 1 and 0 everywhere else. More...
 
template<typename Element , typename Layout >
void TensorUpdateDiagonal (TensorView< Element, Layout > dst, Element val=Element(1))
 Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorUpdateOffDiagonal (TensorView< Element, Layout > dst, Element other=Element(1))
 Writes a uniform value to all elements in the tensor without modifying diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorFillLinear (TensorView< Element, Layout > dst, Array< Element, Layout::kRank > const &v, Element s=Element(0))
 Fills tensor with a linear combination of its coordinate and another vector. More...
 
template<typename Element , typename Layout >
void TensorFillSequential (TensorView< Element, Layout > dst, Element s=Element(0))
 Fills tensor with a linear combination of its coordinate and another vector. More...
 
template<typename Element >
void BlockFillSequential (Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
 Fills a block of data with sequential elements. More...
 
template<typename Element >
void BlockFillRandom (Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
 Fills a block of data with random elements drawn from the given distribution. More...
 
template<typename Element , typename Layout >
void TensorCopyDiagonalIn (TensorView< Element, Layout > dst, Element const *ptr)
 Copies a diagonal in from host memory without modifying off-diagonal elements. More...
 
template<typename Element , typename Layout >
void TensorCopyDiagonalOut (Element *ptr, TensorView< Element, Layout > src)
 Copies the diagonal of a tensor into a dense buffer in host memory. More...
 
template<typename Func , int Rank>
void TensorForEach (Coord< Rank > extent, Func &func)
 Iterates over the index space of a tensor. More...
 
template<typename Func , int Rank>
void TensorForEachLambda (Coord< Rank > extent, Func func)
 Iterates over the index space of a tensor and calls a C++ lambda. More...
 
template<typename Element , typename Layout , typename ElementReduction >
ElementReduction TensorNorm (TensorView< Element, Layout > view, ElementReduction accumulator)
 Computes the p=2 norm of the elements of a tensor with arbitrary reduction data type. More...
 
template<typename Element , typename Layout >
double TensorNorm (TensorView< Element, Layout > view)
 Computes the p=2 norm of the elements of a tensor. More...
 

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void cutlass::reference::host::BatchedGemm ( gemm::GemmCoord  problem_size,
int  batch_count,
ScalarType  alpha,
TensorRefCollectionA const &  tensor_a,
TensorRefCollectionB const &  tensor_b,
ScalarType  beta,
TensorRefCollectionC &  tensor_c,
AccumulatorType  initial_accum 
)
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void cutlass::reference::host::BatchedGemm ( gemm::GemmCoord  problem_size,
int  batch_count,
ScalarType  alpha,
TensorRefCollectionA const &  tensor_a,
TensorRefCollectionB const &  tensor_b,
ScalarType  beta,
TensorRefCollectionC &  tensor_c 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Element >
void cutlass::reference::host::BlockFillRandom ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
Distribution  dist 
)
template<typename Element >
void cutlass::reference::host::BlockFillRandomGaussian ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
double  mean = 0,
double  stddev = 1,
int  bits = -1 
)

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters
ptr: destination buffer
capacity: number of elements
seed: seed for RNG
mean: Gaussian distribution's mean
stddev: Gaussian distribution's standard deviation
template<typename Element >
void cutlass::reference::host::BlockFillRandomUniform ( Element *  ptr,
size_t  capacity,
uint64_t  seed,
double  max = 1,
double  min = 0,
int  bits = -1 
)

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters
seed: seed for RNG
max: upper bound of distribution
min: lower bound for distribution
template<typename Element >
void cutlass::reference::host::BlockFillSequential ( Element *  ptr,
int64_t  capacity,
Element  v = Element(1),
Element  s = Element(0) 
)
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::host::compute_gemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c,
TensorRef< ElementC, LayoutC >  tensor_d,
ComputeType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void cutlass::reference::host::compute_gemm ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c,
ComputeType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename ConvertOp = NumericConverter<ElementC, ScalarType>, typename InnerProductOp = multiply_add<ComputeType>>
void cutlass::reference::host::GemmComplex ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
ComplexTransform  transform_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ComplexTransform  transform_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c,
ComputeType  initial_accum 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

Explicitly naming types needed by this template can be cumbersome, particularly for the accumulator type, so a function argument 'initial_accum' is exposed. Passing AccumulatorType(0) as the last function argument can be easier than naming all template arguments explicitly.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType >
void cutlass::reference::host::GemmComplex ( gemm::GemmCoord  problem_size,
ScalarType  alpha,
TensorRef< ElementA, LayoutA >  tensor_a,
ComplexTransform  transform_a,
TensorRef< ElementB, LayoutB >  tensor_b,
ComplexTransform  transform_b,
ScalarType  beta,
TensorRef< ElementC, LayoutC >  tensor_c 
)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

This assumes the accumulator type is the same type as the scalars.

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorAdd ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a,
TensorRef< ElementB, LayoutB >  b 
)
Parameters
d: destination tensor view
a: A tensor reference
b: B tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void cutlass::reference::host::TensorAdd ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a 
)
Parameters
d: destination tensor view
a: A tensor reference
template<typename Element , typename Layout >
bool cutlass::reference::host::TensorContains ( TensorView< Element, Layout > const &  view,
Element  value 
)

Layout: layout function (template parameter)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void cutlass::reference::host::TensorCopy ( TensorView< DstElement, DstLayout >  dst,
TensorView< SrcElement, SrcLayout >  src,
F const &  transform 
)
template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void cutlass::reference::host::TensorCopy ( TensorView< DstElement, DstLayout >  dst,
TensorRef< SrcElement, SrcLayout >  src,
F const &  transform 
)

Copies elements from a TensorRef into a TensorView. Assumes source tensor has sufficient extent to avoid out of bounds accesses.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void cutlass::reference::host::TensorCopy ( TensorRef< DstElement, DstLayout >  dst,
TensorView< SrcElement, SrcLayout >  src,
F const &  transform 
)

Copies elements from a TensorView into a TensorRef. Assumes destination tensor has sufficient extent to avoid out of bounds accesses.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void cutlass::reference::host::TensorCopy ( TensorView< DstElement, DstLayout >  dst,
TensorView< SrcElement, SrcLayout >  src 
)

Copies elements from one tensor view into another, satisfying bounds of each tensor. Succeeds if SrcElement can be converted to DstElement.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void cutlass::reference::host::TensorCopy ( TensorView< DstElement, DstLayout >  dst,
TensorRef< SrcElement, SrcLayout >  src 
)

Copies elements from a TensorRef into a TensorView. Assumes source tensor has sufficient extent to avoid out of bounds accesses. Succeeds if SrcElement can be converted to DstElement.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void cutlass::reference::host::TensorCopy ( TensorRef< DstElement, DstLayout >  dst,
TensorView< SrcElement, SrcLayout >  src 
)

Copies elements from a TensorView into a TensorRef. Assumes destination tensor has sufficient extent to avoid out of bounds accesses. Succeeds if SrcElement can be converted to DstElement.

template<typename Element , typename Layout >
void cutlass::reference::host::TensorCopyDiagonalIn ( TensorView< Element, Layout >  dst,
Element const *  ptr 
)

Layout: layout function (template parameter)

ptr: dense buffer of elements

Parameters
dst: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::host::TensorCopyDiagonalOut ( Element *  ptr,
TensorView< Element, Layout >  src 
)

Layout: layout function (template parameter)

src: source tensor

Parameters
ptr: dense buffer of elements
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorDiv ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a,
TensorRef< ElementB, LayoutB >  b 
)
Parameters
d: destination tensor view
a: A tensor reference
b: B tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void cutlass::reference::host::TensorDiv ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a 
)
Parameters
d: destination tensor view
a: A tensor reference
template<typename Element , typename Layout >
bool cutlass::reference::host::TensorEquals ( TensorView< Element, Layout > const &  lhs,
TensorView< Element, Layout > const &  rhs 
)

Layout: layout function (template parameter)

template<typename Element , typename Layout >
void cutlass::reference::host::TensorFill ( TensorView< Element, Layout >  dst,
Element  val = Element(0) 
)

Layout: layout function (template parameter)

val: value to uniformly fill it with

Parameters
dst: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillDiagonal ( TensorView< Element, Layout >  dst,
Element  diag = Element(1),
Element  other = Element(0) 
)

Layout: layout function (template parameter)

other: value to write off the diagonal

Parameters
dst: destination tensor
diag: value to write in the diagonal
template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillIdentity ( TensorView< Element, Layout >  dst)

Layout: layout function (template parameter)

dst: destination tensor

template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillLinear ( TensorView< Element, Layout >  dst,
Array< Element, Layout::kRank > const &  v,
Element  s = Element(0) 
)

Layout: layout function (template parameter)

Parameters
dst: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillRandomGaussian ( TensorView< Element, Layout >  dst,
uint64_t  seed,
double  mean = 0,
double  stddev = 1,
int  bits = -1 
)

Layout: layout function (template parameter)

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters
dst: destination tensor
seed: seed for RNG
mean: Gaussian distribution's mean
stddev: Gaussian distribution's standard deviation
template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillRandomUniform ( TensorView< Element, Layout >  dst,
uint64_t  seed,
double  max = 1,
double  min = 0,
int  bits = -1 
)

Layout: layout function (template parameter)

bits: If non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters
dst: destination tensor
seed: seed for RNG
max: upper bound of distribution
min: lower bound for distribution
template<typename Element , typename Layout >
void cutlass::reference::host::TensorFillSequential ( TensorView< Element, Layout >  dst,
Element  s = Element(0) 
)

Layout: layout function (template parameter)

Parameters
dst: destination tensor
template<typename Element , typename Layout >
std::pair<bool, Coord<Layout::kRank> > cutlass::reference::host::TensorFind ( TensorView< Element, Layout > const &  view,
Element  value 
)

Returns a pair containing a boolean of whether a value exists in a tensor and the location of the first occurrence. If the value is not contained in the tensor, the second element of the pair is undefined.

template<typename Func , int Rank>
void cutlass::reference::host::TensorForEach ( Coord< Rank >  extent,
Func &  func 
)

Rank: rank of index space (template parameter)

template<typename Func , int Rank>
void cutlass::reference::host::TensorForEachLambda ( Coord< Rank >  extent,
Func  func 
)

Rank: rank of index space (template parameter)

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorModulus ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a,
TensorRef< ElementB, LayoutB >  b 
)
Parameters
d: destination tensor view
a: A tensor reference
b: B tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void cutlass::reference::host::TensorModulus ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a 
)
Parameters
d: destination tensor view
a: A tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorMul ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a,
TensorRef< ElementB, LayoutB >  b 
)
Parameters
d: destination tensor view
a: A tensor reference
b: B tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void cutlass::reference::host::TensorMul ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a 
)
Parameters
d: destination tensor view
a: A tensor reference
template<typename Element , typename Layout , typename ElementReduction >
ElementReduction cutlass::reference::host::TensorNorm ( TensorView< Element, Layout >  view,
ElementReduction  accumulator 
)
template<typename Element , typename Layout >
double cutlass::reference::host::TensorNorm ( TensorView< Element, Layout >  view)
template<typename Element , typename Layout >
bool cutlass::reference::host::TensorNotEquals ( TensorView< Element, Layout > const &  lhs,
TensorView< Element, Layout > const &  rhs 
)

Layout: layout function (template parameter)

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorSub ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a,
TensorRef< ElementB, LayoutB >  b 
)
Parameters
d: destination tensor view
a: A tensor reference
b: B tensor reference
template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void cutlass::reference::host::TensorSub ( TensorView< ElementD, LayoutD >  d,
TensorRef< ElementA, LayoutA >  a 
)
Parameters
d: destination tensor view
a: A tensor reference
template<typename Element , typename Layout >
void cutlass::reference::host::TensorUpdateDiagonal ( TensorView< Element, Layout >  dst,
Element  val = Element(1) 
)

Layout: layout function (template parameter)

Parameters
dst: destination tensor
template<typename Element , typename Layout >
void cutlass::reference::host::TensorUpdateOffDiagonal ( TensorView< Element, Layout >  dst,
Element  other = Element(1) 
)

Layout: layout function (template parameter)

Parameters
dst: destination tensor