Namespaces
	detail
	Defines several helpers.

Classes
struct	BlockForEach

struct	Gemm

struct	Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAdd >
	Partial specialization for multiply-add. More...

struct	Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAddSaturate >
	Partial specialization for multiply-add-saturate. More...

struct	Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpXorPopc >
	Partial specialization for XOR-popc. More...

Functions
template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void	compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, ComputeType initial_accum)

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>
void	compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, ComputeType initial_accum)

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void	BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum)
	Computes a batch of GEMMs over a set of matrices of common dimension. More...

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >
void	BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c)

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename ConvertOp = NumericConverter<ElementC, ScalarType>, typename InnerProductOp = multiply_add<ComputeType>>
void	GemmComplex (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, ComplexTransform transform_a, TensorRef< ElementB, LayoutB > tensor_b, ComplexTransform transform_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, ComputeType initial_accum)

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType >
void	GemmComplex (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, ComplexTransform transform_a, TensorRef< ElementB, LayoutB > tensor_b, ComplexTransform transform_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c)

template<typename Element , typename Layout >
bool	TensorEquals (TensorView< Element, Layout > const &lhs, TensorView< Element, Layout > const &rhs)
	Returns true if two tensor views are equal. More...

template<typename Element , typename Layout >
bool	TensorNotEquals (TensorView< Element, Layout > const &lhs, TensorView< Element, Layout > const &rhs)
	Returns true if two tensor views are NOT equal. More...

template<typename Element , typename Layout >
bool	TensorContains (TensorView< Element, Layout > const &view, Element value)
	Returns true if a value is present in a tensor. More...

template<typename Element , typename Layout >
std::pair< bool, Coord< Layout::kRank > >	TensorFind (TensorView< Element, Layout > const &view, Element value)
	Returns a pair containing a boolean of whether a value exists in a tensor and the location of its first occurrence. More...

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void	TensorCopy (TensorView< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src, F const &transform)
	Copies elements from one tensor view into another, satisfying bounds of each tensor. More...

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void	TensorCopy (TensorView< DstElement, DstLayout > dst, TensorRef< SrcElement, SrcLayout > src, F const &transform)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void	TensorCopy (TensorRef< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src, F const &transform)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void	TensorCopy (TensorView< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >
void	TensorCopy (TensorView< DstElement, DstLayout > dst, TensorRef< SrcElement, SrcLayout > src)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >
void	TensorCopy (TensorRef< DstElement, DstLayout > dst, TensorView< SrcElement, SrcLayout > src)

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorAdd (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
	Adds two tensors and stores in the destination tensor: d = a + b. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void	TensorAdd (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
	Adds a tensor in place: d = d .+ a. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorSub (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
	Subtracts two tensors and stores in the destination tensor: d = a - b. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorSub (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
	Subtracts two tensors in place: d = d .- a. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorMul (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
	Multiplies two tensors and stores in the destination tensor: d = a .* b. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void	TensorMul (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
	Multiplies tensors in place: d = d .* a. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorDiv (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
	Divides two tensors and stores in the destination tensor: d = a ./ b. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void	TensorDiv (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
	Divides tensors in place: d = d ./ a. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >
void	TensorModulus (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a, TensorRef< ElementB, LayoutB > b)
	Computes the element-wise modulus of two tensors and stores in the destination tensor: d = a % b. More...

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >
void	TensorModulus (TensorView< ElementD, LayoutD > d, TensorRef< ElementA, LayoutA > a)
	Computes the element-wise modulus in place: d = d % a. More...

template<typename Element , typename Layout >
void	TensorFill (TensorView< Element, Layout > dst, Element val=Element(0))
	Fills a tensor with a uniform value. More...

template<typename Element , typename Layout >
void	TensorFillRandomGaussian (TensorView< Element, Layout > dst, uint64_t seed, double mean=0, double stddev=1, int bits=-1)
	Fills a tensor with random values with a Gaussian distribution. More...

template<typename Element >
void	BlockFillRandomGaussian (Element *ptr, size_t capacity, uint64_t seed, double mean=0, double stddev=1, int bits=-1)
	Fills a block of data with random values with a Gaussian distribution. More...

template<typename Element , typename Layout >
void	TensorFillRandomUniform (TensorView< Element, Layout > dst, uint64_t seed, double max=1, double min=0, int bits=-1)
	Fills a tensor with random values with a uniform random distribution. More...

template<typename Element >
void	BlockFillRandomUniform (Element *ptr, size_t capacity, uint64_t seed, double max=1, double min=0, int bits=-1)
	Fills a block of data with random values with a uniform random distribution. More...

template<typename Element , typename Layout >
void	TensorFillDiagonal (TensorView< Element, Layout > dst, Element diag=Element(1), Element other=Element(0))
	Fills a tensor everywhere with a unique value for its diagonal. More...

template<typename Element , typename Layout >
void	TensorFillIdentity (TensorView< Element, Layout > dst)
	Helper to fill a tensor's diagonal with 1 and 0 everywhere else. More...

template<typename Element , typename Layout >
void	TensorUpdateDiagonal (TensorView< Element, Layout > dst, Element val=Element(1))
	Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements. More...

template<typename Element , typename Layout >
void	TensorUpdateOffDiagonal (TensorView< Element, Layout > dst, Element other=Element(1))
	Writes a uniform value to all elements in the tensor without modifying diagonal elements. More...

template<typename Element , typename Layout >
void	TensorFillLinear (TensorView< Element, Layout > dst, Array< Element, Layout::kRank > const &v, Element s=Element(0))
	Fills tensor with a linear combination of its coordinate and another vector. More...

template<typename Element , typename Layout >
void	TensorFillSequential (TensorView< Element, Layout > dst, Element s=Element(0))
	Fills tensor with a linear combination of its coordinate and another vector. More...

template<typename Element >
void	BlockFillSequential (Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
	Fills a block of data with sequential elements. More...

template<typename Element >
void	BlockFillRandom (Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
	Fills a block of data with random elements drawn from the specified distribution. More...

template<typename Element , typename Layout >
void	TensorCopyDiagonalIn (TensorView< Element, Layout > dst, Element const *ptr)
	Copies a diagonal in from host memory without modifying off-diagonal elements. More...

template<typename Element , typename Layout >
void	TensorCopyDiagonalOut (Element *ptr, TensorView< Element, Layout > src)
	Copies the diagonal of a tensor into a dense buffer in host memory. More...

template<typename Func , int Rank>
void	TensorForEach (Coord< Rank > extent, Func &func)
	Iterates over the index space of a tensor. More...

template<typename Func , int Rank>
void	TensorForEachLambda (Coord< Rank > extent, Func func)
	Iterates over the index space of a tensor and calls a C++ lambda. More...

template<typename Element , typename Layout , typename ElementReduction >
ElementReduction	TensorNorm (TensorView< Element, Layout > view, ElementReduction accumulator)
	Computes the p=2 norm of the elements of a tensor with arbitrary reduction data type. More...

template<typename Element , typename Layout >
double	TensorNorm (TensorView< Element, Layout > view)
	Computes the p=2 norm of the elements of a tensor. More...

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >

void cutlass::reference::host::BatchedGemm	(	gemm::GemmCoord	problem_size,
		int	batch_count,
		ScalarType	alpha,
		TensorRefCollectionA const &	tensor_a,
		TensorRefCollectionB const &	tensor_b,
		ScalarType	beta,
		TensorRefCollectionC &	tensor_c,
		AccumulatorType	initial_accum
	)

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType >

void cutlass::reference::host::BatchedGemm	(	gemm::GemmCoord	problem_size,
		int	batch_count,
		ScalarType	alpha,
		TensorRefCollectionA const &	tensor_a,
		TensorRefCollectionB const &	tensor_b,
		ScalarType	beta,
		TensorRefCollectionC &	tensor_c
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Element >

void cutlass::reference::host::BlockFillRandom	(	Element *	ptr,
		size_t	capacity,
		uint64_t	seed,
		Distribution	dist
	)

template<typename Element >

void cutlass::reference::host::BlockFillRandomGaussian	(	Element *	ptr,
		size_t	capacity,
		uint64_t	seed,
		double	mean = `0`,
		double	stddev = `1`,
		int	bits = `-1`
	)

Parameter `bits`: if non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters

ptr	destination buffer
capacity	number of elements
seed	seed for RNG
mean	Gaussian distribution's mean
stddev	Gaussian distribution's standard deviation

template<typename Element >

void cutlass::reference::host::BlockFillRandomUniform	(	Element *	ptr,
		size_t	capacity,
		uint64_t	seed,
		double	max = `1`,
		double	min = `0`,
		int	bits = `-1`
	)

Parameter `bits`: if non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters

seed	seed for RNG
max	upper bound of distribution
min	lower bound for distribution

template<typename Element >

void cutlass::reference::host::BlockFillSequential	(	Element *	ptr,
		int64_t	capacity,
		Element	v = `Element(1)`,
		Element	s = `Element(0)`
	)

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>

void cutlass::reference::host::compute_gemm	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRef< ElementA, LayoutA >	tensor_a,
		TensorRef< ElementB, LayoutB >	tensor_b,
		ScalarType	beta,
		TensorRef< ElementC, LayoutC >	tensor_c,
		TensorRef< ElementC, LayoutC >	tensor_d,
		ComputeType	initial_accum
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>>

void cutlass::reference::host::compute_gemm	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRef< ElementA, LayoutA >	tensor_a,
		TensorRef< ElementB, LayoutB >	tensor_b,
		ScalarType	beta,
		TensorRef< ElementC, LayoutC >	tensor_c,
		ComputeType	initial_accum
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename ConvertOp = NumericConverter<ElementC, ScalarType>, typename InnerProductOp = multiply_add<ComputeType>>

void cutlass::reference::host::GemmComplex	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRef< ElementA, LayoutA >	tensor_a,
		ComplexTransform	transform_a,
		TensorRef< ElementB, LayoutB >	tensor_b,
		ComplexTransform	transform_b,
		ScalarType	beta,
		TensorRef< ElementC, LayoutC >	tensor_c,
		ComputeType	initial_accum
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

Explicitly naming types needed by this template can be cumbersome, particularly for the accumulator type, so a function argument 'initial_accum' is exposed. Passing AccumulatorType(0) as the last function argument can be easier than naming all template arguments explicitly.

template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType >

void cutlass::reference::host::GemmComplex	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRef< ElementA, LayoutA >	tensor_a,
		ComplexTransform	transform_a,
		TensorRef< ElementB, LayoutB >	tensor_b,
		ComplexTransform	transform_b,
		ScalarType	beta,
		TensorRef< ElementC, LayoutC >	tensor_c
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

This assumes the accumulator type is the same type as the scalars.

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorAdd	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a,
		TensorRef< ElementB, LayoutB >	b
	)

Parameters

d	destination tensor view
a	A tensor reference
b	B tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >

void cutlass::reference::host::TensorAdd	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a
	)

Parameters

d	destination tensor view
a	A tensor reference

template<typename Element , typename Layout >

bool cutlass::reference::host::TensorContains	(	TensorView< Element, Layout > const &	view,
		Element	value
	)

Returns true if a value is present in a tensor. Template parameter `Layout`: layout function.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >

void cutlass::reference::host::TensorCopy	(	TensorView< DstElement, DstLayout >	dst,
		TensorView< SrcElement, SrcLayout >	src,
		F const &	transform
	)

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >

void cutlass::reference::host::TensorCopy	(	TensorView< DstElement, DstLayout >	dst,
		TensorRef< SrcElement, SrcLayout >	src,
		F const &	transform
	)

Copies elements from a TensorRef into a TensorView. Assumes source tensor has sufficient extent to avoid out of bounds accesses.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >

void cutlass::reference::host::TensorCopy	(	TensorRef< DstElement, DstLayout >	dst,
		TensorView< SrcElement, SrcLayout >	src,
		F const &	transform
	)

Copies elements from a TensorView into a TensorRef. Assumes destination tensor has sufficient extent to avoid out of bounds accesses.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >

void cutlass::reference::host::TensorCopy	(	TensorView< DstElement, DstLayout >	dst,
		TensorView< SrcElement, SrcLayout >	src
	)

Copies elements from one tensor view into another, satisfying bounds of each tensor. Succeeds if SrcElement can be converted to DstElement.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout , typename F >

void cutlass::reference::host::TensorCopy	(	TensorView< DstElement, DstLayout >	dst,
		TensorRef< SrcElement, SrcLayout >	src
	)

Copies elements from a TensorRef into a TensorView, satisfying bounds of the destination view. Succeeds if SrcElement can be converted to DstElement.

template<typename DstElement , typename DstLayout , typename SrcElement , typename SrcLayout >

void cutlass::reference::host::TensorCopy	(	TensorRef< DstElement, DstLayout >	dst,
		TensorView< SrcElement, SrcLayout >	src
	)

Copies elements from a TensorView into a TensorRef, satisfying bounds of the source view. Succeeds if SrcElement can be converted to DstElement.

template<typename Element , typename Layout >

void cutlass::reference::host::TensorCopyDiagonalIn	(	TensorView< Element, Layout >	dst,
		Element const *	ptr
	)

Template parameter `Layout`: layout function.

Parameter `ptr`: dense buffer of elements.

Parameters

dst	destination tensor

template<typename Element , typename Layout >

void cutlass::reference::host::TensorCopyDiagonalOut	(	Element *	ptr,
		TensorView< Element, Layout >	src
	)

Template parameter `Layout`: layout function.

Parameter `src`: source tensor.

Parameters

ptr	dense buffer of elements

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorDiv	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a,
		TensorRef< ElementB, LayoutB >	b
	)

Parameters

d	destination tensor view
a	A tensor reference
b	B tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >

void cutlass::reference::host::TensorDiv	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a
	)

Parameters

d	destination tensor view
a	A tensor reference

template<typename Element , typename Layout >

bool cutlass::reference::host::TensorEquals	(	TensorView< Element, Layout > const &	lhs,
		TensorView< Element, Layout > const &	rhs
	)

Returns true if two tensor views are equal. Template parameter `Layout`: layout function.

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFill	(	TensorView< Element, Layout >	dst,
		Element	val = `Element(0)`
	)

Template parameter `Layout`: layout function.

Parameter `val`: value to uniformly fill the tensor with.

Parameters

dst	destination tensor

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillDiagonal	(	TensorView< Element, Layout >	dst,
		Element	diag = `Element(1)`,
		Element	other = `Element(0)`
	)

Template parameter `Layout`: layout function.

Parameter `other`: value to write off the diagonal.

Parameters

dst	destination tensor
diag	value to write in the diagonal

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillIdentity ( TensorView< Element, Layout > dst )

Template parameter `Layout`: layout function.

Parameter `dst`: destination tensor.

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillLinear	(	TensorView< Element, Layout >	dst,
		Array< Element, Layout::kRank > const &	v,
		Element	s = `Element(0)`
	)

Template parameter `Layout`: layout function.

Parameters

dst	destination tensor

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillRandomGaussian	(	TensorView< Element, Layout >	dst,
		uint64_t	seed,
		double	mean = `0`,
		double	stddev = `1`,
		int	bits = `-1`
	)

Template parameter `Layout`: layout function.

Parameter `bits`: if non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters

dst	destination tensor
seed	seed for RNG
mean	Gaussian distribution's mean
stddev	Gaussian distribution's standard deviation

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillRandomUniform	(	TensorView< Element, Layout >	dst,
		uint64_t	seed,
		double	max = `1`,
		double	min = `0`,
		int	bits = `-1`
	)

Template parameter `Layout`: layout function.

Parameter `bits`: if non-negative, specifies the number of fractional bits that are not truncated to zero. Permits reducing precision of data.

Parameters

dst	destination tensor
seed	seed for RNG
max	upper bound of distribution
min	lower bound for distribution

template<typename Element , typename Layout >

void cutlass::reference::host::TensorFillSequential	(	TensorView< Element, Layout >	dst,
		Element	s = `Element(0)`
	)

Template parameter `Layout`: layout function.

Parameters

dst	destination tensor

template<typename Element , typename Layout >

std::pair<bool, Coord<Layout::kRank> > cutlass::reference::host::TensorFind	(	TensorView< Element, Layout > const &	view,
		Element	value
	)

Returns a pair containing a boolean of whether a value exists in a tensor and the location of the first occurrence. If the value is not contained in the tensor, the second element of the pair is undefined.

template<typename Func , int Rank>

void cutlass::reference::host::TensorForEach	(	Coord< Rank >	extent,
		Func &	func
	)

Template parameter `Rank`: rank of index space.

template<typename Func , int Rank>

void cutlass::reference::host::TensorForEachLambda	(	Coord< Rank >	extent,
		Func	func
	)

Template parameter `Rank`: rank of index space.

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorModulus	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a,
		TensorRef< ElementB, LayoutB >	b
	)

Parameters

d	destination tensor view
a	A tensor reference
b	B tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >

void cutlass::reference::host::TensorModulus	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a
	)

Parameters

d	destination tensor view
a	A tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorMul	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a,
		TensorRef< ElementB, LayoutB >	b
	)

Parameters

d	destination tensor view
a	A tensor reference
b	B tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA >

void cutlass::reference::host::TensorMul	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a
	)

Parameters

d	destination tensor view
a	A tensor reference

template<typename Element , typename Layout , typename ElementReduction >

ElementReduction cutlass::reference::host::TensorNorm	(	TensorView< Element, Layout >	view,
		ElementReduction	accumulator
	)

template<typename Element , typename Layout >

double cutlass::reference::host::TensorNorm ( TensorView< Element, Layout > view )

template<typename Element , typename Layout >

bool cutlass::reference::host::TensorNotEquals	(	TensorView< Element, Layout > const &	lhs,
		TensorView< Element, Layout > const &	rhs
	)

Returns true if two tensor views are NOT equal. Template parameter `Layout`: layout function.

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorSub	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a,
		TensorRef< ElementB, LayoutB >	b
	)

Parameters

d	destination tensor view
a	A tensor reference
b	B tensor reference

template<typename ElementD , typename LayoutD , typename ElementA , typename LayoutA , typename ElementB , typename LayoutB >

void cutlass::reference::host::TensorSub	(	TensorView< ElementD, LayoutD >	d,
		TensorRef< ElementA, LayoutA >	a
	)

Parameters

d	destination tensor view
a	A tensor reference

template<typename Element , typename Layout >

void cutlass::reference::host::TensorUpdateDiagonal	(	TensorView< Element, Layout >	dst,
		Element	val = `Element(1)`
	)

Template parameter `Layout`: layout function.

Parameters

dst	destination tensor

template<typename Element , typename Layout >

void cutlass::reference::host::TensorUpdateOffDiagonal	(	TensorView< Element, Layout >	dst,
		Element	other = `Element(1)`
	)

Template parameter `Layout`: layout function.

Parameters

dst	destination tensor

Namespaces

Classes

Functions

Function Documentation