Namespaces
	detail
	Defines several helpers.

Functions
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void	Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum)

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >
__global__ void	BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum)

template<typename T >
__global__ void	TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
	Kernel to initialize tensor to a uniform random distribution. More...

template<typename T >
__global__ void	TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
	Kernel to initialize tensor to a Gaussian random distribution. More...

template<typename T >
__global__ void	TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
	Kernel to initialize tensor with linearly varying values. More...

template<typename T >
__global__ void	TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm)
	Kernel to initialize tensor to an identity matrix. More...

template<typename Func , int Rank, typename Params >
__global__ void	TensorForEach (Coord< Rank > size, Params params=Params())
	Kernel calls a functor for each element in a tensor's index space. More...

template<typename Func , int Rank, typename Params >
__global__ void	TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end)
	Kernel calls a functor for each element along a tensor's diagonal. More...

template<typename Element , typename Func >
__global__ void	BlockForEach (Element *ptr, size_t capacity, typename Func::Params params)

template<typename Element >
__global__ void	BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity)

template<typename Element >
__global__ void	BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor)

Function Documentation

template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

__global__ void cutlass::reference::device::kernel::BatchedGemm	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRefCollectionA	tensor_collection_a,
		TensorRefCollectionB	tensor_collection_b,
		ScalarType	beta,
		TensorRefCollectionC	tensor_collection_c,
		AccumulatorType	initial_accum
	)

Computes a batch of general matrix products among matrices (tensors of rank=2) pointed to by collections of TensorRef objects.

template<typename Element >

__global__ void cutlass::reference::device::kernel::BlockCompareEqual	(	int *	equal,
		Element const *	ptr_A,
		Element const *	ptr_B,
		size_t	capacity
	)

template<typename Element >

__global__ void cutlass::reference::device::kernel::BlockCompareRelativelyEqual	(	int *	equal,
		Element const *	ptr_A,
		Element const *	ptr_B,
		size_t	capacity,
		Element	epsilon,
		Element	nonzero_floor
	)

template<typename Element , typename Func >

__global__ void cutlass::reference::device::kernel::BlockForEach	(	Element *	ptr,
		size_t	capacity,
		typename Func::Params	params
	)

template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp >

__global__ void cutlass::reference::device::kernel::Gemm	(	gemm::GemmCoord	problem_size,
		ScalarType	alpha,
		TensorRefA	tensor_a,
		TensorRefB	tensor_b,
		ScalarType	beta,
		TensorRefC	tensor_c,
		TensorRefC	tensor_d,
		AccumulatorType	initial_accum
	)

Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.

template<typename Func , int Rank, typename Params >

__global__ void cutlass::reference::device::kernel::TensorDiagonalForEach	(	Coord< Rank >	size,
		Params	params,
		int	start,
		int	end
	)

template<typename Func , int Rank, typename Params >

__global__ void cutlass::reference::device::kernel::TensorForEach	(	Coord< Rank >	size,
		Params	params = Params()
	)

template<typename T >

__global__ void cutlass::reference::device::kernel::TensorInitializeGaussian	(	Distribution	dist,
		int64_t	seed,
		int	dim_contiguous,
		int	dim_strided,
		T *	tensor,
		int	ldm
	)

template<typename T >

__global__ void cutlass::reference::device::kernel::TensorInitializeIdentity	(	Distribution	dist,
		int64_t	seed,
		int	dim_contiguous,
		int	dim_strided,
		T *	tensor,
		int	ldm
	)

template<typename T >

__global__ void cutlass::reference::device::kernel::TensorInitializeLinear	(	Distribution	dist,
		int64_t	seed,
		int	dim_contiguous,
		int	dim_strided,
		T *	tensor,
		int	ldm
	)

template<typename T >

__global__ void cutlass::reference::device::kernel::TensorInitializeUniform	(	Distribution	dist,
		int64_t	seed,
		int	dim_contiguous,
		int	dim_strided,
		T *	tensor,
		int	ldm
	)

Namespaces

Functions

Function Documentation