Namespaces
	arch

	debug

	detail

	device_memory

	epilogue

	gemm

	layout

	library

	platform

	reduction

	reference

	thread

	transform

Classes
class	AlignedArray
	Aligned array type. More...

struct	AlignedBuffer
	Modifies semantics of cutlass::Array<> to provide guaranteed alignment. More...

class	Array< T, N, false >
	Statically sized array for any data type. More...

class	Array< T, N, true >
	Statically sized array for any data type. More...

struct	CommandLine

class	complex

class	ConstSubbyteReference

struct	Coord
	Statically-sized array specifying Coords within a tensor. More...

class	cuda_exception
	C++ exception wrapper for CUDA `cudaError_t`. More...

struct	Distribution
	Distribution type. More...

struct	divide_assert

struct	divides

struct	divides< Array< half_t, N > >

struct	divides< Array< T, N > >

struct	FloatType
	Defines a floating-point type based on the number of exponent and mantissa bits. More...

struct	FloatType< 11, 52 >

struct	FloatType< 5, 10 >

struct	FloatType< 8, 23 >

struct	half_t
	IEEE half-precision floating-point type. More...

class	HostTensor
	Host tensor. More...

class	IdentityTensorLayout

struct	integer_subbyte
	Sub-byte integer type, parameterized by bit width and signedness. More...

struct	IntegerType
	Defines integers based on size and whether they are signed. More...

struct	IntegerType< 1, false >

struct	IntegerType< 1, true >

struct	IntegerType< 16, false >

struct	IntegerType< 16, true >

struct	IntegerType< 32, false >

struct	IntegerType< 32, true >

struct	IntegerType< 4, false >

struct	IntegerType< 4, true >

struct	IntegerType< 64, false >

struct	IntegerType< 64, true >

struct	IntegerType< 8, false >

struct	IntegerType< 8, true >

struct	is_pow2

struct	KernelLaunchConfiguration
	Structure containing the basic launch configuration of a CUDA kernel. More...

struct	log2_down

struct	log2_down< N, 1, Count >

struct	log2_up

struct	log2_up< N, 1, Count >

struct	MatrixCoord

struct	MatrixShape
	Describes the size of a matrix tile. More...

struct	Max

struct	maximum

struct	maximum< Array< T, N > >

struct	maximum< float >

struct	Min

struct	minimum

struct	minimum< Array< T, N > >

struct	minimum< float >

struct	minus

struct	minus< Array< half_t, N > >

struct	minus< Array< T, N > >

struct	multiplies

struct	multiplies< Array< half_t, N > >

struct	multiplies< Array< T, N > >

struct	multiply_add
	Fused multiply-add. More...

struct	multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > >
	Fused multiply-add. More...

struct	multiply_add< Array< T, N >, Array< T, N >, Array< T, N > >
	Fused multiply-add. More...

struct	multiply_add< complex< T >, complex< T >, complex< T > >
	Fused multiply-add. More...

struct	multiply_add< complex< T >, T, complex< T > >
	Fused multiply-add. More...

struct	multiply_add< T, complex< T >, complex< T > >
	Fused multiply-add. More...

struct	negate

struct	negate< Array< half_t, N > >

struct	negate< Array< T, N > >

struct	NumericArrayConverter
	Conversion operator for Array. More...

struct	NumericArrayConverter< float, half_t, 2, Round >
	Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. More...

struct	NumericArrayConverter< float, half_t, N, Round >
	Partial specialization for Array<float> <= Array<half> More...

struct	NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest >
	Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest. More...

struct	NumericArrayConverter< half_t, float, N, Round >
	Partial specialization for Array<half> <= Array<float> More...

struct	NumericConverter

struct	NumericConverter< float, half_t, Round >
	Partial specialization for float <= half_t. More...

struct	NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest >
	Specialization for round-to-nearest. More...

struct	NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero >
	Specialization for round-toward-zero. More...

struct	NumericConverter< int8_t, float, Round >

struct	NumericConverter< T, T, Round >
	Partial specialization for T <= T (source and destination types are the same). More...

struct	NumericConverterClamp

struct	plus

struct	plus< Array< half_t, N > >

struct	plus< Array< T, N > >

struct	PredicateVector
	Statically sized array of bits implementing a vector of predicates. More...

struct	RealType
	Used to determine the real-valued underlying type of a numeric type T. More...

struct	RealType< complex< T > >
	Partial specialization for complex-valued type. More...

struct	ReferenceFactory

struct	ReferenceFactory< Element, false >

struct	ReferenceFactory< Element, true >

struct	ScalarIO
	Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...

class	Semaphore
	CTA-wide semaphore for inter-CTA synchronization. More...

struct	sizeof_bits
	Defines the size of an element in bits. More...

struct	sizeof_bits< Array< T, N, RegisterSized > >
	Defines the size of an element in bits - specialized for Array<T, N, RegisterSized>. More...

struct	sizeof_bits< bin1_t >
	Defines the size of an element in bits - specialized for bin1_t. More...

struct	sizeof_bits< int4b_t >
	Defines the size of an element in bits - specialized for int4b_t. More...

struct	sizeof_bits< uint1b_t >
	Defines the size of an element in bits - specialized for uint1b_t. More...

struct	sizeof_bits< uint4b_t >
	Defines the size of an element in bits - specialized for uint4b_t. More...

struct	sqrt_est

class	SubbyteReference

struct	Tensor4DCoord
	Defines a canonical 4D coordinate used by tensor operations. More...

class	TensorRef

class	TensorView

struct	TypeTraits

struct	TypeTraits< complex< double > >

struct	TypeTraits< complex< float > >

struct	TypeTraits< complex< half > >

struct	TypeTraits< complex< half_t > >

struct	TypeTraits< double >

struct	TypeTraits< float >

struct	TypeTraits< half_t >

struct	TypeTraits< int >

struct	TypeTraits< int64_t >

struct	TypeTraits< int8_t >

struct	TypeTraits< uint64_t >

struct	TypeTraits< uint8_t >

struct	TypeTraits< unsigned >

struct	xor_add
	Fused XOR-add. More...

Typedefs
using	uint1b_t = integer_subbyte< 1, false >
	1-bit Unsigned integer type More...

using	int4b_t = integer_subbyte< 4, true >
	4-bit Integer type More...

using	uint4b_t = integer_subbyte< 4, false >
	4-bit Unsigned integer type More...

using	bin1_t = bool
	1-bit binary type More...

Enumerations
enum	ComplexTransform { ComplexTransform::kNone, ComplexTransform::kConjugate }
	Enumerated type describing a transformation on a complex value. More...

enum	Status { Status::kSuccess, Status::kErrorMisalignedOperand, Status::kErrorInvalidLayout, Status::kErrorInvalidProblem, Status::kErrorNotSupported, Status::kErrorWorkspaceNull, Status::kErrorInternal, Status::kInvalid }
	Status code returned by CUTLASS operations. More...

enum	MatrixLayout { MatrixLayout::kColumnMajor, MatrixLayout::kRowMajor }

enum	MatrixTransform { MatrixTransform::kNone, MatrixTransform::kTranspose, MatrixTransform::kConjugate, MatrixTransform::kHermitian }
	Transformation applied to matrix operands. More...

enum	FloatRoundStyle { FloatRoundStyle::round_indeterminate, FloatRoundStyle::round_toward_zero, FloatRoundStyle::round_to_nearest, FloatRoundStyle::round_toward_infinity, FloatRoundStyle::round_toward_neg_infinity, FloatRoundStyle::round_half_ulp_truncate }

Functions
CUTLASS_HOST_DEVICE constexpr bool	ispow2 (unsigned x)
	Returns true if the argument is a power of 2. More...

CUTLASS_HOST_DEVICE constexpr unsigned	floor_pow_2 (unsigned x)
	Returns the largest power of two not greater than the argument. More...

CUTLASS_HOST_DEVICE float const &	real (cuFloatComplex const &z)
	Returns the real part of the complex number. More...

CUTLASS_HOST_DEVICE float &	real (cuFloatComplex &z)
	Returns the real part of the complex number. More...

CUTLASS_HOST_DEVICE double const &	real (cuDoubleComplex const &z)
	Returns the real part of the complex number. More...

CUTLASS_HOST_DEVICE double &	real (cuDoubleComplex &z)
	Returns the real part of the complex number. More...

CUTLASS_HOST_DEVICE float const &	imag (cuFloatComplex const &z)
	Returns the imaginary part of the complex number. More...

CUTLASS_HOST_DEVICE float &	imag (cuFloatComplex &z)
	Returns the imaginary part of the complex number. More...

CUTLASS_HOST_DEVICE double const &	imag (cuDoubleComplex const &z)
	Returns the imaginary part of the complex number. More...

CUTLASS_HOST_DEVICE double &	imag (cuDoubleComplex &z)
	Returns the imaginary part of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T const &	real (complex< T > const &z)
	Returns the real part of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T &	real (complex< T > &z)
	Returns the real part of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T const &	imag (complex< T > const &z)
	Returns the imaginary part of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T &	imag (complex< T > &z)
	Returns the imaginary part of the complex number. More...

template<typename T >
std::ostream &	operator<< (std::ostream &out, complex< T > const &z)

template<typename T >
CUTLASS_HOST_DEVICE T	abs (complex< T > const &z)
	Returns the magnitude of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T	arg (complex< T > const &z)
	Returns the phase angle (argument) of the complex number. More...

template<typename T >
CUTLASS_HOST_DEVICE T	norm (T const &z)
	Returns the squared magnitude of a real number. More...

template<>
CUTLASS_HOST_DEVICE int8_t	norm (int8_t const &z)
	Returns the squared magnitude of a real number. More...

template<typename T >
CUTLASS_HOST_DEVICE double	norm (complex< T > const &z)
	Returns the squared magnitude of a complex number. More...

template<typename T , typename R >
CUTLASS_HOST_DEVICE R	norm_accumulate (T const &x, R const &accumulator)
	Norm-accumulate calculation. More...

template<typename T , typename R >
CUTLASS_HOST_DEVICE R	norm_accumulate (complex< T > const &z, R const &accumulator)
	Norm accumulate specialized for complex types. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	conj (complex< T > const &z)
	Returns the complex conjugate. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	proj (complex< T > const &z)
	Projects the complex number z onto the Riemann sphere. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	polar (T const &r, T const &theta=T())
	Returns a complex number with magnitude r and phase theta. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	exp (complex< T > const &z)
	Computes the complex exponential of z. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	log (complex< T > const &z)
	Computes the complex natural logarithm of z. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	log10 (complex< T > const &z)
	Computes the complex base-10 logarithm of z. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	sqrt (complex< T > const &z)
	Computes the square root of complex number z. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	cos (complex< T > const &z)
	Computes the cosine of complex z. More...

template<typename T >
CUTLASS_HOST_DEVICE complex< T >	sin (complex< T > const &z)
	Computes the sine of complex z. More...

template<>
CUTLASS_HOST_DEVICE cutlass::complex< half_t >	from_real< cutlass::complex< half_t > > (double r)

template<>
CUTLASS_HOST_DEVICE cutlass::complex< float >	from_real< cutlass::complex< float > > (double r)

template<>
CUTLASS_HOST_DEVICE cutlass::complex< double >	from_real< cutlass::complex< double > > (double r)

template<int Rank, typename Index >
CUTLASS_HOST_DEVICE Coord< Rank, Index >	operator/ (Index s, Coord< Rank, Index > coord)
	Scalar division. More...

template<int Rank, typename Index >
CUTLASS_HOST_DEVICE Coord< Rank, Index >	operator/ (Coord< Rank, Index > coord, Index s)
	Scalar division. More...

CUTLASS_HOST_DEVICE Coord< 1 >	make_Coord (int _0)
	Helper to make a 1-element coordinate. More...

CUTLASS_HOST_DEVICE Coord< 2 >	make_Coord (int _0, int _1)
	Helper to make a 2-element coordinate. More...

CUTLASS_HOST_DEVICE Coord< 3 >	make_Coord (int _0, int _1, int _2)
	Helper to make a 3-element coordinate. More...

CUTLASS_HOST_DEVICE Coord< 4 >	make_Coord (int _0, int _1, int _2, int _3)
	Helper to make a 4-element coordinate. More...

template<int Rank>
std::ostream &	operator<< (std::ostream &out, Coord< Rank > const &coord)

std::istream &	operator>> (std::istream &stream, half_t &x)

std::ostream &	operator<< (std::ostream &out, half_t const &x)

template<typename T >
std::ostream &	operator<< (std::ostream &out, ScalarIO< T > const &scalar)
	Default printing to ostream. More...

template<>
std::ostream &	operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar)
	Printing to ostream of int8_t as integer rather than character. More...

template<>
std::ostream &	operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar)
	Printing to ostream of uint8_t as integer rather than character. More...

template<typename Operator >
__global__ void	Kernel (typename Operator::Params params)
	Generic CUTLASS kernel template. More...

template<typename dividend_t , typename divisor_t >
CUTLASS_HOST_DEVICE dividend_t	round_nearest (dividend_t dividend, divisor_t divisor)

template<typename value_t >
CUTLASS_HOST_DEVICE value_t	gcd (value_t a, value_t b)

template<typename value_t >
CUTLASS_HOST_DEVICE value_t	lcm (value_t a, value_t b)

template<typename value_t >
CUTLASS_HOST_DEVICE value_t	clz (value_t x)

template<typename value_t >
CUTLASS_HOST_DEVICE value_t	find_log2 (value_t x)

CUTLASS_HOST_DEVICE void	find_divisor (unsigned int &mul, unsigned int &shr, unsigned int denom)

CUTLASS_HOST_DEVICE void	fast_divmod (int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)

CUTLASS_HOST_DEVICE void	fast_divmod (int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr)

CUTLASS_HOST_DEVICE constexpr int	const_min (int a, int b)

CUTLASS_HOST_DEVICE constexpr int	const_max (int a, int b)

CUTLASS_HOST_DEVICE bool	signbit (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE cutlass::half_t	abs (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE bool	isnan (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE bool	isfinite (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE cutlass::half_t	nanh (const char *)

CUTLASS_HOST_DEVICE bool	isinf (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE bool	isnormal (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE int	fpclassify (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE cutlass::half_t	sqrt (cutlass::half_t const &h)

CUTLASS_HOST_DEVICE half_t	copysign (half_t const &a, half_t const &b)

CUTLASS_HOST_DEVICE bool	operator== (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE bool	operator!= (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE bool	operator< (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE bool	operator<= (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE bool	operator> (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE bool	operator>= (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t	operator+ (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t	operator- (half_t const &lhs)

CUTLASS_HOST_DEVICE half_t	operator- (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t	operator* (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t	operator/ (half_t const &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t &	operator+= (half_t &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t &	operator-= (half_t &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t &	operator*= (half_t &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t &	operator/= (half_t &lhs, half_t const &rhs)

CUTLASS_HOST_DEVICE half_t &	operator++ (half_t &lhs)

CUTLASS_HOST_DEVICE half_t &	operator-- (half_t &lhs)

CUTLASS_HOST_DEVICE half_t	operator++ (half_t &lhs, int)

CUTLASS_HOST_DEVICE half_t	operator-- (half_t &lhs, int)

template<typename T >
CUTLASS_HOST_DEVICE bool	relatively_equal (T a, T b, T epsilon, T nonzero_floor)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint1b_t > (uint1b_t a, uint1b_t b, uint1b_t, uint1b_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< int4b_t > (int4b_t a, int4b_t b, int4b_t, int4b_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint4b_t > (uint4b_t a, uint4b_t b, uint4b_t, uint4b_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< int8_t > (int8_t a, int8_t b, int8_t, int8_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint8_t > (uint8_t a, uint8_t b, uint8_t, uint8_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< int16_t > (int16_t a, int16_t b, int16_t, int16_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint16_t > (uint16_t a, uint16_t b, uint16_t, uint16_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< int32_t > (int32_t a, int32_t b, int32_t, int32_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint32_t > (uint32_t a, uint32_t b, uint32_t, uint32_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< int64_t > (int64_t a, int64_t b, int64_t, int64_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< uint64_t > (uint64_t a, uint64_t b, uint64_t, uint64_t)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< half_t > (half_t a, half_t b, half_t epsilon, half_t nonzero_floor)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< float > (float a, float b, float epsilon, float nonzero_floor)

template<>
CUTLASS_HOST_DEVICE bool	relatively_equal< double > (double a, double b, double epsilon, double nonzero_floor)

template<typename Element , typename Layout >
CUTLASS_HOST_DEVICE TensorRef< Element, Layout >	make_TensorRef (Element *ptr, Layout const &layout)
	Constructs a TensorRef, deducing types from arguments. More...

template<typename Element , typename Layout >
bool	TensorRef_aligned (TensorRef< Element, Layout > const &ref, int alignment)

template<typename Element , typename Layout >
CUTLASS_HOST_DEVICE TensorView< Element, Layout >	make_TensorView (Element *ptr, Layout const &layout, typename Layout::TensorCoord const &extent)
	Constructs a TensorView, deducing types from arguments. More...

__host__ CUTLASS_DEVICE cudaError_t	cuda_perror_impl (cudaError_t error, const char *filename, int line)
	The corresponding error message is printed to `stderr` (or `stdout` in device code) along with the supplied source context. More...

std::ostream &	operator<< (std::ostream &out, cudaError_t result)
	Writes a cudaError_t to an output stream. More...

std::ostream &	operator<< (std::ostream &out, cuda_exception const &e)
	Writes a cuda_exception instance to an output stream. More...

template<int Interleaved, typename Element , typename Layout >
void	reorder_column (TensorRef< Element, Layout > dest, TensorRef< Element, Layout > src, cutlass::gemm::GemmCoord problem_size)

template<typename Element , typename Layout >
std::ostream &	TensorViewWrite (std::ostream &out, TensorView< Element, Layout > const &view)
	Prints human-readable representation of a TensorView to an ostream. More...

template<typename Element , typename Layout >
std::ostream &	operator<< (std::ostream &out, TensorView< Element, Layout > const &view)
	Prints human-readable representation of a TensorView to an ostream. More...

Typedef Documentation

using cutlass::bin1_t = typedef bool

using cutlass::int4b_t = typedef integer_subbyte<4, true>

using cutlass::uint1b_t = typedef integer_subbyte<1, false>

using cutlass::uint4b_t = typedef integer_subbyte<4, false>

Enumeration Type Documentation

enum cutlass::ComplexTransform

strong

Enumerator
kNone
kConjugate

enum cutlass::FloatRoundStyle

strong

Floating-point rounding style similar to the Standard Library's formats but supporting additional rounding options.

Enumerator
round_indeterminate	rounding mode unknown
round_toward_zero	round toward zero
round_to_nearest	round to nearest even
round_toward_infinity	round toward infinity
round_toward_neg_infinity	round toward negative infinity
round_half_ulp_truncate	add 0.5ulp to integer representation then round toward zero

enum cutlass::MatrixLayout

strong

Enumerator
kColumnMajor
kRowMajor

enum cutlass::MatrixTransform

strong

Enumerator
kNone	no operation
kTranspose	transpose operation
kConjugate	conjugate
kHermitian	conjugate transpose

enum cutlass::Status

strong

Enumerator
kSuccess	Operation was successful.
kErrorMisalignedOperand	Operands fail alignment requirements.
kErrorInvalidLayout	Layout fails alignment requirement.
kErrorInvalidProblem	Specified problem size is not supported by operator.
kErrorNotSupported	Operation is not supported on current device.
kErrorWorkspaceNull	The given workspace is null when it is required to be non-null.
kErrorInternal	An error within CUTLASS occurred.
kInvalid	Status is unspecified.

Function Documentation

template<typename T >

CUTLASS_HOST_DEVICE T cutlass::abs ( complex< T > const & z )

CUTLASS_HOST_DEVICE cutlass::half_t cutlass::abs ( cutlass::half_t const & h )

template<typename T >

CUTLASS_HOST_DEVICE T cutlass::arg ( complex< T > const & z )

template<typename value_t >

CUTLASS_HOST_DEVICE value_t cutlass::clz ( value_t x )

log2 computation. Open question: what is the difference between the code below and the log2_up / log2_down code?

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::conj ( complex< T > const & z )

CUTLASS_HOST_DEVICE constexpr int cutlass::const_max	(	int	a,
		int	b
	)

CUTLASS_HOST_DEVICE constexpr int cutlass::const_min	(	int	a,
		int	b
	)

CUTLASS_HOST_DEVICE half_t cutlass::copysign	(	half_t const &	a,
		half_t const &	b
	)

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::cos ( complex< T > const & z )

__host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl	(	cudaError_t	error,
		const char *	filename,
		int	line
	)

Returns: The CUDA error.

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::exp ( complex< T > const & z )

CUTLASS_HOST_DEVICE void cutlass::fast_divmod	(	int &	quo,
		int &	rem,
		int	src,
		int	div,
		unsigned int	mul,
		unsigned int	shr
	)

Find quotient and remainder using device-side intrinsics

CUTLASS_HOST_DEVICE void cutlass::fast_divmod	(	int &	quo,
		int64_t &	rem,
		int64_t	src,
		int	div,
		unsigned int	mul,
		unsigned int	shr
	)

CUTLASS_HOST_DEVICE void cutlass::find_divisor	(	unsigned int &	mul,
		unsigned int &	shr,
		unsigned int	denom
	)

Find divisor, using find_log2

template<typename value_t >

CUTLASS_HOST_DEVICE value_t cutlass::find_log2 ( value_t x )

CUTLASS_HOST_DEVICE constexpr unsigned cutlass::floor_pow_2 ( unsigned x )

CUTLASS_HOST_DEVICE int cutlass::fpclassify ( cutlass::half_t const & h )

template<>

CUTLASS_HOST_DEVICE cutlass::complex<double> cutlass::from_real< cutlass::complex< double > > ( double r )

template<>

CUTLASS_HOST_DEVICE cutlass::complex<float> cutlass::from_real< cutlass::complex< float > > ( double r )

template<>

CUTLASS_HOST_DEVICE cutlass::complex<half_t> cutlass::from_real< cutlass::complex< half_t > > ( double r )

template<typename value_t >

CUTLASS_HOST_DEVICE value_t cutlass::gcd	(	value_t	a,
		value_t	b
	)

Greatest common divisor

CUTLASS_HOST_DEVICE float const& cutlass::imag ( cuFloatComplex const & z )

CUTLASS_HOST_DEVICE float& cutlass::imag ( cuFloatComplex & z )

CUTLASS_HOST_DEVICE double const& cutlass::imag ( cuDoubleComplex const & z )

CUTLASS_HOST_DEVICE double& cutlass::imag ( cuDoubleComplex & z )

template<typename T >

CUTLASS_HOST_DEVICE T const& cutlass::imag ( complex< T > const & z )

template<typename T >

CUTLASS_HOST_DEVICE T& cutlass::imag ( complex< T > & z )

CUTLASS_HOST_DEVICE bool cutlass::isfinite ( cutlass::half_t const & h )

CUTLASS_HOST_DEVICE bool cutlass::isinf ( cutlass::half_t const & h )

CUTLASS_HOST_DEVICE bool cutlass::isnan ( cutlass::half_t const & h )

CUTLASS_HOST_DEVICE bool cutlass::isnormal ( cutlass::half_t const & h )

CUTLASS_HOST_DEVICE constexpr bool cutlass::ispow2 ( unsigned x )

template<typename Operator >

__global__ void cutlass::Kernel ( typename Operator::Params params )

template<typename value_t >

CUTLASS_HOST_DEVICE value_t cutlass::lcm	(	value_t	a,
		value_t	b
	)

Least common multiple

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::log ( complex< T > const & z )

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::log10 ( complex< T > const & z )

CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord ( int _0 )

CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord	(	int	_0,
		int	_1
	)

CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord	(	int	_0,
		int	_1,
		int	_2
	)

CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord	(	int	_0,
		int	_1,
		int	_2,
		int	_3
	)

template<typename Element , typename Layout >

CUTLASS_HOST_DEVICE TensorRef<Element, Layout> cutlass::make_TensorRef	(	Element *	ptr,
		Layout const &	layout
	)

template<typename Element , typename Layout >

CUTLASS_HOST_DEVICE TensorView<Element, Layout> cutlass::make_TensorView	(	Element *	ptr,
		Layout const &	layout,
		typename Layout::TensorCoord const &	extent
	)

CUTLASS_HOST_DEVICE cutlass::half_t cutlass::nanh ( const char * )

template<typename T >

CUTLASS_HOST_DEVICE T cutlass::norm ( T const & z )

template<>

CUTLASS_HOST_DEVICE int8_t cutlass::norm ( int8_t const & z )

template<typename T >

CUTLASS_HOST_DEVICE double cutlass::norm ( complex< T > const & z )

template<typename T , typename R >

CUTLASS_HOST_DEVICE R cutlass::norm_accumulate	(	T const &	x,
		R const &	accumulator
	)

template<typename T , typename R >

CUTLASS_HOST_DEVICE R cutlass::norm_accumulate	(	complex< T > const &	z,
		R const &	accumulator
	)

CUTLASS_HOST_DEVICE bool cutlass::operator!=	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t cutlass::operator*	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator*=	(	half_t &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t cutlass::operator+	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator++ ( half_t & lhs )

CUTLASS_HOST_DEVICE half_t cutlass::operator++	(	half_t &	lhs,
		int
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator+=	(	half_t &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t cutlass::operator- ( half_t const & lhs )

CUTLASS_HOST_DEVICE half_t cutlass::operator-	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator-- ( half_t & lhs )

CUTLASS_HOST_DEVICE half_t cutlass::operator--	(	half_t &	lhs,
		int
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator-=	(	half_t &	lhs,
		half_t const &	rhs
	)

template<int Rank, typename Index >

CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/	(	Index	s,
		Coord< Rank, Index >	coord
	)

template<int Rank, typename Index >

CUTLASS_HOST_DEVICE Coord<Rank, Index> cutlass::operator/	(	Coord< Rank, Index >	coord,
		Index	s
	)

CUTLASS_HOST_DEVICE half_t cutlass::operator/	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE half_t& cutlass::operator/=	(	half_t &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE bool cutlass::operator<	(	half_t const &	lhs,
		half_t const &	rhs
	)

template<int Rank>

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		Coord< Rank > const &	coord
	)

inline

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		cudaError_t	result
	)

inline

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		cuda_exception const &	e
	)

inline

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		half_t const &	x
	)

inline

template<typename T >

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		ScalarIO< T > const &	scalar
	)

inline

template<>

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		ScalarIO< int8_t > const &	scalar
	)

inline

template<>

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		ScalarIO< uint8_t > const &	scalar
	)

inline

template<typename Element , typename Layout >

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		TensorView< Element, Layout > const &	view
	)

inline

template<typename T >

std::ostream& cutlass::operator<<	(	std::ostream &	out,
		complex< T > const &	z
	)

CUTLASS_HOST_DEVICE bool cutlass::operator<=	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE bool cutlass::operator==	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE bool cutlass::operator>	(	half_t const &	lhs,
		half_t const &	rhs
	)

CUTLASS_HOST_DEVICE bool cutlass::operator>=	(	half_t const &	lhs,
		half_t const &	rhs
	)

std::istream& cutlass::operator>>	(	std::istream &	stream,
		half_t &	x
	)

inline

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::polar	(	T const &	r,
		T const &	theta = T()
	)

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::proj ( complex< T > const & z )

CUTLASS_HOST_DEVICE float const& cutlass::real ( cuFloatComplex const & z )

CUTLASS_HOST_DEVICE float& cutlass::real ( cuFloatComplex & z )

CUTLASS_HOST_DEVICE double const& cutlass::real ( cuDoubleComplex const & z )

CUTLASS_HOST_DEVICE double& cutlass::real ( cuDoubleComplex & z )

template<typename T >

CUTLASS_HOST_DEVICE T const& cutlass::real ( complex< T > const & z )

template<typename T >

CUTLASS_HOST_DEVICE T& cutlass::real ( complex< T > & z )

template<typename T >

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal	(	T	a,
		T	b,
		T	epsilon,
		T	nonzero_floor
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< double >	(	double	a,
		double	b,
		double	epsilon,
		double	nonzero_floor
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< float >	(	float	a,
		float	b,
		float	epsilon,
		float	nonzero_floor
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< half_t >	(	half_t	a,
		half_t	b,
		half_t	epsilon,
		half_t	nonzero_floor
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int16_t >	(	int16_t	a,
		int16_t	b,
		int16_t	,
		int16_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int32_t >	(	int32_t	a,
		int32_t	b,
		int32_t	,
		int32_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int4b_t >	(	int4b_t	a,
		int4b_t	b,
		int4b_t	,
		int4b_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int64_t >	(	int64_t	a,
		int64_t	b,
		int64_t	,
		int64_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< int8_t >	(	int8_t	a,
		int8_t	b,
		int8_t	,
		int8_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint16_t >	(	uint16_t	a,
		uint16_t	b,
		uint16_t	,
		uint16_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint1b_t >	(	uint1b_t	a,
		uint1b_t	b,
		uint1b_t	,
		uint1b_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint32_t >	(	uint32_t	a,
		uint32_t	b,
		uint32_t	,
		uint32_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint4b_t >	(	uint4b_t	a,
		uint4b_t	b,
		uint4b_t	,
		uint4b_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint64_t >	(	uint64_t	a,
		uint64_t	b,
		uint64_t	,
		uint64_t
	)

template<>

CUTLASS_HOST_DEVICE bool cutlass::relatively_equal< uint8_t >	(	uint8_t	a,
		uint8_t	b,
		uint8_t	,
		uint8_t
	)

template<int Interleaved, typename Element , typename Layout >

void cutlass::reorder_column	(	TensorRef< Element, Layout >	dest,
		TensorRef< Element, Layout >	src,
		cutlass::gemm::GemmCoord	problem_size
	)

template<typename dividend_t , typename divisor_t >

CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest	(	dividend_t	dividend,
		divisor_t	divisor
	)

Round dividend up to the nearest multiple of divisor

CUTLASS_HOST_DEVICE bool cutlass::signbit ( cutlass::half_t const & h )

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::sin ( complex< T > const & z )

template<typename T >

CUTLASS_HOST_DEVICE complex<T> cutlass::sqrt ( complex< T > const & z )

CUTLASS_HOST_DEVICE cutlass::half_t cutlass::sqrt ( cutlass::half_t const & h )

template<typename Element , typename Layout >

bool cutlass::TensorRef_aligned	(	TensorRef< Element, Layout > const &	ref,
		int	alignment
	)

template<typename Element , typename Layout >

std::ostream& cutlass::TensorViewWrite	(	std::ostream &	out,
		TensorView< Element, Layout > const &	view
	)

inline

Namespaces

Classes

Typedefs

Enumerations

Functions

Typedef Documentation

Enumeration Type Documentation

Function Documentation