|
class | AlignedArray |
| Aligned array type. More...
|
|
struct | AlignedBuffer |
| Modifies semantics of cutlass::Array<> to provide guaranteed alignment. More...
|
|
class | Array< T, N, false > |
| Statically sized array for any data type. More...
|
|
class | Array< T, N, true > |
| Statically sized array for any data type. More...
|
|
struct | CommandLine |
|
class | complex |
|
class | ConstSubbyteReference |
|
struct | Coord |
| Statically-sized array specifying Coords within a tensor. More...
|
|
class | cuda_exception |
| C++ exception wrapper for CUDA cudaError_t. More...
|
|
struct | Distribution |
| Distribution type. More...
|
|
struct | divide_assert |
|
struct | divides |
|
struct | divides< Array< half_t, N > > |
|
struct | divides< Array< T, N > > |
|
struct | FloatType |
| Defines a floating-point type based on the number of exponent and mantissa bits. More...
|
|
struct | FloatType< 11, 52 > |
|
struct | FloatType< 5, 10 > |
|
struct | FloatType< 8, 23 > |
|
struct | half_t |
| IEEE half-precision floating-point type. More...
|
|
class | HostTensor |
| Host tensor. More...
|
|
class | IdentityTensorLayout |
|
struct | integer_subbyte |
| 4-bit signed integer type More...
|
|
struct | IntegerType |
| Defines integers based on size and whether they are signed. More...
|
|
struct | IntegerType< 1, false > |
|
struct | IntegerType< 1, true > |
|
struct | IntegerType< 16, false > |
|
struct | IntegerType< 16, true > |
|
struct | IntegerType< 32, false > |
|
struct | IntegerType< 32, true > |
|
struct | IntegerType< 4, false > |
|
struct | IntegerType< 4, true > |
|
struct | IntegerType< 64, false > |
|
struct | IntegerType< 64, true > |
|
struct | IntegerType< 8, false > |
|
struct | IntegerType< 8, true > |
|
struct | is_pow2 |
|
struct | KernelLaunchConfiguration |
| Structure containing the basic launch configuration of a CUDA kernel. More...
|
|
struct | log2_down |
|
struct | log2_down< N, 1, Count > |
|
struct | log2_up |
|
struct | log2_up< N, 1, Count > |
|
struct | MatrixCoord |
|
struct | MatrixShape |
| Describes the size of a matrix tile. More...
|
|
struct | Max |
|
struct | maximum |
|
struct | maximum< Array< T, N > > |
|
struct | maximum< float > |
|
struct | Min |
|
struct | minimum |
|
struct | minimum< Array< T, N > > |
|
struct | minimum< float > |
|
struct | minus |
|
struct | minus< Array< half_t, N > > |
|
struct | minus< Array< T, N > > |
|
struct | multiplies |
|
struct | multiplies< Array< half_t, N > > |
|
struct | multiplies< Array< T, N > > |
|
struct | multiply_add |
| Fused multiply-add. More...
|
|
struct | multiply_add< Array< half_t, N >, Array< half_t, N >, Array< half_t, N > > |
| Fused multiply-add. More...
|
|
struct | multiply_add< Array< T, N >, Array< T, N >, Array< T, N > > |
| Fused multiply-add. More...
|
|
struct | multiply_add< complex< T >, complex< T >, complex< T > > |
| Fused multiply-add. More...
|
|
struct | multiply_add< complex< T >, T, complex< T > > |
| Fused multiply-add. More...
|
|
struct | multiply_add< T, complex< T >, complex< T > > |
| Fused multiply-add. More...
|
|
struct | negate |
|
struct | negate< Array< half_t, N > > |
|
struct | negate< Array< T, N > > |
|
struct | NumericArrayConverter |
| Conversion operator for Array. More...
|
|
struct | NumericArrayConverter< float, half_t, 2, Round > |
| Partial specialization for Array<float, 2> <= Array<half_t, 2>, round to nearest. More...
|
|
struct | NumericArrayConverter< float, half_t, N, Round > |
| Partial specialization for Array<float> <= Array<half> More...
|
|
struct | NumericArrayConverter< half_t, float, 2, FloatRoundStyle::round_to_nearest > |
| Partial specialization for Array<half, 2> <= Array<float, 2>, round to nearest. More...
|
|
struct | NumericArrayConverter< half_t, float, N, Round > |
| Partial specialization for Array<half> <= Array<float> More...
|
|
struct | NumericConverter |
|
struct | NumericConverter< float, half_t, Round > |
| Partial specialization for float <= half_t. More...
|
|
struct | NumericConverter< half_t, float, FloatRoundStyle::round_to_nearest > |
| Specialization for round-to-nearest. More...
|
|
struct | NumericConverter< half_t, float, FloatRoundStyle::round_toward_zero > |
| Specialization for round-toward-zero. More...
|
|
struct | NumericConverter< int8_t, float, Round > |
|
struct | NumericConverter< T, T, Round > |
| Partial specialization for identical source and destination types (T <= T). More...
|
|
struct | NumericConverterClamp |
|
struct | plus |
|
struct | plus< Array< half_t, N > > |
|
struct | plus< Array< T, N > > |
|
struct | PredicateVector |
| Statically sized array of bits implementing a vector of predicates. More...
|
|
struct | RealType |
| Used to determine the real-valued underlying type of a numeric type T. More...
|
|
struct | RealType< complex< T > > |
| Partial specialization for complex-valued type. More...
|
|
struct | ReferenceFactory |
|
struct | ReferenceFactory< Element, false > |
|
struct | ReferenceFactory< Element, true > |
|
struct | ScalarIO |
| Helper to enable formatted printing of CUTLASS scalar types to an ostream. More...
|
|
class | Semaphore |
| CTA-wide semaphore for inter-CTA synchronization. More...
|
|
struct | sizeof_bits |
| Defines the size of an element in bits. More...
|
|
struct | sizeof_bits< Array< T, N, RegisterSized > > |
| Defines the size of an element in bits - specialized for Array<T, N>. More...
|
|
struct | sizeof_bits< bin1_t > |
| Defines the size of an element in bits - specialized for bin1_t. More...
|
|
struct | sizeof_bits< int4b_t > |
| Defines the size of an element in bits - specialized for int4b_t. More...
|
|
struct | sizeof_bits< uint1b_t > |
| Defines the size of an element in bits - specialized for uint1b_t. More...
|
|
struct | sizeof_bits< uint4b_t > |
| Defines the size of an element in bits - specialized for uint4b_t. More...
|
|
struct | sqrt_est |
|
class | SubbyteReference |
|
struct | Tensor4DCoord |
| Defines a canonical 4D coordinate used by tensor operations. More...
|
|
class | TensorRef |
|
class | TensorView |
|
struct | TypeTraits |
|
struct | TypeTraits< complex< double > > |
|
struct | TypeTraits< complex< float > > |
|
struct | TypeTraits< complex< half > > |
|
struct | TypeTraits< complex< half_t > > |
|
struct | TypeTraits< double > |
|
struct | TypeTraits< float > |
|
struct | TypeTraits< half_t > |
|
struct | TypeTraits< int > |
|
struct | TypeTraits< int64_t > |
|
struct | TypeTraits< int8_t > |
|
struct | TypeTraits< uint64_t > |
|
struct | TypeTraits< uint8_t > |
|
struct | TypeTraits< unsigned > |
|
struct | xor_add |
| Fused XOR-add. More...
|
|
|
CUTLASS_HOST_DEVICE constexpr bool | ispow2 (unsigned x) |
| Returns true if the argument is a power of 2. More...
|
|
CUTLASS_HOST_DEVICE constexpr unsigned | floor_pow_2 (unsigned x) |
| Returns the largest power of two not greater than the argument. More...
|
|
CUTLASS_HOST_DEVICE float const & | real (cuFloatComplex const &z) |
| Returns the real part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE float & | real (cuFloatComplex &z) |
| Returns the real part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE double const & | real (cuDoubleComplex const &z) |
| Returns the real part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE double & | real (cuDoubleComplex &z) |
| Returns the real part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE float const & | imag (cuFloatComplex const &z) |
| Returns the imaginary part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE float & | imag (cuFloatComplex &z) |
| Returns the imaginary part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE double const & | imag (cuDoubleComplex const &z) |
| Returns the imaginary part of the complex number. More...
|
|
CUTLASS_HOST_DEVICE double & | imag (cuDoubleComplex &z) |
| Returns the imaginary part of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T const & | real (complex< T > const &z) |
| Returns the real part of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T & | real (complex< T > &z) |
| Returns the real part of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T const & | imag (complex< T > const &z) |
| Returns the imaginary part of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T & | imag (complex< T > &z) |
| Returns the imaginary part of the complex number. More...
|
|
template<typename T > |
std::ostream & | operator<< (std::ostream &out, complex< T > const &z) |
|
template<typename T > |
CUTLASS_HOST_DEVICE T | abs (complex< T > const &z) |
| Returns the magnitude of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T | arg (complex< T > const &z) |
| Returns the argument (phase angle) of the complex number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE T | norm (T const &z) |
| Returns the squared magnitude of a real number. More...
|
|
template<> |
CUTLASS_HOST_DEVICE int8_t | norm (int8_t const &z) |
| Returns the squared magnitude of a real number. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE double | norm (complex< T > const &z) |
| Returns the squared magnitude of a complex number. More...
|
|
template<typename T , typename R > |
CUTLASS_HOST_DEVICE R | norm_accumulate (T const &x, R const &accumulator) |
| Norm-accumulate calculation. More...
|
|
template<typename T , typename R > |
CUTLASS_HOST_DEVICE R | norm_accumulate (complex< T > const &z, R const &accumulator) |
| Norm accumulate specialized for complex types. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | conj (complex< T > const &z) |
| Returns the complex conjugate. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | proj (complex< T > const &z) |
| Projects the complex number z onto the Riemann sphere. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | polar (T const &r, T const &theta=T()) |
| Returns a complex number with magnitude r and phase theta. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | exp (complex< T > const &z) |
| Computes the complex exponential of z. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | log (complex< T > const &z) |
| Computes the complex logarithm of z. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | log10 (complex< T > const &z) |
| Computes the complex base-10 logarithm of z. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | sqrt (complex< T > const &z) |
| Computes the square root of complex number z. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | cos (complex< T > const &z) |
| Computes the cosine of complex z. More...
|
|
template<typename T > |
CUTLASS_HOST_DEVICE complex< T > | sin (complex< T > const &z) |
| Computes the sine of complex z. More...
|
|
template<> |
CUTLASS_HOST_DEVICE cutlass::complex< half_t > | from_real< cutlass::complex< half_t > > (double r) |
|
template<> |
CUTLASS_HOST_DEVICE cutlass::complex< float > | from_real< cutlass::complex< float > > (double r) |
|
template<> |
CUTLASS_HOST_DEVICE cutlass::complex< double > | from_real< cutlass::complex< double > > (double r) |
|
template<int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Index s, Coord< Rank, Index > coord) |
| Scalar division. More...
|
|
template<int Rank, typename Index > |
CUTLASS_HOST_DEVICE Coord< Rank, Index > | operator/ (Coord< Rank, Index > coord, Index s) |
| Scalar division. More...
|
|
CUTLASS_HOST_DEVICE Coord< 1 > | make_Coord (int _0) |
| Helper to make a 1-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 2 > | make_Coord (int _0, int _1) |
| Helper to make a 2-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 3 > | make_Coord (int _0, int _1, int _2) |
| Helper to make a 3-element coordinate. More...
|
|
CUTLASS_HOST_DEVICE Coord< 4 > | make_Coord (int _0, int _1, int _2, int _3) |
| Helper to make a 4-element coordinate. More...
|
|
template<int Rank> |
std::ostream & | operator<< (std::ostream &out, Coord< Rank > const &coord) |
|
std::istream & | operator>> (std::istream &stream, half_t &x) |
|
std::ostream & | operator<< (std::ostream &out, half_t const &x) |
|
template<typename T > |
std::ostream & | operator<< (std::ostream &out, ScalarIO< T > const &scalar) |
| Default printing to ostream. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< int8_t > const &scalar) |
| Printing to ostream of int8_t as integer rather than character. More...
|
|
template<> |
std::ostream & | operator<< (std::ostream &out, ScalarIO< uint8_t > const &scalar) |
| Printing to ostream of uint8_t as integer rather than character. More...
|
|
template<typename Operator > |
__global__ void | Kernel (typename Operator::Params params) |
| Generic CUTLASS kernel template. More...
|
|
template<typename dividend_t , typename divisor_t > |
CUTLASS_HOST_DEVICE dividend_t | round_nearest (dividend_t dividend, divisor_t divisor) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | gcd (value_t a, value_t b) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | lcm (value_t a, value_t b) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | clz (value_t x) |
|
template<typename value_t > |
CUTLASS_HOST_DEVICE value_t | find_log2 (value_t x) |
|
CUTLASS_HOST_DEVICE void | find_divisor (unsigned int &mul, unsigned int &shr, unsigned int denom) |
|
CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr) |
|
CUTLASS_HOST_DEVICE void | fast_divmod (int &quo, int64_t &rem, int64_t src, int div, unsigned int mul, unsigned int shr) |
|
CUTLASS_HOST_DEVICE constexpr int | const_min (int a, int b) |
|
CUTLASS_HOST_DEVICE constexpr int | const_max (int a, int b) |
|
CUTLASS_HOST_DEVICE bool | signbit (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE cutlass::half_t | abs (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE bool | isnan (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE bool | isfinite (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE cutlass::half_t | nanh (const char *) |
|
CUTLASS_HOST_DEVICE bool | isinf (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE bool | isnormal (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE int | fpclassify (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE cutlass::half_t | sqrt (cutlass::half_t const &h) |
|
CUTLASS_HOST_DEVICE half_t | copysign (half_t const &a, half_t const &b) |
|
CUTLASS_HOST_DEVICE bool | operator== (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE bool | operator!= (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE bool | operator< (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE bool | operator<= (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE bool | operator> (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE bool | operator>= (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t | operator+ (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs) |
|
CUTLASS_HOST_DEVICE half_t | operator- (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t | operator* (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t | operator/ (half_t const &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator+= (half_t &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator-= (half_t &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator*= (half_t &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator/= (half_t &lhs, half_t const &rhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator++ (half_t &lhs) |
|
CUTLASS_HOST_DEVICE half_t & | operator-- (half_t &lhs) |
|
CUTLASS_HOST_DEVICE half_t | operator++ (half_t &lhs, int) |
|
CUTLASS_HOST_DEVICE half_t | operator-- (half_t &lhs, int) |
|
template<typename T > |
CUTLASS_HOST_DEVICE bool | relatively_equal (T a, T b, T epsilon, T nonzero_floor) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint1b_t > (uint1b_t a, uint1b_t b, uint1b_t, uint1b_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< int4b_t > (int4b_t a, int4b_t b, int4b_t, int4b_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint4b_t > (uint4b_t a, uint4b_t b, uint4b_t, uint4b_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< int8_t > (int8_t a, int8_t b, int8_t, int8_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint8_t > (uint8_t a, uint8_t b, uint8_t, uint8_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< int16_t > (int16_t a, int16_t b, int16_t, int16_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint16_t > (uint16_t a, uint16_t b, uint16_t, uint16_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< int32_t > (int32_t a, int32_t b, int32_t, int32_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint32_t > (uint32_t a, uint32_t b, uint32_t, uint32_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< int64_t > (int64_t a, int64_t b, int64_t, int64_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< uint64_t > (uint64_t a, uint64_t b, uint64_t, uint64_t) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< half_t > (half_t a, half_t b, half_t epsilon, half_t nonzero_floor) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< float > (float a, float b, float epsilon, float nonzero_floor) |
|
template<> |
CUTLASS_HOST_DEVICE bool | relatively_equal< double > (double a, double b, double epsilon, double nonzero_floor) |
|
template<typename Element , typename Layout > |
CUTLASS_HOST_DEVICE TensorRef< Element, Layout > | make_TensorRef (Element *ptr, Layout const &layout) |
| Constructs a TensorRef, deducing types from arguments. More...
|
|
template<typename Element , typename Layout > |
bool | TensorRef_aligned (TensorRef< Element, Layout > const &ref, int alignment) |
|
template<typename Element , typename Layout > |
CUTLASS_HOST_DEVICE TensorView< Element, Layout > | make_TensorView (Element *ptr, Layout const &layout, typename Layout::TensorCoord const &extent) |
| Constructs a TensorView, deducing types from arguments. More...
|
|
__host__ CUTLASS_DEVICE cudaError_t | cuda_perror_impl (cudaError_t error, const char *filename, int line) |
| The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
|
|
std::ostream & | operator<< (std::ostream &out, cudaError_t result) |
| Writes a cudaError_t to an output stream. More...
|
|
std::ostream & | operator<< (std::ostream &out, cuda_exception const &e) |
| Writes a cuda_exception instance to an output stream. More...
|
|
template<int Interleaved, typename Element , typename Layout > |
void | reorder_column (TensorRef< Element, Layout > dest, TensorRef< Element, Layout > src, cutlass::gemm::GemmCoord problem_size) |
|
template<typename Element , typename Layout > |
std::ostream & | TensorViewWrite (std::ostream &out, TensorView< Element, Layout > const &view) |
| Prints human-readable representation of a TensorView to an ostream. More...
|
|
template<typename Element , typename Layout > |
std::ostream & | operator<< (std::ostream &out, TensorView< Element, Layout > const &view) |
| Prints human-readable representation of a TensorView to an ostream. More...
|
|