44 Array<half_t, 2>
operator*(Array<half_t, 2>
const &a, Array<half_t, 2>
const &b) {
54 Array<half_t, 2>
operator+(Array<half_t, 2>
const &a, Array<half_t, 2>
const &b) {
64 Array<half_t, 2>
operator-(Array<half_t, 2>
const &a, Array<half_t, 2>
const &b) {
77 Array<half_t, 2>
mac(Array<half_t, 2>
const &a, Array<half_t, 2>
const &b, Array<half_t, 2>
const &c) {
100 float dot(Array<half_t, 2>
const &a, Array<half_t, 2>
const &b,
float accum) {
Definition: aligned_buffer.h:35
IEEE half-precision floating-point type.
Definition: half.h:126
CUTLASS_HOST_DEVICE Array< T, N > operator-(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:67
CUTLASS_HOST_DEVICE Array< T, N > operator*(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:45
CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)
Definition: simd.h:101
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE Array< T, N > operator+(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:56
Templates exposing SIMD operators.
CUTLASS_HOST_DEVICE Array< T, N > mac(Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c)
Definition: simd.h:84