40 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate with a 42 template <
typename Atype,
typename Btype,
typename Ctype>
45 return Ctype(a) * Ctype(b) + c;
51 int inner_product<Array<bin1_t, 32>, Array<bin1_t, 32>,
int>(
57 for (
int bit = 0; bit < 32; bit++) {
58 accum += a[bit] ^ b[bit];
97 template <
typename SrcType,
typename DstType>
100 #pragma hd_warning_disable // Suppresses warnings when attempting to instantiate complex<T> with a 103 static DstType
apply(SrcType src) {
return static_cast<DstType
>(src); };
111 return static_cast<int8_t
>(fmaxf(-128.f, fminf(127.f, src)));
120 return static_cast<uint8_t
>(fmaxf(0.f, fminf(255.f, src)));
Definition: aligned_buffer.h:35
static CUTLASS_HOST_DEVICE DstType apply(SrcType src)
Definition: inner_product.h:103
CUTLASS_HOST_DEVICE Ctype inner_product(Atype a, Btype b, Ctype c)
Template function to compute an inner product.
Definition: inner_product.h:44
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
static CUTLASS_HOST_DEVICE uint8_t apply(float src)
Definition: inner_product.h:118
Definition: inner_product.h:98
Basic include for CUTLASS.
static CUTLASS_HOST_DEVICE int8_t apply(float src)
Definition: inner_product.h:109