32 #include "../numeric_types.h" 44 template <
typename T,
int N>
45 Array<T, N>
operator*(Array<T, N>
const &a, Array<T, N>
const &b) {
48 for (
int i = 0; i < N; ++i) {
55 template <
typename T,
int N>
56 Array<T, N>
operator+(Array<T, N>
const &a, Array<T, N>
const &b) {
59 for (
int i = 0; i < N; ++i) {
66 template <
typename T,
int N>
67 Array<T, N>
operator-(Array<T, N>
const &a, Array<T, N>
const &b) {
70 for (
int i = 0; i < N; ++i) {
83 template <
typename T,
int N>
84 Array<T, N>
mac(Array<T, N>
const &a, Array<T, N>
const &b, Array<T, N>
const &c) {
87 for (
int i = 0; i < N; ++i) {
88 d[i] = a[i] * b[i] + c;
100 template <
typename Element,
typename Accumulator,
int N>
101 Accumulator
dot(Array<T, N>
const &a, Array<T, N>
const &b, Accumulator accum) {
103 for (
int i = 0; i < N; ++i) {
104 accum += a[i] * b[i];
Definition: aligned_buffer.h:35
Templates exposing SIMD operators for SM60.
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_HOST_DEVICE Array< T, N > operator-(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:67
CUTLASS_HOST_DEVICE Array< T, N > operator*(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:45
CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)
Definition: simd.h:101
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE Array< T, N > operator+(Array< T, N > const &a, Array< T, N > const &b)
Definition: simd.h:56
Templates exposing SIMD operators for SM60.
CUTLASS_HOST_DEVICE Array< T, N > mac(Array< T, N > const &a, Array< T, N > const &b, Array< T, N > const &c)
Definition: simd.h:84