CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Templates exposing SIMD operators for SM60. More...
#include "simd.h"
Go to the source code of this file.
Namespaces | |
cutlass | |
cutlass::arch | |
Functions | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int8_t * int8_t) x 4 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint8_t * int8_t) x 4 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int8_t * uint8_t) x 4 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint8_t * uint8_t) x 4 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int16_t * int8_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint16_t * int8_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int16_t * uint8_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint16_t * uint8_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int16_t * int16_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint16_t * int16_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (int16_t * uint16_t) x 2 + int32_t. More... | |
template<> | |
CUTLASS_HOST_DEVICE int32_t | cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum) |
Dot product operator - specialized for int32_t <- (uint16_t * uint16_t) x 2 + int32_t. More... | |