CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Namespaces | Functions
simd_sm61.h File Reference

Templates exposing SIMD operators for SM61. More...

#include "simd.h"
Include dependency graph for simd_sm61.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

 cutlass
 
 cutlass::arch
 

Functions

template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int8_t * int8_t) x 4 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint8_t, 4 > const &a, Array< int8_t, 4 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint8_t * int8_t) x 4 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int8_t * uint8_t) x 4 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint8_t, 4 > const &a, Array< uint8_t, 4 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint8_t * uint8_t) x 4 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int16_t * int8_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< int8_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint16_t * int8_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int16_t * uint8_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< uint8_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint16_t * uint8_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int16_t * int16_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< int16_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint16_t * int16_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< int16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (int16_t * uint16_t) x 2 + int32_t. More...
 
template<>
CUTLASS_HOST_DEVICE int32_t cutlass::arch::dot (Array< uint16_t, 2 > const &a, Array< uint16_t, 2 > const &b, int32_t accum)
 Dot product operator - specialized for int32_t <- (uint16_t * uint16_t) x 2 + int32_t. More...