|
template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > |
__global__ void | Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum) |
|
template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > |
__global__ void | BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum) |
|
template<typename T > |
__global__ void | TensorInitializeUniform (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| Kernel to initialize tensor to uniform random distribution. More...
|
|
template<typename T > |
__global__ void | TensorInitializeGaussian (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| Kernel to initialize tensor to a Gaussian random distribution. More...
|
|
template<typename T > |
__global__ void | TensorInitializeLinear (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| Kernel to initialize tensor with values that vary linearly with element coordinates. More...
|
|
template<typename T > |
__global__ void | TensorInitializeIdentity (Distribution dist, int64_t seed, int dim_contiguous, int dim_strided, T *tensor, int ldm) |
| Kernel to initialize tensor to an identity matrix. More...
|
|
template<typename Func , int Rank, typename Params > |
__global__ void | TensorForEach (Coord< Rank > size, Params params=Params()) |
| Kernel calls a functor for each element in a tensor's index space. More...
|
|
template<typename Func , int Rank, typename Params > |
__global__ void | TensorDiagonalForEach (Coord< Rank > size, Params params, int start, int end) |
| Kernel calls a functor for each element along a tensor's diagonal. More...
|
|
template<typename Element , typename Func > |
__global__ void | BlockForEach (Element *ptr, size_t capacity, typename Func::Params params) |
|
template<typename Element > |
__global__ void | BlockCompareEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity) |
|
template<typename Element > |
__global__ void | BlockCompareRelativelyEqual (int *equal, Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor) |
|