CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <tensor_op_multiplicand_sm75.h>
Public Types | |
using | Index = int32_t |
Index type used for coordinates. More... | |
using | LongIndex = int64_t |
Long index type used for offsets. More... | |
using | TensorCoord = MatrixCoord |
Logical coordinate. More... | |
using | Stride = Coord< kStrideRank, Index, LongIndex > |
Stride vector. More... | |
using | Base = TensorOpMultiplicandCrosswise< ElementSize, Crosswise > |
using | TileShape = typename Base::TileShape |
using | PartitionShape = typename Base::PartitionShape |
using | PartitionCount = typename Base::PartitionCount |
using | AccessCount = typename Base::AccessCount |
Public Member Functions | |
CUTLASS_HOST_DEVICE | ColumnMajorTensorOpMultiplicandCrosswise (Index ldm=0) |
Ctor. More... | |
CUTLASS_HOST_DEVICE | ColumnMajorTensorOpMultiplicandCrosswise (Stride stride) |
Ctor. More... | |
CUTLASS_HOST_DEVICE LongIndex | operator() (TensorCoord const &coord) const |
CUTLASS_HOST_DEVICE TensorCoord | inverse (LongIndex offset) const |
Inverse of layout function, mapping linear offset to logical coordinate. More... | |
CUTLASS_HOST_DEVICE Stride | stride () const |
Returns the stride of the layout. More... | |
CUTLASS_HOST_DEVICE Stride & | stride () |
Returns the stride of the layout. More... | |
CUTLASS_HOST_DEVICE LongIndex | capacity (TensorCoord const &extent) const |
Static Public Member Functions | |
static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise | packed (TensorCoord const &extent) |
Helper returns a layout to a tightly packed tensor. More... | |
Static Public Attributes | |
static int const | kRank = 2 |
Logical rank of tensor. More... | |
static int const | kStrideRank = 1 |
Rank of stride vector. More... | |
static int const | kAccessSize = Base::kAccessSize |
This layout is optimized for 128b accesses. More... | |
static int const | kElementSize = Base::kElementSize |
static int const | kElementsPerAccess = Base::kElementsPerAccess |
Template mapping a column-major view of pitch-linear memory to TensorOpMultiplicandCrosswise.
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::AccessCount = typename Base::AccessCount |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::Base = TensorOpMultiplicandCrosswise<ElementSize, Crosswise> |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::Index = int32_t |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::LongIndex = int64_t |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::PartitionCount = typename Base::PartitionCount |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::PartitionShape = typename Base::PartitionShape |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::Stride = Coord<kStrideRank, Index, LongIndex> |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::TensorCoord = MatrixCoord |
using cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::TileShape = typename Base::TileShape |
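CUTLASS_HOST_DEVICE cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::ColumnMajorTensorOpMultiplicandCrosswise (Index ldm=0) |
inline |
CUTLASS_HOST_DEVICE cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::ColumnMajorTensorOpMultiplicandCrosswise (Stride stride) |
inline |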
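CUTLASS_HOST_DEVICE LongIndex cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::capacity (TensorCoord const &extent) const |
inline |
Computes the number of contiguous elements needed to store a tensor with the given extent.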
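CUTLASS_HOST_DEVICE TensorCoord cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::inverse (LongIndex offset) const |
inline |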
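CUTLASS_HOST_DEVICE LongIndex cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::operator() (TensorCoord const &coord) const |
inline |
Returns the offset of a coordinate in linear memory. Assumes the coordinate follows the (contiguous, strided) convention.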
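static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::packed (TensorCoord const &extent) |
inline static |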
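CUTLASS_HOST_DEVICE Stride cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::stride () const |
inline |
CUTLASS_HOST_DEVICE Stride & cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::stride () |
inline |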
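int const cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::kAccessSize = Base::kAccessSize |
static |
int const cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::kElementSize = Base::kElementSize |
static |
int const cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::kElementsPerAccess = Base::kElementsPerAccess |
static |
int const cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::kRank = 2 |
static |
int const cutlass::layout::ColumnMajorTensorOpMultiplicandCrosswise< ElementSize, Crosswise >::kStrideRank = 1 |
static |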