CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise > Struct Template Reference

#include <tensor_op_multiplicand_sm75.h>

Public Types

using Index = int32_t
 Index type used for coordinates. More...
 
using LongIndex = int64_t
 Long index type used for offsets. More...
 
using TensorCoord = PitchLinearCoord
 Logical coordinate. More...
 
using Stride = Coord< kStrideRank, Index, LongIndex >
 Stride vector. More...
 
using TileShape = PitchLinearShape< kTileShapeContiguous, kTileShapeStride >
 
using PartitionShape = PitchLinearShape< 4, 4 >
 Fundamental partition shape in units of vectors. More...
 
using PartitionCount = PitchLinearShape< TileShape::kContiguous/PartitionShape::kContiguous, TileShape::kStrided/PartitionShape::kStrided >
 
using AccessCount = PitchLinearShape< PartitionShape::kContiguous, PartitionShape::kStrided >
 

Public Member Functions

CUTLASS_HOST_DEVICE TensorOpMultiplicand (Index ldm=0)
 Ctor. More...
 
CUTLASS_HOST_DEVICE TensorOpMultiplicand (Stride stride)
 Ctor. More...
 
CUTLASS_HOST_DEVICE LongIndex operator() (TensorCoord const &coord) const
 
CUTLASS_HOST_DEVICE Stride stride () const
 Returns the stride of the layout. More...
 
CUTLASS_HOST_DEVICE Stride & stride ()
 Returns the stride of the layout. More...
 
CUTLASS_HOST_DEVICE LongIndex capacity (TensorCoord const &extent) const
 

Static Public Member Functions

static CUTLASS_HOST_DEVICE TensorOpMultiplicand packed (TensorCoord const &extent)
 Helper returns a layout to a tightly packed tensor. More...
 

Static Public Attributes

static int const kRank = 2
 Logical rank of tensor. More...
 
static int const kStrideRank = 1
 Rank of stride vector. More...
 
static int const kAccessSize = 128
 This layout is optimized for 128b accesses. More...
 
static int const kElementSize = ElementSize
 
static int const kElementsPerAccess = kAccessSize / kElementSize
 
static int const kCrosswise = Crosswise
 
static int const kTileShapeContiguous = 128 / (kAccessSize / 8)
 
static int const kFactor
 Number of kblocks to store PartitionShape::kContiguous Elements. More...
 
static int const kTileShapeStride
 

Detailed Description

template<int ElementSize, int Crosswise>
struct cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >

Template based on element size (in bits) - defined in terms of pitch-linear memory and Crosswise size (in elements).

Member Typedef Documentation

template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::AccessCount = PitchLinearShape<PartitionShape::kContiguous, PartitionShape::kStrided>
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::Index = int32_t
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::LongIndex = int64_t
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::PartitionShape = PitchLinearShape<4, 4>
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::Stride = Coord<kStrideRank, Index, LongIndex>
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::TensorCoord = PitchLinearCoord
template<int ElementSize, int Crosswise>
using cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::TileShape = PitchLinearShape<kTileShapeContiguous, kTileShapeStride>

Fundamental tile shape in units of vectors. For TN kblock=32 and 8x8x16 shapes, TileShape = <8, 4>. For the rest, TileShape = <8, 8>.

Constructor & Destructor Documentation

template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::TensorOpMultiplicand ( Index  ldm = 0)
inline
template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::TensorOpMultiplicand ( Stride  stride)
inline

Member Function Documentation

template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE LongIndex cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::capacity ( TensorCoord const &  extent) const
inline

Compute the number of contiguous elements needed to store a tensor with the given size.

template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE LongIndex cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::operator() ( TensorCoord const &  coord) const
inline

Returns the offset of a coordinate in linear memory. Assumes coordinate has convention (contiguous, strided).

template<int ElementSize, int Crosswise>
static CUTLASS_HOST_DEVICE TensorOpMultiplicand cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::packed ( TensorCoord const &  extent)
inline static
template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE Stride cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::stride ( ) const
inline
template<int ElementSize, int Crosswise>
CUTLASS_HOST_DEVICE Stride& cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::stride ( )
inline

Member Data Documentation

template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kAccessSize = 128
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kCrosswise = Crosswise
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kElementSize = ElementSize
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kElementsPerAccess = kAccessSize / kElementSize
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kFactor
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kRank = 2
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kStrideRank = 1
static
template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kTileShapeContiguous = 128 / (kAccessSize / 8)
static

Contiguous dimension of the tile shape matches one shared memory cache line - 128B. For a 128-bit access size, it equals 8 accesses.

template<int ElementSize, int Crosswise>
int const cutlass::layout::TensorOpMultiplicand< ElementSize, Crosswise >::kTileShapeStride
static
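Initial value:
= ((kTileShapeContiguous / kFactor) > (32 / kTileShapeContiguous))
      ? (kTileShapeContiguous / kFactor)
      : (32 / kTileShapeContiguous)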

The strided dimension needs to be at least WarpSize(32) / kTileShapeContiguous for a warp to access. To ensure conflict free access, it also needs to be at least (kTileShapeContiguous / kFactor).


The documentation for this struct was generated from the following file:
tensor_op_multiplicand_sm75.h