CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment > Class Template Reference

#include <shared_load_iterator.h>

Public Types

using ThreadMap = ThreadMap_
 
using Shape = typename ThreadMap::Shape
 
using Element = Element_
 
using Layout = layout::RowMajor
 
using TensorRef = TensorRef< Element, Layout >
 
using ConstTensorRef = typename TensorRef::ConstTensorRef
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorCoord = MatrixCoord
 
using Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess >
 Fragment object. More...
 
using AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment >
 Memory access size. More...
 

Public Member Functions

CUTLASS_DEVICE SharedLoadIterator (TensorRef ref, int thread_idx)
 Constructor. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_DEVICE void add_tile_offset (TensorCoord const &offset)
 
CUTLASS_DEVICE void load_with_pointer_offset (Fragment &frag, Index pointer_offset)
 Loads a fragment from memory. More...
 
CUTLASS_DEVICE void load (Fragment &frag)
 Loads a fragment. More...
 

Static Public Attributes

static int const kElementsPerAccess = ThreadMap::kElementsPerAccess
 
static int const kMinAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8
 
static int const kAlignment = (MaxAlignment < kMinAlignment ? MaxAlignment : kMinAlignment)
 
static int const kThreads = ThreadMap::kThreads
 

Detailed Description

template<typename ThreadMap_, typename Element_, int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
class cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >

Tile iterator used to load output tile from shared memory in epilogue.

Satisfies: ReadableTileIterator

Member Typedef Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess, kAlignment>
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ConstTensorRef = typename TensorRef::ConstTensorRef
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Element = Element_
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess>
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Index = typename Layout::Index
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Layout = layout::RowMajor
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::LongIndex = typename Layout::LongIndex
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::Shape = typename ThreadMap::Shape
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorCoord = MatrixCoord
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::TensorRef = TensorRef<Element, Layout>
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
using cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::ThreadMap = ThreadMap_

Constructor & Destructor Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
CUTLASS_DEVICE cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::SharedLoadIterator ( TensorRef  ref,
int  thread_idx 
)
inline

Member Function Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::add_tile_offset ( TensorCoord const &  offset)
inline
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::load ( Fragment &  frag)
inline
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::load_with_pointer_offset ( Fragment &  frag,
Index  pointer_offset 
)
inline

Member Data Documentation

template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kAlignment = (MaxAlignment < kMinAlignment ? MaxAlignment : kMinAlignment)
static
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kElementsPerAccess = ThreadMap::kElementsPerAccess
static
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kMinAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8
static
template<typename ThreadMap_ , typename Element_ , int MaxAlignment = ThreadMap_::kElementsPerAccess * sizeof_bits<Element_>::value / 8>
int const cutlass::epilogue::threadblock::SharedLoadIterator< ThreadMap_, Element_, MaxAlignment >::kThreads = ThreadMap::kThreads
static

The documentation for this class was generated from the following file: