CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ > Class Template Reference

#include <predicated_tile_iterator.h>

Classes

struct  Mask
 Mask object. More...
 
struct  Params
 

Public Types

using ThreadMap = ThreadMap_
 
using Shape = typename ThreadMap::Shape
 
using Element = Element_
 
using Layout = layout::RowMajor
 
using TensorRef = TensorRef< Element, Layout >
 
using ConstTensorRef = typename TensorRef::ConstTensorRef
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorCoord = MatrixCoord
 
using Fragment = Array< Element, ThreadMap::Iterations::kColumn *ThreadMap::Iterations::kRow *ThreadMap::Iterations::kGroup *ThreadMap::Iterations::kCluster *ThreadMap::kElementsPerAccess >
 Fragment object. More...
 
using AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess >
 Memory access size. More...
 

Public Member Functions

CUTLASS_DEVICE PredicatedTileIterator (Params const &params, Element *pointer, TensorCoord extent, int thread_idx, TensorCoord threadblock_offset=TensorCoord())
 Constructor. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_DEVICE void load (Fragment &frag)
 Loads a fragment from memory. More...
 
CUTLASS_DEVICE void store (Fragment const &frag)
 Stores a fragment to memory. More...
 
CUTLASS_HOST_DEVICE PredicatedTileIterator & operator++ ()
 Advances to the next position to load or store. More...
 
CUTLASS_DEVICE void clear_mask ()
 Efficiently disables all accesses guarded by mask. More...
 
CUTLASS_DEVICE void enable_mask ()
 Efficiently enables all accesses guarded by mask. More...
 
CUTLASS_DEVICE void get_mask (Mask &mask)
 Gets the mask. More...
 
CUTLASS_DEVICE void set_mask (Mask const &mask)
 Sets the mask.

Static Public Attributes

static int const kElementsPerAccess = ThreadMap::kElementsPerAccess
 
static int const kThreads = ThreadMap::kThreads
 
static int const kIterations = ThreadMap::Count::kTile
 

Detailed Description

template<typename ThreadMap_, typename Element_>
class cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >

Tile iterator used to load output tile from shared memory in epilogue.

Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator

Member Typedef Documentation

template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess>
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::ConstTensorRef = typename TensorRef::ConstTensorRef
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Element = Element_
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess>
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Index = typename Layout::Index
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Layout = layout::RowMajor
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::LongIndex = typename Layout::LongIndex
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Shape = typename ThreadMap::Shape
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::TensorCoord = MatrixCoord
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::TensorRef = TensorRef<Element, Layout>
template<typename ThreadMap_ , typename Element_ >
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::ThreadMap = ThreadMap_

Constructor & Destructor Documentation

template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::PredicatedTileIterator ( Params const &  params,
Element *  pointer,
TensorCoord  extent,
int  thread_idx,
TensorCoord  threadblock_offset = TensorCoord() 
)
inline

Member Function Documentation

template<typename ThreadMap_ , typename Element_ >
CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::clear_mask ( )
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::enable_mask ( )
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::get_mask ( Mask &  mask)
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::load ( Fragment &  frag)
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_HOST_DEVICE PredicatedTileIterator& cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::operator++ ( )
inline

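Advances to the next position to load or store. (The sentence "Efficiently disables all accesses guarded by mask", which the documentation generator attached to this operator, appears to describe clear_mask() instead.)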

template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::set_mask ( Mask const &  mask)
inline
template<typename ThreadMap_ , typename Element_ >
CUTLASS_DEVICE void cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::store ( Fragment const &  frag)
inline

Member Data Documentation

template<typename ThreadMap_ , typename Element_ >
int const cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::kElementsPerAccess = ThreadMap::kElementsPerAccess
static
template<typename ThreadMap_ , typename Element_ >
int const cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::kIterations = ThreadMap::Count::kTile
static
template<typename ThreadMap_ , typename Element_ >
int const cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::kThreads = ThreadMap::kThreads
static

The documentation for this class was generated from the following file: