CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK > Class Template Reference

#include <predicated_tile_iterator.h>

Classes

struct  Mask
 Mask object. More...
 
struct  Params
 

Public Types

using ThreadMap = ThreadMap_
 
using Element = Element_
 
using Layout = layout::ColumnMajorInterleaved< InterleavedK >
 
using TensorRef = TensorRef< Element, Layout >
 
using ConstTensorRef = typename TensorRef::ConstTensorRef
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorCoord = layout::PitchLinearCoord
 
using Fragment = Array< Element, ThreadMap::kElementsPerAccess >
 Fragment object. More...
 
using AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess >
 Memory access size. More...
 

Public Member Functions

CUTLASS_DEVICE InterleavedPredicatedTileIterator (Params const &params, Element *pointer, TensorCoord extent, int thread_idx, TensorCoord threadblock_offset)
 Constructor. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_DEVICE void load (Fragment &frag)
 Loads a fragment from memory. More...
 
CUTLASS_DEVICE void store (Fragment const &frag)
 Stores a fragment to memory. More...
 
CUTLASS_HOST_DEVICE void set_iteration_index (int iteration)
 Overrides the internal iteration index. More...
 
CUTLASS_HOST_DEVICE InterleavedPredicatedTileIterator & operator++ ()
 Advances to the next position to load or store. More...
 
CUTLASS_DEVICE void clear_mask ()
 Efficiently disables all accesses guarded by mask. More...
 
CUTLASS_DEVICE void enable_mask ()
 Efficiently enables all accesses guarded by mask. More...
 
CUTLASS_DEVICE void get_mask (Mask &mask)
 Gets the mask. More...
 
CUTLASS_DEVICE void set_mask (Mask const &mask)
 Sets the mask.
 

Static Public Attributes

static int const kElementsPerAccess = ThreadMap::kElementsPerAccess
 
static int const kThreads = ThreadMap::kThreads
 
static int const kIterations = ThreadMap::Iterations::kCount
 

Detailed Description

template<typename ThreadMap_, typename Element_, int InterleavedK>
class cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >

Tile iterator used to load output tile from shared memory in epilogue.

Satisfies: ReadableTileIterator | InterleavedPredicatedTileIterator | ForwardTileIterator

Member Typedef Documentation

template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess>
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::ConstTensorRef = typename TensorRef::ConstTensorRef
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Element = Element_
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Fragment = Array<Element, ThreadMap::kElementsPerAccess>
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Index = typename Layout::Index
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::Layout = layout::ColumnMajorInterleaved<InterleavedK>
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::LongIndex = typename Layout::LongIndex
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::TensorCoord = layout::PitchLinearCoord
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::TensorRef = TensorRef<Element, Layout>
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
using cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::ThreadMap = ThreadMap_

Constructor & Destructor Documentation

template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::InterleavedPredicatedTileIterator ( Params const &  params,
Element *  pointer,
TensorCoord  extent,
int  thread_idx,
TensorCoord  threadblock_offset 
)
inline

Member Function Documentation

template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::clear_mask ( )
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::enable_mask ( )
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::get_mask ( Mask &  mask)
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::load ( Fragment &  frag)
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_HOST_DEVICE InterleavedPredicatedTileIterator& cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::operator++ ( )
inline

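Advances to the next position to load or store. (Note: the generated text that appeared here, "Efficiently disables all accesses guarded by mask", appears to describe clear_mask() rather than operator++.)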

template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::set_iteration_index ( int  iteration)
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::set_mask ( Mask const &  mask)
inline
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
CUTLASS_DEVICE void cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::store ( Fragment const &  frag)
inline

Member Data Documentation

template<typename ThreadMap_ , typename Element_ , int InterleavedK>
int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kElementsPerAccess = ThreadMap::kElementsPerAccess
static
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kIterations = ThreadMap::Iterations::kCount
static
template<typename ThreadMap_ , typename Element_ , int InterleavedK>
int const cutlass::epilogue::threadblock::InterleavedPredicatedTileIterator< ThreadMap_, Element_, InterleavedK >::kThreads = ThreadMap::kThreads
static

The documentation for this class was generated from the following file: