CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ > Class Template Reference

#include <predicated_tile_access_iterator.h>

Classes

class  Params
 Parameters object is precomputed state and is host-constructible. More...
 

Public Types

using Shape = Shape_
 
using Element = Element_
 
using Layout = layout::PitchLinear
 
using ThreadMap = ThreadMap_
 
using AccessType = AccessType_
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorRef = TensorRef< Element, Layout >
 
using TensorView = TensorView< Element, Layout >
 
using TensorCoord = typename Layout::TensorCoord
 
using Pointer = Element *
 
using NonConstPointer = typename platform::remove_const< Element >::type *
 
using Mask = Array< uint32_t, kPredicateWordCount >
 Predicate vector stores mask to guard accesses. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset)
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id)
 Construct a PredicatedTileAccessIterator with zero threadblock offset. More...
 
CUTLASS_HOST_DEVICE void set_iteration_index (int index)
 Overrides the internal iteration index. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_DEVICE void add_tile_offset (TensorCoord const &tile_offset)
 Advances an iterator along logical dimensions of matrix in units of whole tiles. More...
 
CUTLASS_HOST_DEVICE AccessType * get () const
 Returns a pointer. More...
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator & operator++ ()
 Increment and return an instance to self. More...
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator operator++ (int)
 Increment (postfix form) and return an instance by value. More...
 
CUTLASS_HOST_DEVICE void clear_mask ()
 Clears the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void enable_mask ()
 Enables the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void set_mask (Mask const &mask)
 Sets the predicate mask, overriding value stored in predicate iterator. More...
 
CUTLASS_HOST_DEVICE void get_mask (Mask &mask)
 Gets the mask. More...
 
CUTLASS_HOST_DEVICE bool valid ()
 Returns whether access is valid or not. More...
 

Static Public Attributes

static int const kAdvanceRank = AdvanceRank
 
static int const kAccessesPerVector = ThreadMap::kElementsPerAccess / AccessType::kElements
 
static int const kPredicatesPerByte = 4
 
static int const kPredicatesPerWord = 4 * kPredicatesPerByte
 
static int const kPredicateCount = ThreadMap::Iterations::kCount * kAccessesPerVector
 
static int const kPredicateByteCount
 Number of bytes containing predicates (kPredicateWordCount is the corresponding number of 32b words). More...
 
static int const kPredicateWordCount = (kPredicateByteCount + 3) / 4
 
static unsigned const kPredicateMask = (1u << kPredicatesPerByte) - 1u
 

Detailed Description

template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, typename AccessType_>
class cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >

Specialization of PredicatedTileAccessIterator for pitch-linear data.

Member Typedef Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::AccessType = AccessType_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Element = Element_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Index = typename Layout::Index
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Layout = layout::PitchLinear
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::LongIndex = typename Layout::LongIndex
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Mask = Array<uint32_t, kPredicateWordCount>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::NonConstPointer = typename platform::remove_const<Element>::type *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Pointer = Element *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Shape = Shape_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorCoord = typename Layout::TensorCoord
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorRef = TensorRef<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorView = TensorView<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::ThreadMap = ThreadMap_

Constructor & Destructor Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id,
TensorCoord const &  threadblock_offset 
)
inline

Constructs a TileIterator from its precomputed state, threadblock offset, and thread ID

Parameters
params: Precomputed parameters object
pointer: Pointer to start of tensor
extent: Extent of tensor
thread_id: ID of each participating thread
threadblock_offset: Initial offset of threadblock
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id 
)
inline
Parameters
params: Precomputed parameters object
pointer: Pointer to start of tensor
extent: Extent of tensor
thread_id: ID of each participating thread

Member Function Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::add_tile_offset ( TensorCoord const &  tile_offset)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::clear_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::enable_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE AccessType* cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::get ( ) const
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::get_mask ( Mask &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator& cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( int  )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::set_iteration_index ( int  index)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::set_mask ( Mask const &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
CUTLASS_HOST_DEVICE bool cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::valid ( )
inline

Member Data Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kAccessesPerVector = ThreadMap::kElementsPerAccess / AccessType::kElements
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kAdvanceRank = AdvanceRank
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateByteCount
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateCount = ThreadMap::Iterations::kCount * kAccessesPerVector
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
unsigned const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateMask = (1u << kPredicatesPerByte) - 1u
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicatesPerByte = 4
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicatesPerWord = 4 * kPredicatesPerByte
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateWordCount = (kPredicateByteCount + 3) / 4
static

The documentation for this class was generated from the following file: