CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ > Class Template Reference

#include <predicated_tile_access_iterator.h>

Classes

class  Params
 Parameters object is precomputed state and is host-constructible. More...
 

Public Types

using Shape = Shape_
 
using Element = Element_
 
using Layout = layout::ColumnMajorInterleaved< kInterleavedK >
 
using ThreadMap = ThreadMap_
 
using AccessType = AccessType_
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorRef = TensorRef< Element, Layout >
 
using TensorView = TensorView< Element, Layout >
 
using TensorCoord = typename Layout::TensorCoord
 
using Pointer = Element *
 
using NonConstPointer = typename platform::remove_const< Element >::type *
 
using UnderlyingIterator = PredicatedTileAccessIterator< layout::PitchLinearShape< Shape::kRow *kInterleavedK, Shape::kColumn/kInterleavedK >, Element, layout::PitchLinear,(kAdvanceRank==0?0:1), ThreadMap, AccessType >
 
using Mask = typename UnderlyingIterator::Mask
 Predicate vector stores mask to guard accesses. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset)
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id)
 Construct a PredicatedTileAccessIterator with zero threadblock offset. More...
 
CUTLASS_HOST_DEVICE void set_iteration_index (int index)
 Overrides the internal iteration index. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_HOST_DEVICE void add_tile_offset (TensorCoord const &tile_offset)
 
CUTLASS_HOST_DEVICE AccessType * get () const
 Returns a pointer. More...
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator & operator++ ()
 
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator operator++ (int)
 
CUTLASS_HOST_DEVICE void clear_mask ()
 Clears the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void enable_mask ()
 Enables the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void set_mask (Mask const &mask)
 Sets the predicate mask, overriding value stored in predicate iterator. More...
 
CUTLASS_HOST_DEVICE void get_mask (Mask &mask)
 Gets the mask. More...
 
CUTLASS_HOST_DEVICE bool valid ()
 Returns whether access is valid or not. More...
 

Static Public Attributes

static int const kInterleavedK = InterleavedK
 
static int const kAdvanceRank = AdvanceRank
 
static int const kAccessesPerVector = UnderlyingIterator::kAccessesPerVector
 

Detailed Description

template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, typename AccessType_, int InterleavedK>
class cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >

Specialization of PredicatedTileAccessIterator for column-major interleaved data (e.g. interleaved-32); the interleaving factor is given by InterleavedK. It is mapped to the congruous layout.

Satisfies: ForwardTileIteratorConcept | ReadableContiguousTileIteratorConcept | WriteableContiguousTileIteratorConcept | MaskedTileIteratorConcept

Member Typedef Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::AccessType = AccessType_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Element = Element_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Index = typename Layout::Index
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Layout = layout::ColumnMajorInterleaved<kInterleavedK>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::LongIndex = typename Layout::LongIndex
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Mask = typename UnderlyingIterator::Mask
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::NonConstPointer = typename platform::remove_const<Element>::type *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Pointer = Element *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::Shape = Shape_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::TensorCoord = typename Layout::TensorCoord
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::TensorRef = TensorRef<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::TensorView = TensorView<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::ThreadMap = ThreadMap_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
using cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::UnderlyingIterator = PredicatedTileAccessIterator< layout::PitchLinearShape<Shape::kRow * kInterleavedK, Shape::kColumn / kInterleavedK>, Element, layout::PitchLinear, (kAdvanceRank == 0 ? 0 : 1), ThreadMap, AccessType>

Constructor & Destructor Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id,
TensorCoord const &  threadblock_offset 
)
inline

Constructs a TileIterator from its precomputed state, threadblock offset, and thread ID

Parameters
params: Precomputed parameters object
pointer: Pointer to start of tensor
extent: Extent of tensor
thread_id: ID of each participating thread
threadblock_offset: Initial offset of threadblock
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id 
)
inline
Parameters
params: Precomputed parameters object
pointer: Pointer to start of tensor
extent: Extent of tensor
thread_id: ID of each participating thread

Member Function Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::add_tile_offset ( TensorCoord const &  tile_offset)
inline

Advances an iterator along logical dimensions of matrix in units of whole tiles

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::clear_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::enable_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE AccessType* cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::get ( ) const
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::get_mask ( Mask &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator& cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( )
inline

Advances to the next tile in memory.

The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( int  )
inline

Advances to the next tile in memory.

The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::set_iteration_index ( int  index)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::set_mask ( Mask const &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
CUTLASS_HOST_DEVICE bool cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::valid ( )
inline

Member Data Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::kAccessesPerVector = UnderlyingIterator::kAccessesPerVector
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::kAdvanceRank = AdvanceRank
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ , int InterleavedK>
int const cutlass::transform::threadblock::PredicatedTileAccessIterator< Shape_, Element_, layout::ColumnMajorInterleaved< InterleavedK >, AdvanceRank, ThreadMap_, AccessType_ >::kInterleavedK = InterleavedK
static

The documentation for this class was generated from the following file: