CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize > Class Template Reference

#include <predicated_tile_iterator.h>

Classes

class  Params
 Parameters object is precomputed state and is host-constructible. More...
 

Public Types

using Shape = Shape_
 
using Element = Element_
 
using Layout = layout::ColumnMajor
 
using ThreadMap = ThreadMap_
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorRef = TensorRef< Element, Layout >
 
using TensorView = TensorView< Element, Layout >
 
using TensorCoord = typename Layout::TensorCoord
 
using Pointer = Element *
 
using NonConstPointer = typename platform::remove_const< Element >::type *
 
using UnderlyingIterator = PredicatedTileIterator< layout::PitchLinearShape< Shape::kRow, Shape::kColumn >, Element, layout::PitchLinear,(kAdvanceRank==0?0:1), ThreadMap, AccessSize >
 
using AccessType = typename UnderlyingIterator::AccessType
 
using Fragment = cutlass::Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess >
 Fragment object to be loaded or stored. More...
 
using Mask = typename UnderlyingIterator::Mask
 Predicate vector stores mask to guard accesses. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE PredicatedTileIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset)
 Constructs a TileIterator from its precomputed state, threadblock offset, and thread ID. More...
 
CUTLASS_HOST_DEVICE PredicatedTileIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id)
 Construct a PredicatedTileIterator with zero threadblock offset. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_HOST_DEVICE PredicatedTileIterator & operator++ ()
 
CUTLASS_HOST_DEVICE PredicatedTileIterator operator++ (int)
 
CUTLASS_HOST_DEVICE void clear_mask ()
 Clears the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void enable_mask ()
 Enables the predicate set efficiently. More...
 
CUTLASS_HOST_DEVICE void set_mask (Mask const &mask)
 Sets the predicate mask, overriding value stored in predicate iterator. More...
 
CUTLASS_HOST_DEVICE void get_mask (Mask &mask)
 Gets the mask. More...
 
CUTLASS_DEVICE void load_with_pointer_offset (Fragment &frag, Index pointer_offset)
 Loads a fragment from memory. More...
 
CUTLASS_DEVICE void load (Fragment &frag)
 Loads a fragment from memory. More...
 
CUTLASS_DEVICE void store_with_pointer_offset (Fragment const &frag, Index pointer_offset)
 Store a fragment to memory. More...
 
CUTLASS_DEVICE void store (Fragment const &frag)
 Store a fragment to memory. More...
 

Static Public Attributes

static int const kAdvanceRank = AdvanceRank
 

Detailed Description

template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, int AccessSize>
class cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >

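Specialization of PredicatedTileIterator for column-major data, implemented in terms of the underlying pitch-linear PredicatedTileIterator (see UnderlyingIterator).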

Satisfies: ForwardTileIteratorConcept | ReadableContiguousTileIteratorConcept | WriteableContiguousTileIteratorConcept | MaskedTileIteratorConcept

Member Typedef Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::AccessType = typename UnderlyingIterator::AccessType
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Element = Element_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Fragment = cutlass::Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Index = typename Layout::Index
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Layout = layout::ColumnMajor
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::LongIndex = typename Layout::LongIndex
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Mask = typename UnderlyingIterator::Mask
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::NonConstPointer = typename platform::remove_const<Element>::type *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Pointer = Element *
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::Shape = Shape_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::TensorCoord = typename Layout::TensorCoord
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::TensorRef = TensorRef<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::TensorView = TensorView<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::ThreadMap = ThreadMap_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
using cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::UnderlyingIterator = PredicatedTileIterator< layout::PitchLinearShape<Shape::kRow, Shape::kColumn>, Element, layout::PitchLinear, (kAdvanceRank == 0 ? 0 : 1), ThreadMap, AccessSize >

Constructor & Destructor Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::PredicatedTileIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id,
TensorCoord const &  threadblock_offset 
)
inline
Parameters
params              Precomputed parameters object
pointer             Pointer to start of tensor
extent              Extent of tensor
thread_id           ID of each participating thread
threadblock_offset  Initial offset of threadblock
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::PredicatedTileIterator ( Params const &  params,
Pointer  pointer,
TensorCoord  extent,
int  thread_id 
)
inline
Parameters
params     Precomputed parameters object
pointer    Pointer to start of tensor
extent     Extent of tensor
thread_id  ID of each participating thread

Member Function Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::clear_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::enable_mask ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::get_mask ( Mask &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::load ( Fragment &  frag)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::load_with_pointer_offset ( Fragment &  frag,
Index  pointer_offset 
)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE PredicatedTileIterator& cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::operator++ ( )
inline

Advances to the next tile in memory.

The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE PredicatedTileIterator cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::operator++ ( int  )
inline

Advances to the next tile in memory.

The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::set_mask ( Mask const &  mask)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::store ( Fragment const &  frag)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::store_with_pointer_offset ( Fragment const &  frag,
Index  pointer_offset 
)
inline

Member Data Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
int const cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, AccessSize >::kAdvanceRank = AdvanceRank
static

The documentation for this class was generated from the following file: