CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <predicated_tile_access_iterator_2dthreadtile.h>
Classes | |
class | Params |
Parameters object is precomputed state and is host-constructible. More... | |
Public Types | |
using | Shape = Shape_ |
using | Element = Element_ |
using | Layout = layout::PitchLinear |
using | ThreadMap = ThreadMap_ |
using | AccessType = AccessType_ |
using | Index = typename Layout::Index |
using | LongIndex = typename Layout::LongIndex |
using | TensorRef = TensorRef< Element, Layout > |
using | TensorView = TensorView< Element, Layout > |
using | TensorCoord = typename Layout::TensorCoord |
using | Pointer = Element * |
using | NonConstPointer = typename platform::remove_const< Element >::type * |
using | Mask = Array< uint32_t, kPredicateWordCount > |
Predicate vector stores mask to guard accesses. More... | |
Public Member Functions | |
CUTLASS_HOST_DEVICE | PredicatedTileAccessIterator2dThreadTile (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset) |
CUTLASS_HOST_DEVICE | PredicatedTileAccessIterator2dThreadTile (Params const &params, Pointer pointer, TensorCoord extent, int thread_id) |
Construct a PredicatedTileAccessIterator2dThreadTile with zero threadblock offset. More... | |
CUTLASS_HOST_DEVICE void | set_iteration_index (int index) |
Overrides the internal iteration index. More... | |
CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
Adds a pointer offset in units of Element. More... | |
CUTLASS_DEVICE void | add_tile_offset (TensorCoord const &tile_offset) |
Advances an iterator along logical dimensions of matrix in units of whole tiles. More... | |
CUTLASS_HOST_DEVICE AccessType * | get () const |
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile & | operator++ () |
Increment and return an instance to self. More... | |
CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile | operator++ (int) |
Post-increment: increments the iterator and returns an instance by value rather than a reference to self. More... | |
CUTLASS_HOST_DEVICE void | clear_mask () |
Clears the predicate set efficiently. More... | |
CUTLASS_HOST_DEVICE void | enable_mask () |
Enables the predicate set efficiently. More... | |
CUTLASS_HOST_DEVICE void | set_mask (Mask const &mask) |
Sets the predicate mask, overriding value stored in predicate iterator. More... | |
CUTLASS_HOST_DEVICE void | get_mask (Mask &mask) |
Gets the mask. More... | |
CUTLASS_HOST_DEVICE bool | valid () |
Returns whether access is valid or not. More... | |
Static Public Attributes | |
static int const | kAdvanceRank = AdvanceRank |
static int const | kPredicatesPerByte = 4 |
static int const | kPredicatesPerWord = 4 * kPredicatesPerByte |
static int const | kPredicateByteCount = (ThreadMap::Iterations::kCount * ThreadMap::ThreadAccessShape::kStrided + kPredicatesPerByte - 1) / kPredicatesPerByte |
Number of bytes containing predicates (the count of 32b words is kPredicateWordCount). More... | |
static int const | kPredicateWordCount = (kPredicateByteCount + 3) / 4 |
static unsigned const | kPredicateMask = (1u << kPredicatesPerByte) - 1u |
Specialization of PredicatedTileAccessIterator2dThreadTile for pitch-linear data.
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::AccessType = AccessType_ |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Element = Element_ |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Index = typename Layout::Index |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Layout = layout::PitchLinear |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::LongIndex = typename Layout::LongIndex |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Mask = Array<uint32_t, kPredicateWordCount> |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::NonConstPointer = typename platform::remove_const<Element>::type * |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Pointer = Element * |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Shape = Shape_ |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorCoord = typename Layout::TensorCoord |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorRef = TensorRef<Element, Layout> |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorView = TensorView<Element, Layout> |
using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::ThreadMap = ThreadMap_ |
|
inline |
Constructs a TileIterator from its precomputed state, threadblock offset, and thread ID
params | Precomputed parameters object |
pointer | Pointer to start of tensor |
extent | Extent of tensor |
thread_id | ID of each participating thread |
threadblock_offset | Initial offset of threadblock |
|
inline |
params | Precomputed parameters object |
pointer | Pointer to start of tensor |
extent | Extent of tensor |
thread_id | ID of each participating thread |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |