CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <predicated_tile_iterator.h>
Classes | |
struct | Mask |
Mask object. More... | |
struct | Params |
Public Types | |
using | ThreadMap = ThreadMap_ |
using | Shape = typename ThreadMap::Shape |
using | Element = Element_ |
using | Layout = layout::RowMajor |
using | TensorRef = TensorRef< Element, Layout > |
using | ConstTensorRef = typename TensorRef::ConstTensorRef |
using | Index = typename Layout::Index |
using | LongIndex = typename Layout::LongIndex |
using | TensorCoord = MatrixCoord |
using | Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess > |
Fragment object. More... | |
using | AccessType = AlignedArray< Element, ThreadMap::kElementsPerAccess > |
Memory access size. More... | |
Public Member Functions | |
CUTLASS_DEVICE | PredicatedTileIterator (Params const &params, Element *pointer, TensorCoord extent, int thread_idx, TensorCoord threadblock_offset=TensorCoord()) |
Constructor. More... | |
CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
Adds a pointer offset in units of Element. More... | |
CUTLASS_DEVICE void | load (Fragment &frag) |
Loads a fragment from memory. More... | |
CUTLASS_DEVICE void | store (Fragment const &frag) |
Stores a fragment to memory. More... | |
CUTLASS_HOST_DEVICE PredicatedTileIterator & | operator++ () |
Advances to the next position to load or store. More... | |
CUTLASS_DEVICE void | clear_mask () |
Efficiently disables all accesses guarded by mask. More... | |
CUTLASS_DEVICE void | enable_mask () |
Efficiently enables all accesses guarded by mask. More... | |
CUTLASS_DEVICE void | get_mask (Mask &mask) |
Gets the mask. More... | |
CUTLASS_DEVICE void | set_mask (Mask const &mask) |
Sets the mask. More... | |
Static Public Attributes | |
static int const | kElementsPerAccess = ThreadMap::kElementsPerAccess |
static int const | kThreads = ThreadMap::kThreads |
static int const | kIterations = ThreadMap::Count::kTile |
Tile iterator used to load output tile from shared memory in epilogue.
Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess> |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::ConstTensorRef = typename TensorRef::ConstTensorRef |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Element = Element_ |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Fragment = Array< Element, ThreadMap::Iterations::kColumn * ThreadMap::Iterations::kRow * ThreadMap::Iterations::kGroup * ThreadMap::Iterations::kCluster * ThreadMap::kElementsPerAccess> |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Index = typename Layout::Index |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Layout = layout::RowMajor |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::LongIndex = typename Layout::LongIndex |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::Shape = typename ThreadMap::Shape |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::TensorCoord = MatrixCoord |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::TensorRef = TensorRef<Element, Layout> |
using cutlass::epilogue::threadblock::PredicatedTileIterator< ThreadMap_, Element_ >::ThreadMap = ThreadMap_ |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Efficiently disables all accesses guarded by mask
|
inline |
|
inline |
|
static |
|
static |
|
static |