CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <mma_simt_tile_iterator.h>
Public Types | |
using | Shape = Shape_ |
Shape of tile to load (concept: MatrixShape) More... | |
using | Element = Element_ |
Element type. More... | |
using | Layout = layout::ColumnMajor |
Layout of accumulators in memory. More... | |
using | Policy = Policy_ |
Decomposition of elements among threads. More... | |
using | TensorRef = TensorRef< Element, Layout > |
TensorRef type for loading element from a tensor. More... | |
using | Index = typename TensorRef::Index |
Index type. More... | |
using | LongIndex = typename TensorRef::LongIndex |
Long Index type. More... | |
using | TensorCoord = typename TensorRef::TensorCoord |
Coordinate for an element in the tensor. More... | |
using | ThreadShape = MatrixShape< Shape::kRow/Policy::WarpShape::kRow, Shape::kColumn/Policy::WarpShape::kColumn > |
Thread-level shape of a fragment. More... | |
using | Iterations = MatrixShape< ThreadShape::kRow/Policy::LaneMmaShape::kM, ThreadShape::kColumn/Policy::LaneMmaShape::kN > |
Number of individual loads. More... | |
using | Delta = MatrixShape< Policy::WarpShape::kRow *Policy::LaneMmaShape::kM, Policy::WarpShape::kColumn *Policy::LaneMmaShape::kN > |
using | Fragment = Array< Element, ThreadShape::kCount > |
Fragment object holding a thread's part of a tile. More... | |
Public Member Functions | |
CUTLASS_HOST_DEVICE | MmaSimtTileIterator () |
Default ctor constructs null iterator. More... | |
CUTLASS_HOST_DEVICE | MmaSimtTileIterator (TensorRef const &ref, int lane_id) |
Constructor from TensorRef. More... | |
CUTLASS_HOST_DEVICE MmaSimtTileIterator & | add_pointer_offset (LongIndex offset) |
Adds a pointer offset to internal pointer(s) to advance through memory. More... | |
CUTLASS_HOST_DEVICE MmaSimtTileIterator & | add_tile_offset (TensorCoord const &coord) |
Advances an iterator along logical dimensions of matrix in units of whole tiles. More... | |
CUTLASS_HOST_DEVICE MmaSimtTileIterator & | operator++ () |
Advances the iterator along the advance dimension. More... | |
CUTLASS_HOST_DEVICE MmaSimtTileIterator & | operator-- () |
Advances the iterator along the advance dimension. More... | |
CUTLASS_HOST_DEVICE void | load_with_pointer_offset (Fragment &frag, Index pointer_offset) const |
Loads a fragment from memory with an additional pointer offset (linear, in units of Element). More... | |
CUTLASS_HOST_DEVICE void | load (Fragment &frag) const |
Loads a fragment from memory at the location pointed to by the iterator. More... | |
CUTLASS_HOST_DEVICE void | store_with_pointer_offset (Fragment const &frag, Index pointer_offset) const |
Stores a fragment to memory with an additional pointer offset. More... | |
CUTLASS_HOST_DEVICE void | store (Fragment const &frag) const |
Stores a fragment to memory at the location pointed to by the iterator. More... | |
Static Public Attributes | |
static Operand const | kOperand = Operand::kC |
Operand tag. More... | |
Specialization for C operands of column-major layouts
Concept: MutableRandomAccessContiguousTileIteratorConcept
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Delta = MatrixShape< Policy::WarpShape::kRow * Policy::LaneMmaShape::kM, Policy::WarpShape::kColumn * Policy::LaneMmaShape::kN > |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Element = Element_ |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Fragment = Array<Element, ThreadShape::kCount> |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Index = typename TensorRef::Index |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Iterations = MatrixShape< ThreadShape::kRow / Policy::LaneMmaShape::kM, ThreadShape::kColumn / Policy::LaneMmaShape::kN > |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Layout = layout::ColumnMajor |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::LongIndex = typename TensorRef::LongIndex |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Policy = Policy_ |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::Shape = Shape_ |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::TensorCoord = typename TensorRef::TensorCoord |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::TensorRef = TensorRef<Element, Layout> |
using cutlass::gemm::warp::MmaSimtTileIterator< Shape_, Operand::kC, Element_, layout::ColumnMajor, Policy_ >::ThreadShape = MatrixShape< Shape::kRow / Policy::WarpShape::kRow, Shape::kColumn / Policy::WarpShape::kColumn > |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
pointer_offset | linear offset (in units of Element) when loading |
frag | fragment to be loaded from memory |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |