CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template for reading and writing tiles of accumulators to shared memory.
#include <tile_iterator_volta_tensor_op.h>
Classes | |
struct | Detail |
Public Types | |
using | WarpShape = WarpShape_ |
using | InterleavedTileShape = gemm::GemmShape< 32, 32, 4 > |
using | Element = float |
using | Layout = layout::RowMajor |
using | TensorRef = TensorRef< Element, Layout > |
Tensor Reference object. More... | |
using | TensorCoord = MatrixCoord |
Logical coordinate in referenced tensor. More... | |
using | Index = typename TensorRef::Index |
using | LongIndex = typename TensorRef::LongIndex |
using | Policy = VoltaTensorOpPolicy< WarpShape, InterleavedTileShape, Element, Layout > |
using | Shape = MatrixShape< Policy::kRowsPerIteration, WarpShape::kN > |
Shape of the tile in memory. More... | |
using | AccessType = typename Policy::AccessType |
Array type for aligned memory accesses. More... | |
using | Fragment = typename Policy::Fragment |
This is the fragment size produced by one access of the iterator. More... | |
using | AccumulatorTile = typename Policy::AccumulatorTile |
This is the complete warp-level accumulator tile. More... | |
using | Padding = MatrixShape< 0, Policy::kElementsPerAccess > |
Padding quantity. More... | |
Public Member Functions | |
CUTLASS_HOST_DEVICE | TileIteratorVoltaTensorOp () |
Default constructor. More... | |
CUTLASS_DEVICE | TileIteratorVoltaTensorOp (TensorRef const &ref, unsigned lane_id) |
Constructor from TensorRef. More... | |
CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | add_pointer_offset (Index pointer_offset) |
Adds a pointer offset. More... | |
CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | add_tile_offset (TensorCoord const &tile_offset) |
Advances in units of whole tiles along the logical coordinate space of the tensor. More... |
CUTLASS_HOST_DEVICE TileIteratorVoltaTensorOp & | operator+= (TensorCoord const &tile_offset) |
CUTLASS_DEVICE void | store_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
Store. More... | |
CUTLASS_HOST_DEVICE void | store (Fragment const &frag) |
Store. More... | |
CUTLASS_HOST_DEVICE void | load_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
Load. More... | |
CUTLASS_HOST_DEVICE void | load (Fragment const &frag) |
Load. More... | |
Static Public Attributes | |
static int const | kIterations = Policy::kIterations |
Number of times this iterator can be incremented. More... | |
static int const | kElementsPerAccess = Policy::kElementsPerAccess |
Number of elements per access. More... | |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::AccessType = typename Policy::AccessType |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::AccumulatorTile = typename Policy::AccumulatorTile |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Element = float |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Fragment = typename Policy::Fragment |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Index = typename TensorRef::Index |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::InterleavedTileShape = gemm::GemmShape<32, 32, 4> |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Layout = layout::RowMajor |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::LongIndex = typename TensorRef::LongIndex |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Padding = MatrixShape< 0, Policy::kElementsPerAccess> |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Policy = VoltaTensorOpPolicy<WarpShape, InterleavedTileShape, Element, Layout> |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::Shape = MatrixShape< Policy::kRowsPerIteration, WarpShape::kN > |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::TensorCoord = MatrixCoord |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::TensorRef = TensorRef<Element, Layout> |
using cutlass::epilogue::warp::TileIteratorVoltaTensorOp< WarpShape_, gemm::GemmShape< 32, 32, 4 >, float, layout::RowMajor >::WarpShape = WarpShape_ |
|
inline |
|
inline |
|
inline |
Advances in units of whole tiles along the logical coordinate space of the tensor.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
|
static |