48 typename OperatorShape,
59 typename OperatorShape_,
66 using OperatorShape = OperatorShape_;
67 using Element = Element_;
79 Policy::kRowsPerIteration,
86 Policy::OperatorCount::kColumn * Policy::kElementsPerAccess>;
92 static int const kIterations = Policy::kIterations;
96 static int const kLanesInQuad = 4;
102 Detail::kLanesInQuad * Policy::kElementsPerAccess>;
131 pointer_(reinterpret_cast<
AccessType *>(ref.data())),
132 layout_(ref.stride()[0] /
Policy::kElementsPerAccess) {
134 int quad_id = (lane_id / Detail::kLanesInQuad);
135 int lane_in_quad = (lane_id % Detail::kLanesInQuad);
137 pointer_ += layout_({quad_id, lane_in_quad});
143 pointer_ += pointer_offset / Policy::kElementsPerAccess;
151 pointer_ += layout_({
152 tile_offset.
row() * Shape::kRow,
153 (tile_offset.
column() * Shape::kColumn / Policy::kElementsPerAccess)
162 add_tile_offset(tile_offset);
173 for (
int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
174 pointer_[n * Detail::kLanesInQuad + pointer_offset / Policy::kElementsPerAccess] = frag_ptr[n];
181 store_with_pointer_offset(frag, 0);
191 for (
int n = 0; n < Policy::OperatorCount::kColumn; ++n) {
192 frag_ptr[n] = pointer_[n * Detail::kLanesInQuad + pointer_offset / Policy::kElementsPerAccess];
199 load_with_pointer_offset(frag, 0);
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
CUTLASS_HOST_DEVICE Index const & column() const
Returns the column of the coordinate.
Definition: matrix_coord.h:85
Definition: aligned_buffer.h:35
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of TensorOp instructions, of which a row-oriented slice is visible per iteration.
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset) const
Load.
Definition: tile_iterator_tensor_op.h:186
CUTLASS_HOST_DEVICE TileIteratorTensorOp(TensorRef const &ref, unsigned lane_id)
Constructor from TensorRef.
Definition: tile_iterator_tensor_op.h:127
WarpShape_ WarpShape
Definition: tile_iterator_tensor_op.h:65
CUTLASS_HOST_DEVICE TileIteratorTensorOp & add_pointer_offset(Index pointer_offset)
Adds a pointer offset.
Definition: tile_iterator_tensor_op.h:142
CUTLASS_HOST_DEVICE TileIteratorTensorOp & add_tile_offset(TensorCoord const &tile_offset)
Advances in units of whole tiles along the logical coordinate space of the tensor.
Definition: tile_iterator_tensor_op.h:149
Aligned array type.
Definition: array.h:511
CUTLASS_HOST_DEVICE Index const & row() const
Returns the row of the coordinate.
Definition: matrix_coord.h:77
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Store.
Definition: tile_iterator_tensor_op.h:168
CUTLASS_HOST_DEVICE TileIteratorTensorOp & operator+=(TensorCoord const &tile_offset)
Definition: tile_iterator_tensor_op.h:161
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Store.
Definition: tile_iterator_tensor_op.h:180
Policy details related to the epilogue.
Definition: tensor_op_policy.h:50
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Template for reading and writing tiles of accumulators to shared memory.
Definition: tile_iterator_tensor_op.h:52
typename TensorRef::LongIndex LongIndex
Definition: tile_iterator_tensor_op.h:73
typename TensorRef::Index Index
Definition: tile_iterator_tensor_op.h:72
typename Layout::Index Index
Index type.
Definition: tensor_ref.h:165
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE TileIteratorTensorOp()
Default constructor.
Definition: tile_iterator_tensor_op.h:123
Array< Element, Policy::OperatorCount::kColumn *Policy::kElementsPerAccess > Fragment
This is the fragment size produced by one access of the iterator.
Definition: tile_iterator_tensor_op.h:86
Defines layout functions used by TensorRef and derived classes.
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.
CUTLASS_HOST_DEVICE void load(Fragment &frag) const
Load.
Definition: tile_iterator_tensor_op.h:198
Definition: matrix_coord.h:39
typename Layout::LongIndex LongIndex
Long index used for pointer offsets.
Definition: tensor_ref.h:168