46 namespace threadblock {
57 template <
typename Shape_,
typename Element_,
int AdvanceRank,
58 typename ThreadMap_,
int Alignment>
62 AdvanceRank, ThreadMap_, Alignment> {
65 AdvanceRank == 0 || AdvanceRank == 1,
66 "Specialization for pitch-linear iterator may along advance along the " 67 "contiguous(rank=0) or strided(rank=1) dimension.");
72 static int const kAdvanceRank = AdvanceRank;
73 static int const kAlignment = Alignment;
84 using AccessType = Array<Element, ThreadMap::kElementsPerAccess>;
101 int iteration_contiguous_;
104 int iteration_strided_;
112 : stride_(ref.stride(0) /
ThreadMap::kElementsPerAccess),
120 set_iteration_index(0);
126 iteration_contiguous_ = index % ThreadMap::Iterations::kContiguous;
127 iteration_strided_ = index / ThreadMap::Iterations::kContiguous;
133 byte_offset_ += pointer_offset *
sizeof(
Element);
142 int access_offset = iteration_strided_ * ThreadMap::Delta::kStrided * stride_ +
143 iteration_contiguous_ * ThreadMap::Delta::kContiguous /
144 ThreadMap::kElementsPerAccess;
146 char *access_byte_ptr =
147 reinterpret_cast<char *
>(access_ptr + access_offset);
149 return reinterpret_cast<AccessType *
>(access_byte_ptr + byte_offset_);
155 ++iteration_contiguous_;
157 if (iteration_contiguous_ < ThreadMap::Iterations::kContiguous)
162 iteration_contiguous_ = 0;
163 ++iteration_strided_;
165 if (iteration_strided_ < ThreadMap::Iterations::kStrided) {
171 iteration_strided_ = 0;
188 add_pointer_offset(coord.contiguous() * Shape::kContiguous +
189 coord.strided() * Shape::kStrided * stride_ *
190 ThreadMap::kElementsPerAccess);
203 template <
typename Shape_,
typename Element_,
int AdvanceRank,
204 typename ThreadMap_,
int Alignment>
208 AdvanceRank, ThreadMap_, Alignment> {
211 AdvanceRank == 0 || AdvanceRank == 1,
212 "Specialization for pitch-linear iterator may along advance along the " 213 "contiguous(rank=0) or strided(rank=1) dimension.");
218 static int const kAdvanceRank = AdvanceRank;
219 static int const kAlignment = Alignment;
233 (kAdvanceRank == 0 ? 0 : 1),
249 : iterator_({ref.
data(), ref.
stride()}, thread_id) {}
258 iterator_.add_pointer_offset(pointer_offset);
264 return reinterpret_cast<AccessType *
>(iterator_.get());
270 iterator_.add_tile_offset({coord.row(), coord.column()});
300 template <
typename Shape_,
typename Element_,
int AdvanceRank,
301 typename ThreadMap_,
int Alignment>
305 AdvanceRank, ThreadMap_, Alignment> {
308 AdvanceRank == 0 || AdvanceRank == 1,
309 "Specialization for pitch-linear iterator may along advance along the " 310 "contiguous(rank=0) or strided(rank=1) dimension.");
315 static int const kAdvanceRank = AdvanceRank;
316 static int const kAlignment = Alignment;
330 (kAdvanceRank == 0 ? 1 : 0),
346 : iterator_({ref.
data(), ref.
stride()}, thread_id) {}
355 iterator_.add_pointer_offset(pointer_offset);
361 return reinterpret_cast<AccessType *
>(iterator_.get());
367 iterator_.add_tile_offset({coord.column(), coord.row()});
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:62
Definition: aligned_buffer.h:35
Coordinate in pitch-linear space.
Definition: pitch_linear.h:52
Defines a structure containing strides, bounds, and a pointer to tensor data.
CUTLASS_HOST_DEVICE Element * data() const
Returns the pointer to referenced data.
Definition: tensor_ref.h:254
Mapping function for pitch-linear memory.
Definition: pitch_linear.h:163
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:154
Mapping function for column-major matrices.
Definition: layout/matrix.h:142
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:59
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
int64_t LongIndex
Long index type used for offsets.
Definition: pitch_linear.h:175
CUTLASS_HOST_DEVICE Stride stride() const
Returns the layout object's stride vector.
Definition: tensor_ref.h:277
Defines a Shape template for matrix tiles.
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE LongIndex offset(TensorCoord const &coord) const
Computes the offset of an index from the origin of the tensor.
Definition: tensor_ref.h:301
int32_t Index
Index type used for coordinates.
Definition: pitch_linear.h:172
Templates implementing the address computation for storing tiles from pitch-linear rank=2 tensors...
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
Defines a canonical coordinate for rank=2 matrices offering named indices.
Defines layout functions used by TensorRef and derived classes.
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:151
Basic include for CUTLASS.
Definition: matrix_coord.h:39