100 extent.
w() * extent.
c(),
101 extent.
h() * extent.
w() * extent.
c()
125 int n = 0, h = 0, w = 0, c = 0;
127 #if defined(__CUDA_ARCH__) 129 c = int(index % static_cast<int>(stride_[0]));
131 unsigned int hw_mul, hw_shr, w_mul, w_shr, c_mul, c_shr;
137 fast_divmod(n, tmp, index,
int(stride_[2]), hw_mul, hw_shr);
138 fast_divmod(h, w, tmp,
int(stride_[1]), w_mul, w_shr);
139 fast_divmod(w, tmp, w,
int(stride_[0]), c_mul, c_shr);
142 n = int(index / (stride_[0] * stride_[1] * stride_[2]));
143 LongIndex residual = index % (stride_[0] * stride_[1] * stride_[2]);
145 h = int(residual / (stride_[0] * stride_[1]));
146 residual = (residual % (stride_[0] * stride_[1]));
148 w = int(residual / stride_[0]);
149 c = int(residual % stride_[0]);
173 if ((extent.
c() > stride_[0])
174 || (extent.
w() * stride_[0] > stride_[1])
175 || (extent.
h() * stride_[1] > stride_[2])) {
178 return extent.
n() * stride_[2];
189 static int const kRank = 4;
192 static int const kStrideRank = 3;
229 extent.
w() * extent.
h(),
230 extent.
h() * extent.
w() * extent.
c()
259 return extent.
n() * stride_[2];
266 template <
int Interleave>
271 static int const kInterleave = Interleave;
274 static int const kRank = 4;
277 static int const kStrideRank = 3;
313 kInterleave * extent.
w(),
314 kInterleave * extent.
w() * extent.
h(),
315 extent.
h() * extent.
w() * extent.
c()
324 Index c_minor = (coord.
c() % kInterleave);
325 Index c_major = (coord.
c() / kInterleave);
349 return extent.
n() * stride_[2];
356 template <
int Interleave>
361 static int const kInterleave = Interleave;
364 static int const kRank = 4;
367 static int const kStrideRank = 3;
403 kInterleave * extent.
n(),
404 kInterleave * extent.
n() * extent.
w(),
405 kInterleave * extent.
n() * extent.
w() * extent.
h()
414 Index c_minor = (coord.
c() % kInterleave);
415 Index c_major = (coord.
c() / kInterleave);
439 return (extent.
c() / kInterleave * stride_[2]);
Coord< kStrideRank > Stride
Stride vector.
Definition: tensor.h:71
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor.h:246
Defines a canonical 4D coordinate used by tensor operations.
Definition: tensor_coord.h:38
CUTLASS_HOST_DEVICE TensorCxRSKx(Stride const &stride=Stride(0))
Constructor.
Definition: tensor.h:396
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE void fast_divmod(int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)
Definition: fast_math.h:176
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:348
CUTLASS_HOST_DEVICE TensorNCxHWx(Stride const &stride=Stride(0))
Constructor.
Definition: tensor.h:306
static int const kStrideRank
Rank of stride vector.
Definition: tensor.h:59
static CUTLASS_HOST_DEVICE TensorNCxHWx packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor.h:310
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:387
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Returns the offset of a coordinate in linear memory.
Definition: tensor.h:412
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor.h:336
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:169
int32_t Index
Index type used for coordinates.
Definition: tensor.h:62
Tensor4DCoord TensorCoord
Logical coordinate (n, h, w, c)
Definition: tensor.h:68
Mapping function for 4-D NC/xHWx tensors.
Definition: tensor.h:267
int64_t LongIndex
Long index type used for offsets.
Definition: tensor.h:198
CUTLASS_HOST_DEVICE Index const & w() const
Returns the column of the coordinate.
Definition: tensor_coord.h:95
CUTLASS_HOST_DEVICE TensorNHWC(Stride const &stride=Stride(0))
Constructor.
Definition: tensor.h:88
int Index
Index type used to store elements.
Definition: coord.h:55
static CUTLASS_HOST_DEVICE TensorNCHW packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor.h:225
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor.h:163
CUTLASS_HOST_DEVICE TensorNCHW(Stride const &stride=Stride(0))
Constructor.
Definition: tensor.h:221
static int const kRank
Logical rank of tensor.
Definition: tensor.h:56
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor.h:342
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:258
CUTLASS_HOST_DEVICE Index const & c() const
Returns the channel of the coordinate.
Definition: tensor_coord.h:103
int32_t Index
Index type used for coordinates.
Definition: tensor.h:370
CUTLASS_HOST_DEVICE TensorNHWC(typename Stride::Index c, typename Stride::Index wc, typename Stride::Index hwc)
Constructor.
Definition: tensor.h:92
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor.h:432
Defines a canonical coordinate for rank=4 tensors offering named indices.
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor.h:426
Mapping function for 4-D CxRSKx tensors.
Definition: tensor.h:357
int64_t LongIndex
Long index type used for offsets.
Definition: tensor.h:65
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
static CUTLASS_HOST_DEVICE TensorNHWC packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed NHWC tensor.
Definition: tensor.h:96
Mapping function for 4-D NCHW tensors.
Definition: tensor.h:186
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor.h:157
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor.h:438
static CUTLASS_HOST_DEVICE TensorCxRSKx packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor.h:400
int32_t Index
Index type used for coordinates.
Definition: tensor.h:195
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Returns the offset of a coordinate in linear memory.
Definition: tensor.h:237
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE Index const & n() const
Returns the batch of the coordinate.
Definition: tensor_coord.h:79
CUTLASS_HOST_DEVICE void find_divisor(unsigned int &mul, unsigned int &shr, unsigned int denom)
Definition: fast_math.h:159
int64_t LongIndex
Long index type used for offsets.
Definition: tensor.h:373
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor.h:252
Defines layout functions used by TensorRef and derived classes.
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Returns the offset of a coordinate in linear memory.
Definition: tensor.h:322
int64_t LongIndex
Long index type used for offsets.
Definition: tensor.h:283
CUTLASS_HOST_DEVICE Index const & h() const
Returns the row of the coordinate.
Definition: tensor_coord.h:87
Mapping function for 4-D NHWC tensors.
Definition: tensor.h:53
int32_t Index
Index type used for coordinates.
Definition: tensor.h:280
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex index) const
Returns the logical coordinate (n, h, w, c) from a given offset in linear memory. ...
Definition: tensor.h:123
Basic include for CUTLASS.
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Returns the offset of a coordinate (n, h, w, c) in linear memory.
Definition: tensor.h:108