45 template <
int ElementSize,
int Crosswise>
82 kTileShapeContiguous * kElementsPerAccess /
kCrosswise;
88 ((kTileShapeContiguous /
kFactor) > (32 / kTileShapeContiguous))
89 ? (kTileShapeContiguous / kFactor)
147 int tile_contiguous_idx =
150 int tile_contiguous_residual =
156 int partition_contiguous_idx =
158 int partition_strided_idx =
161 int partition_contiguous_residual =
163 int partition_strided_residual =
170 int permuted_vec_contiguous_within_partition =
171 partition_contiguous_residual ^ (partition_strided_residual % 4);
173 int permuted_partition_contiguous_within_tile =
174 partition_contiguous_idx ^ (partition_strided_idx % 2);
181 permuted_partition_contiguous_within_tile *
183 permuted_vec_contiguous_within_partition) *
187 int element_strided = vec_strided_idx;
189 return element_contiguous + element_strided * stride_[0] *
kFactor;
204 return extent[1] * stride_[0];
212 template <
int ElementSize,
int Crosswise>
215 static int const kRank = 2;
218 static int const kStrideRank = 1;
239 static int const kAccessSize = Base::kAccessSize;
247 static int const kElementSize = Base::kElementSize;
248 static int const kElementsPerAccess = Base::kElementsPerAccess;
282 return layout_(coord);
312 template <
int Crosswise>
315 static int const kRank = 2;
318 static int const kStrideRank = 1;
337 static int const kAccessSize = 128;
355 static int const kElementSize = 32;
392 int c = (coord.
contiguous() % 32) / kElementsPerAccess;
395 LongIndex offset = (c ^ (2 * s)) * kElementsPerAccess + s * stride_[0] +
396 tc * 32 + ts * stride_[0] * 4 + coord.
contiguous() % 4;
413 return extent[1] * stride_[0];
421 template <
int ElementSize,
int Crosswise>
425 static int const kRank = 2;
428 static int const kStrideRank = 1;
449 static int const kAccessSize = Base::kAccessSize;
457 static int const kElementSize = Base::kElementSize;
458 static int const kElementsPerAccess = Base::kElementsPerAccess;
526 template <
int ElementSize,
int Crosswise>
530 static int const kRank = 2;
533 static int const kStrideRank = 1;
554 static int const kAccessSize = Base::kAccessSize;
562 static int const kElementSize = Base::kElementSize;
563 static int const kElementsPerAccess = Base::kElementsPerAccess;
631 template <
int ElementSize,
int Crosswise>
634 static int const kRank = 2;
637 static int const kStrideRank = 1;
658 static int const kAccessSize = Base::kAccessSize;
666 static int const kElementSize = Base::kElementSize;
667 static int const kElementsPerAccess = Base::kElementsPerAccess;
668 static int const kCrosswise = Base::kCrosswise;
669 static int const kFactor = Base::kFactor;
703 return layout_(coord);
733 template <
int ElementSize,
int Crosswise>
736 static int const kRank = 2;
739 static int const kStrideRank = 1;
760 static int const kAccessSize = Base::kAccessSize;
768 static int const kElementSize = Base::kElementSize;
769 static int const kElementsPerAccess = Base::kElementsPerAccess;
834 template <
int ElementSize,
int Crosswise>
837 static int const kRank = 2;
840 static int const kStrideRank = 1;
861 static int const kAccessSize = Base::kAccessSize;
869 static int const kElementSize = Base::kElementSize;
870 static int const kElementsPerAccess = Base::kElementsPerAccess;
934 template <
int ElementSize,
int InterleavedK>
938 static int const kRank = 2;
941 static int const kStrideRank = 1;
960 static int const kAccessSize = 128;
966 static int const kElementSize = ElementSize;
970 static int const kInterleavedK = InterleavedK;
1004 int const rows_per_smem_cache_line = 128 / kInterleavedK;
1006 int row_id = coord.
strided() / rows_per_smem_cache_line;
1007 int col_id = (coord.
strided() % rows_per_smem_cache_line) * kInterleavedK + coord.
contiguous();
1009 int access_block_id = col_id >> 4;
1010 int swizzle_access_block_id = access_block_id ^ (row_id & 1);
1012 int swizzle_col_id = swizzle_access_block_id << 4;
1014 return row_id * 128 + swizzle_col_id;
1032 return (extent[1] / kInterleavedK) * stride_[0];
1039 template <
int ElementSize,
int InterleavedK>
1043 static int const kRank = 2;
1046 static int const kStrideRank = 1;
1065 static int const kAccessSize = 128;
1071 static int const kElementSize = ElementSize;
1075 static int const kInterleavedK = InterleavedK;
1109 int const rows_per_smem_cache_line = 128 / kInterleavedK;
1111 int row_id = coord.
strided() / rows_per_smem_cache_line;
1112 int col_id = (coord.
strided() % rows_per_smem_cache_line) * kInterleavedK + coord.
contiguous();
1114 int access_block_id = col_id >> 4;
1115 int swizzle_access_block_id = access_block_id ^ (row_id & 1);
1117 int swizzle_col_id = swizzle_access_block_id << 4;
1119 return row_id * 128 + swizzle_col_id;
1137 return (extent[0] / kInterleavedK) * stride_[0];
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:434
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:460
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:240
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:791
CUTLASS_HOST_DEVICE Index const & column() const
Returns the column of the coordinate.
Definition: matrix_coord.h:85
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:412
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:944
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:670
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:640
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:888
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:137
Coordinate in pitch-linear space.
Definition: pitch_linear.h:52
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:762
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:863
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:719
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:431
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:321
CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:691
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:536
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:910
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:803
Definition: tensor_op_multiplicand_sm75.h:734
static int const kRank
Logical rank of tensor.
Definition: tensor_op_multiplicand_sm75.h:48
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:539
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:287
CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:992
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:54
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:459
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:221
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:1052
static CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:590
Definition: tensor_op_multiplicand_sm75.h:422
static CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:896
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:659
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:843
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:555
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:742
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:603
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:451
Definition: tensor_op_multiplicand_sm75.h:835
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:57
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:695
Definition: tensor_op_multiplicand_sm75.h:213
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:249
CUTLASS_HOST_DEVICE Index const & row() const
Returns the row of the coordinate.
Definition: matrix_coord.h:77
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:303
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCrosswise(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:892
static int const kTileShapeContiguous
Definition: tensor_op_multiplicand_sm75.h:78
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:820
static CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:1101
static CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:996
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:250
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:917
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:825
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:1025
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:787
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:947
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:224
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:564
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:407
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:281
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:724
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:761
CUTLASS_HOST_DEVICE TensorOpMultiplicandColumnMajorInterleaved(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:988
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:450
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:816
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:846
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor_op_multiplicand_sm75.h:517
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:294
static int const kElementsPerAccess
Definition: tensor_op_multiplicand_sm75.h:73
static int const kStrided
Definition: pitch_linear.h:45
int32_t Index
Index type used for coordinates.
Definition: tensor_op_multiplicand_sm75.h:1049
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:745
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:871
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:586
Definition: tensor_op_multiplicand_sm75.h:46
static int const kContiguous
Definition: pitch_linear.h:44
Template based on element size (in bits) - defined in terms of pitch-linear memory.
Definition: tensor_op_multiplicand_sm75.h:935
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:597
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:771
CUTLASS_HOST_DEVICE TensorOpMultiplicand(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:126
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:477
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor_op_multiplicand_sm75.h:1031
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:266
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:388
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:904
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:1124
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:274
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:324
CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:481
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:505
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:671
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:926
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:921
CUTLASS_HOST_DEVICE Index const & contiguous() const
Returns the contiguous dimension.
Definition: pitch_linear.h:89
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:1003
CUTLASS_HOST_DEVICE RowMajorTensorOpMultiplicandCongruous(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:582
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:511
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:610
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:556
static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:485
int64_t LongIndex
Long index type used for offsets.
Definition: tensor_op_multiplicand_sm75.h:643
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:715
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:194
CUTLASS_HOST_DEVICE TensorOpMultiplicandCrosswise(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:687
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:708
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:270
Template based on element size (in bits) - defined in terms of pitch-linear memory.
Definition: tensor_op_multiplicand_sm75.h:1040
CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:1093
static CUTLASS_HOST_DEVICE ColumnMajorTensorOpMultiplicandCrosswise packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:795
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:565
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:198
typename Base::TileShape TileShape
Definition: tensor_op_multiplicand_sm75.h:862
static CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:381
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:298
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:616
static int const kCrosswise
Definition: tensor_op_multiplicand_sm75.h:74
static int const kTileShapeStride
Definition: tensor_op_multiplicand_sm75.h:87
Defines a canonical coordinate for rank=2 matrices offering named indices.
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:1019
CUTLASS_HOST_DEVICE TensorOpMultiplicand(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:122
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:809
static int const kElementSize
Definition: tensor_op_multiplicand_sm75.h:72
static int const kFactor
Number of kblocks to store PartitionShape::kContiguous Elements.
Definition: tensor_op_multiplicand_sm75.h:81
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Definition: tensor_op_multiplicand_sm75.h:203
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:660
typename Base::PartitionShape PartitionShape
Definition: tensor_op_multiplicand_sm75.h:241
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.
CUTLASS_HOST_DEVICE Stride stride() const
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:403
CUTLASS_HOST_DEVICE TensorCoord inverse(LongIndex offset) const
Inverse of layout function, mapping linear offset to logical coordinate.
Definition: tensor_op_multiplicand_sm75.h:498
typename Base::AccessCount AccessCount
Definition: tensor_op_multiplicand_sm75.h:872
Definition: tensor_op_multiplicand_sm75.h:632
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor_op_multiplicand_sm75.h:1136
CUTLASS_HOST_DEVICE TensorOpMultiplicandRowMajorInterleaved(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:1097
static int const kStrideRank
Rank of stride vector.
Definition: tensor_op_multiplicand_sm75.h:51
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:702
static CUTLASS_HOST_DEVICE TensorOpMultiplicand packed(TensorCoord const &extent)
Helper returns a layout to a tightly packed tensor.
Definition: tensor_op_multiplicand_sm75.h:130
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:492
Basic include for CUTLASS.
Definition: matrix_coord.h:39
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Index ldm=0)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:373
CUTLASS_HOST_DEVICE Index const & strided() const
Returns the strided dimension.
Definition: pitch_linear.h:97
CUTLASS_HOST_DEVICE LongIndex capacity(TensorCoord const &extent) const
Compute the number of contiguous elements needed to store a tensor with the given size...
Definition: tensor_op_multiplicand_sm75.h:622
CUTLASS_HOST_DEVICE LongIndex operator()(TensorCoord const &coord) const
Definition: tensor_op_multiplicand_sm75.h:1108
CUTLASS_HOST_DEVICE Stride & stride()
Returns the stride of the layout.
Definition: tensor_op_multiplicand_sm75.h:1130
typename Base::PartitionCount PartitionCount
Definition: tensor_op_multiplicand_sm75.h:770
static int const kAccessSize
This layout is optimized for 128b accesses.
Definition: tensor_op_multiplicand_sm75.h:70
CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous(Stride stride)
Ctor.
Definition: tensor_op_multiplicand_sm75.h:377
Definition: tensor_op_multiplicand_sm75.h:527