58 typename MmaSimtPolicy
68 typename MmaSimtPolicy_
74 using Operator = Operator_;
82 typename Operator::ElementC,
83 Policy::kElementsPerIteration>;
87 typename Operator::ElementC,
88 Policy::kAccumulatorElementCount>;
93 static int const kIterations = Policy::kIterations;
98 using AccessType = Array<typename Operator::ElementC, Policy::kElementsPerAccess>;
107 AccessType
const *accumulators_;
117 accumulators_(reinterpret_cast<AccessType const *>(&accum)),
140 AccessType *frag_ptr =
reinterpret_cast<AccessType *
>(&frag);
143 for (
int n = 0; n < Policy::kAccessesPerIteration; ++n) {
145 int accumulator_access_offset = index_ * Policy::kAccessesPerIteration + n;
147 frag_ptr[n] = accumulators_[accumulator_access_offset];
Definition: aligned_buffer.h:35
AccumulatorTile OutputAccumulatorTile
Definition: fragment_iterator_simt.h:90
Definition: simt_policy.h:50
WarpShape_ WarpShape
Definition: fragment_iterator_simt.h:73
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe to use in a union.
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Fragment iterator for SIMT accumulator arrangements.
Definition: fragment_iterator_simt.h:60
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_simt.h:138
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Array< typename Operator::ElementC, Policy::kAccumulatorElementCount > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_simt.h:88
Array< typename Operator::ElementC, Policy::kElementsPerIteration > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_simt.h:83
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator--()
Decrements.
Definition: fragment_iterator_simt.h:131
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE FragmentIteratorSimt(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_simt.h:116
Defines layout functions used by TensorRef and derived classes.
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of SimtOp instructions, of which a row-oriented slice is visible per iteration.
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator++()
Increments.
Definition: fragment_iterator_simt.h:124