CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
fragment_iterator_simt.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
38 #pragma once
39 
40 #include "cutlass/array.h"
41 #include "cutlass/layout/matrix.h"
42 
44 
46 
47 namespace cutlass {
48 namespace epilogue {
49 namespace warp {
50 
52 
54 template <
55  typename WarpShape,
56  typename Operator,
57  typename Layout,
58  typename MmaSimtPolicy
59 >
61 
63 
65 template <
66  typename WarpShape_,
67  typename Operator_ ,
68  typename MmaSimtPolicy_
69 >
70 class FragmentIteratorSimt<WarpShape_, Operator_, layout::RowMajor, MmaSimtPolicy_> {
71 public:
72 
73  using WarpShape = WarpShape_;
74  using Operator = Operator_;
75  using Layout = layout::RowMajor;
76 
79 
81  using Fragment = Array<
82  typename Operator::ElementC,
83  Policy::kElementsPerIteration>;
84 
86  using AccumulatorTile = Array<
87  typename Operator::ElementC,
88  Policy::kAccumulatorElementCount>;
89 
91 
93  static int const kIterations = Policy::kIterations;
94 
95 private:
96 
98  using AccessType = Array<typename Operator::ElementC, Policy::kElementsPerAccess>;
99 
100 private:
101 
102  //
103  // Data members
104  //
105 
107  AccessType const *accumulators_;
108 
110  int index_;
111 
112 public:
113 
117  accumulators_(reinterpret_cast<AccessType const *>(&accum)),
118  index_(0) {
119 
120  }
121 
125  ++index_;
126  return *this;
127  }
128 
132  --index_;
133  return *this;
134  }
135 
/// Loads a fragment from the referenced part of the accumulator tile.
///
/// Copies Policy::kAccessesPerIteration AccessType-sized chunks from the accumulator tile into
/// `frag`, starting at access offset index_ * Policy::kAccessesPerIteration.
///
/// NOTE(review): `index_offset` is accepted but never read in this body, so the fragment is
/// always selected by index_ alone -- confirm whether it was meant to be added to index_.
138  void load(Fragment &frag, int index_offset = 0) const {
139  // View the destination fragment as an array of AccessType so each copy moves one whole access.
140  AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
141 
143  for (int n = 0; n < Policy::kAccessesPerIteration; ++n) {
144  // index_ (moved by operator++ / operator--) picks the iteration; n picks the access within it.
145  int accumulator_access_offset = index_ * Policy::kAccessesPerIteration + n;
146  // No bounds check is performed on the computed offset here.
147  frag_ptr[n] = accumulators_[accumulator_access_offset];
148  }
149  }
150 };
151 
153 
154 } // namespace warp
155 } // namespace epilogue
156 } // namespace cutlass
157 
Definition: aligned_buffer.h:35
Definition: simt_policy.h:50
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Fragment iterator for SIMT accumulator arrangements.
Definition: fragment_iterator_simt.h:60
CUTLASS_HOST_DEVICE void load(Fragment &frag, int index_offset=0) const
Loads a fragment from the referenced part of the accumulator tile.
Definition: fragment_iterator_simt.h:138
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Array< typename Operator::ElementC, Policy::kAccumulatorElementCount > AccumulatorTile
This is the complete warp-level accumulator tile.
Definition: fragment_iterator_simt.h:88
Array< typename Operator::ElementC, Policy::kElementsPerIteration > Fragment
This is the fragment size produced by one access of the iterator.
Definition: fragment_iterator_simt.h:83
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator--()
Decrements.
Definition: fragment_iterator_simt.h:131
Mapping function for row-major matrices.
Definition: layout/matrix.h:50
CUTLASS_HOST_DEVICE FragmentIteratorSimt(AccumulatorTile const &accum)
Constructs an iterator.
Definition: fragment_iterator_simt.h:116
Defines layout functions used by TensorRef and derived classes.
Defines basic structures needed for implementing the warp-scoped phase of the epilogue. These quantities assume a 'column-major' arrangement of SimtOp instructions, of which a row-oriented slice is visible per iteration.
CUTLASS_HOST_DEVICE FragmentIteratorSimt & operator++()
Increments.
Definition: fragment_iterator_simt.h:124