cutlass/regular__tile__iterator__pitch__linear_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include "cutlass/cutlass.h"
 #include "cutlass/tensor_ref.h"
 #include "cutlass/layout/matrix.h"
 #include "cutlass/layout/pitch_linear.h"

 #include "regular_tile_iterator.h"


 namespace cutlass {
 namespace transform {
 namespace threadblock {


 /// Specialization of RegularTileIterator for the pitch-linear layout.
 ///
 /// The iterator keeps a byte pointer to the calling thread's first access (derived from
 /// ThreadMap::initial_offset) and moves ThreadMap::Iterations::kCount vectors of
 /// ThreadMap::kElementsPerAccess elements each between memory and a Fragment.
 ///
 /// Template parameters:
 ///   Shape_       - tile shape; provides kContiguous and kStrided
 ///   Element_     - element data type
 ///   AdvanceRank  - dimension advanced by operator++/--: 0 = contiguous, 1 = strided
 ///   ThreadMap_   - provides initial_offset(), Iterations, Delta and kElementsPerAccess
 ///   Alignment    - alignment passed to AlignedArray for each vector access
 template <
   typename Shape_,
   typename Element_,
   int AdvanceRank,
   typename ThreadMap_,
   int Alignment
 >
 class RegularTileIterator<Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment> {
 public:

   using Shape = Shape_;
   using Element = Element_;
   using Layout = layout::PitchLinear;
   static int const kAdvanceRank = AdvanceRank;
   using ThreadMap = ThreadMap_;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   /// Per-thread storage: one vector of kElementsPerAccess elements per iteration.
   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

   static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
     "Advance rank may only be along the contiguous or strided dimensions.");

 private:

   //
   // Types
   //

   /// Vector type used for each individual memory access.
   using AccessType = AlignedArray<Element, ThreadMap::kElementsPerAccess, kAlignment>;

   //
   // Data members
   //

   /// Byte pointer to this thread's first access.
   uint8_t *pointer_;

   /// Stride of the tensor (first component of the TensorRef stride), in elements.
   Index stride_;

   /// Byte distance between consecutive strided iterations.
   Index increment_strided_;

   /// Byte distance applied by operator++ and operator--.
   Index increment_advance_;

 public:

   /// Constructs a null iterator; every member is zero-initialized.
   CUTLASS_DEVICE
   RegularTileIterator():
     pointer_(nullptr), stride_(0), increment_strided_(0), increment_advance_(0) { }

   /// Constructs an iterator for the given tensor reference and thread.
   ///
   /// \param ref         tensor reference supplying the base pointer and stride
   /// \param thread_idx  thread index passed to ThreadMap::initial_offset
   CUTLASS_DEVICE
   RegularTileIterator(
     TensorRef const &ref,
     int thread_idx
   ):
     pointer_(reinterpret_cast<uint8_t *>(ref.data()) + (ref.offset(ThreadMap::initial_offset(thread_idx)) * sizeof_bits<Element>::value / 8)),
     stride_(ref.stride()[0]) {

     // All increments are kept in bytes; sizeof_bits is used so the element width is
     // expressed in bits before being converted.
     increment_strided_ = (ref.stride()[0] * sizeof_bits<Element>::value) * ThreadMap::Delta::kStrided / 8;

     increment_advance_ =
       (kAdvanceRank == 0 ?
         Shape::kContiguous * sizeof_bits<Element>::value / 8 :
         Shape::kStrided * (ref.stride()[0] * sizeof_bits<Element>::value / 8));
   }

   /// Loads a fragment from memory.
   ///
   /// \param frag            destination fragment
   /// \param pointer_offset  additional offset in units of Element
   CUTLASS_HOST_DEVICE
   void load_with_pointer_offset(Fragment &frag, Index pointer_offset) {

     AccessType *frag_ptr = reinterpret_cast<AccessType *>(&frag);
     uint8_t const *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;

     CUTLASS_PRAGMA_UNROLL
     for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {

       AccessType const *access_ptr = reinterpret_cast<AccessType const *>(byte_pointer);

       CUTLASS_PRAGMA_UNROLL
       for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {

         int idx = c + s * ThreadMap::Iterations::kContiguous;

         // access_ptr steps in whole vectors, so the contiguous delta is converted to a
         // vector count by dividing by the vector width.
         // NOTE(review): assumes ThreadMap::Delta::kContiguous is in elements - confirm
         // against the thread map in use.
         frag_ptr[idx] = access_ptr[c * ThreadMap::Delta::kContiguous /
                                    ThreadMap::kElementsPerAccess];
       }

       // Skip the increment after the final row; the pointer is not used again.
       if (s + 1 < ThreadMap::Iterations::kStrided) {
         byte_pointer += increment_strided_;
       }
     }
   }

   /// Loads a fragment from the tile located tile_offset tiles away from the current position.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag, TensorCoord const & tile_offset) {
     // Offset is in units of Element, matching store() and add_tile_offset().
     load_with_pointer_offset(
       frag,
       tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_
     );
   }

   /// Loads a fragment from the current position.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag) {
     load_with_pointer_offset(frag, 0);
   }

   /// Stores a fragment to memory.
   ///
   /// \param frag            source fragment
   /// \param pointer_offset  additional offset in units of Element
   CUTLASS_HOST_DEVICE
   void store_with_pointer_offset(Fragment const &frag, Index pointer_offset) {

     AccessType const *frag_ptr = reinterpret_cast<AccessType const*>(&frag);
     uint8_t *byte_pointer = pointer_ + pointer_offset * sizeof_bits<Element>::value / 8;

     CUTLASS_PRAGMA_UNROLL
     for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) {

       AccessType *access_ptr = reinterpret_cast<AccessType *>(byte_pointer);

       CUTLASS_PRAGMA_UNROLL
       for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) {

         int idx = c + s * ThreadMap::Iterations::kContiguous;

         // Same vector-count conversion as in load_with_pointer_offset().
         // NOTE(review): assumes ThreadMap::Delta::kContiguous is in elements - confirm.
         access_ptr[c * ThreadMap::Delta::kContiguous /
                    ThreadMap::kElementsPerAccess] = frag_ptr[idx];
       }

       // Skip the increment after the final row; the pointer is not used again.
       if (s + 1 < ThreadMap::Iterations::kStrided) {
         byte_pointer += increment_strided_;
       }
     }
   }

   /// Stores a fragment to the tile located tile_offset tiles away from the current position.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag, TensorCoord const & tile_offset) {
     store_with_pointer_offset(
       frag,
       tile_offset.contiguous() * Shape::kContiguous + tile_offset.strided() * Shape::kStrided * stride_
     );
   }

   /// Stores a fragment at the current position.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag) {
     store_with_pointer_offset(frag, 0);
   }

   /// Moves the pointer forward by one tile along the advance dimension.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator++() {
     pointer_ += increment_advance_;
     return *this;
   }

   /// Moves the pointer back by one tile along the advance dimension.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator--() {
     pointer_ -= increment_advance_;
     return *this;
   }

   /// Adds pointer_offset directly to the internal byte pointer, i.e. the offset is
   /// applied in bytes.
   /// NOTE(review): the generated documentation describes this as "units of Element";
   /// add_tile_offset() below passes a byte count, so confirm what callers expect.
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     pointer_ += pointer_offset;
   }

   /// Moves the pointer by a whole number of tiles in each dimension.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     // Accumulate in LongIndex so large strides or tile counts do not overflow 32 bits.
     LongIndex element_offset =
         LongIndex(coord.contiguous()) * Shape::kContiguous +
         LongIndex(coord.strided()) * Shape::kStrided * stride_;
     add_pointer_offset(element_offset * sizeof_bits<Element>::value / 8);
   }

 };


 /// Specialization of RegularTileIterator for row-major matrices.
 ///
 /// Thin adapter over the pitch-linear specialization: columns map to the contiguous
 /// dimension and rows to the strided dimension, so every (row, column) coordinate is
 /// forwarded as (column, row).
 template <
   typename Shape_,
   typename Element_,
   int AdvanceRank,
   typename ThreadMap_,
   int Alignment
 >
 class RegularTileIterator<Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment> {
 public:

   using Shape = Shape_;
   using Element = Element_;
   using Layout = layout::RowMajor;
   static int const kAdvanceRank = AdvanceRank;
   using ThreadMap = ThreadMap_;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   /// Per-thread storage: one vector of kElementsPerAccess elements per iteration.
   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

   /// Pitch-linear iterator doing the actual work. The advance rank is swapped because
   /// matrix rank 0 (row) is the strided pitch-linear dimension.
   using Underlying = RegularTileIterator<
     layout::PitchLinearShape<Shape::kColumn, Shape::kRow>,
     Element,
     layout::PitchLinear,
     (kAdvanceRank == 0 ? 1 : 0),
     ThreadMap,
     kAlignment
   >;

   static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
     "Advance rank may only be along the row or column dimensions.");

 private:

   /// Wrapped pitch-linear iterator.
   Underlying iterator_;

 public:

   /// Default-constructs the wrapped iterator.
   CUTLASS_DEVICE
   RegularTileIterator() { }

   /// Constructs the wrapped iterator from the tensor's data pointer and stride.
   CUTLASS_DEVICE
   RegularTileIterator(
     TensorRef const &ref,
     int thread_idx
   ):
     iterator_({ref.data(), ref.stride()}, thread_idx) {

   }

   /// Loads a fragment; pointer_offset is forwarded unchanged to the wrapped iterator.
   CUTLASS_HOST_DEVICE
   void load_with_pointer_offset(Fragment &frag, Index pointer_offset) {
     iterator_.load_with_pointer_offset(frag, pointer_offset);
   }

   /// Loads a fragment from the tile located tile_offset tiles away.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag, TensorCoord const & tile_offset) {
     // Forward to the tile-coordinate overload: a two-component coordinate cannot be
     // converted to the scalar Index taken by load_with_pointer_offset().
     iterator_.load(frag, {tile_offset.column(), tile_offset.row()});
   }

   /// Loads a fragment from the current position.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag) {
     iterator_.load_with_pointer_offset(frag, 0);
   }

   /// Stores a fragment; pointer_offset is forwarded unchanged to the wrapped iterator.
   CUTLASS_HOST_DEVICE
   void store_with_pointer_offset(Fragment const &frag, Index pointer_offset) {
     iterator_.store_with_pointer_offset(frag, pointer_offset);
   }

   /// Stores a fragment to the tile located tile_offset tiles away.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag, TensorCoord const & tile_offset) {
     // Forward to the tile-coordinate overload; see load() for the reason.
     iterator_.store(frag, {tile_offset.column(), tile_offset.row()});
   }

   /// Stores a fragment at the current position.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag) {
     iterator_.store_with_pointer_offset(frag, 0);
   }

   /// Moves the wrapped iterator forward by one tile.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator++() {
     ++iterator_;
     return *this;
   }

   /// Moves the wrapped iterator back by one tile.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator--() {
     --iterator_;
     return *this;
   }

   /// Forwards pointer_offset unchanged to the wrapped iterator's add_pointer_offset().
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     iterator_.add_pointer_offset(pointer_offset);
   }

   /// Moves the iterator by a whole number of tiles, given as a (row, column) coordinate.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     iterator_.add_tile_offset({coord.column(), coord.row()});
   }

 };


 /// Specialization of RegularTileIterator for column-major matrices.
 ///
 /// Thin adapter over the pitch-linear specialization: rows map to the contiguous
 /// dimension and columns to the strided dimension, so (row, column) coordinates are
 /// forwarded in the same order.
 template <
   typename Shape_,
   typename Element_,
   int AdvanceRank,
   typename ThreadMap_,
   int Alignment
 >
 class RegularTileIterator<Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment> {
 public:

   using Shape = Shape_;
   using Element = Element_;
   using Layout = layout::ColumnMajor;
   static int const kAdvanceRank = AdvanceRank;
   using ThreadMap = ThreadMap_;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   /// Per-thread storage: one vector of kElementsPerAccess elements per iteration.
   using Fragment = Array<Element, ThreadMap::Iterations::kCount * ThreadMap::kElementsPerAccess>;

   /// Pitch-linear iterator doing the actual work. kAlignment is forwarded so the
   /// Alignment template argument is honored, as in the row-major specialization.
   using Underlying = RegularTileIterator<
     layout::PitchLinearShape<Shape::kRow, Shape::kColumn>,
     Element,
     layout::PitchLinear,
     (kAdvanceRank == 0 ? 0 : 1),
     ThreadMap,
     kAlignment
   >;

   static_assert(kAdvanceRank == 0 || kAdvanceRank == 1,
     "Advance rank may only be along the row or column dimensions.");

 private:

   /// Wrapped pitch-linear iterator.
   Underlying iterator_;

 public:

   /// Default-constructs the wrapped iterator.
   CUTLASS_DEVICE
   RegularTileIterator() { }

   /// Constructs the wrapped iterator from the tensor's data pointer and stride.
   CUTLASS_DEVICE
   RegularTileIterator(
     TensorRef const &ref,
     int thread_idx
   ):
     iterator_({ref.data(), ref.stride()}, thread_idx) {

   }

   /// Loads a fragment; pointer_offset is forwarded unchanged to the wrapped iterator.
   CUTLASS_HOST_DEVICE
   void load_with_pointer_offset(Fragment &frag, Index pointer_offset) {
     iterator_.load_with_pointer_offset(frag, pointer_offset);
   }

   /// Loads a fragment from the tile located tile_offset tiles away.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag, TensorCoord const & tile_offset) {
     // Forward to the tile-coordinate overload: a two-component coordinate cannot be
     // converted to the scalar Index taken by load_with_pointer_offset().
     iterator_.load(frag, {tile_offset.row(), tile_offset.column()});
   }

   /// Loads a fragment from the current position.
   CUTLASS_HOST_DEVICE
   void load(Fragment &frag) {
     iterator_.load_with_pointer_offset(frag, 0);
   }

   /// Stores a fragment; pointer_offset is forwarded unchanged to the wrapped iterator.
   CUTLASS_HOST_DEVICE
   void store_with_pointer_offset(Fragment const &frag, Index pointer_offset) {
     iterator_.store_with_pointer_offset(frag, pointer_offset);
   }

   /// Stores a fragment to the tile located tile_offset tiles away.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag, TensorCoord const & tile_offset) {
     // Forward to the tile-coordinate overload; see load() for the reason.
     iterator_.store(frag, {tile_offset.row(), tile_offset.column()});
   }

   /// Stores a fragment at the current position.
   CUTLASS_HOST_DEVICE
   void store(Fragment const &frag) {
     iterator_.store_with_pointer_offset(frag, 0);
   }

   /// Moves the wrapped iterator forward by one tile.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator++() {
     ++iterator_;
     return *this;
   }

   /// Moves the wrapped iterator back by one tile.
   CUTLASS_HOST_DEVICE
   RegularTileIterator &operator--() {
     --iterator_;
     return *this;
   }

   /// Forwards pointer_offset unchanged to the wrapped iterator's add_pointer_offset().
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     iterator_.add_pointer_offset(pointer_offset);
   }

   /// Moves the iterator by a whole number of tiles, given as a (row, column) coordinate.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     iterator_.add_tile_offset({coord.row(), coord.column()});
   }

 };


 } // namespace threadblock
 } // namespace transform
 } // namespace cutlass

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:284

cutlass::layout::RowMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:62

cutlass
Definition: aligned_buffer.h:35

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:468

cutlass::layout::PitchLinearCoord
Coordinate in pitch-linear space.
Definition: pitch_linear.h:52

tensor_ref.h
Defines a structure containing strides, bounds, and a pointer to tensor data.

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:321

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator--
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Moves the pointer back by one tile along the advance dimension.
Definition: regular_tile_iterator_pitch_linear.h:216

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:223

cutlass::TensorRef::data
CUTLASS_HOST_DEVICE Element * data() const
Returns the pointer to referenced data.
Definition: tensor_ref.h:254

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:71

cutlass::layout::PitchLinear
Mapping function for pitch-linear memory.
Definition: pitch_linear.h:163

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:111

cutlass::layout::ColumnMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:154

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:474

cutlass::AlignedArray
Aligned array type.
Definition: array.h:511

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:64

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:373

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:424

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:333

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:63

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:405

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:261

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:430

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:74

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:303

cutlass::layout::ColumnMajor
Mapping function for column-major matrices.
Definition: layout/matrix.h:142

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:67

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:408

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:385

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:418

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Fragment
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:76

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:448

cutlass::layout::PitchLinearShape
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43

CUTLASS_PRAGMA_UNROLL
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:169

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset
CUTLASS_HOST_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:297

cutlass::layout::RowMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:59

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator(TensorRef const &ref, int thread_idx)
Definition: regular_tile_iterator_pitch_linear.h:287

cutlass::layout::PitchLinear::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: pitch_linear.h:175

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_iterator_pitch_linear.h:383

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:380

cutlass::TensorRef::stride
CUTLASS_HOST_DEVICE Stride stride() const
Returns the layout object's stride vector.
Definition: tensor_ref.h:277

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:353

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:454

cutlass::sizeof_bits
Defines the size of an element in bits.
Definition: numeric_types.h:42

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:163

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag, TensorCoord const &tile_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:153

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileIterator & operator++()
Advances the pointer.
Definition: regular_tile_iterator_pitch_linear.h:209

nullptr
#define nullptr
nullptr
Definition: platform.h:144

cutlass::TensorRef< Element, Layout >

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:194

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:436

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator--
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Moves the pointer back by one tile along the advance dimension.
Definition: regular_tile_iterator_pitch_linear.h:340

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag, TensorCoord const &tile_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:442

CUTLASS_HOST_DEVICE
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:327

cutlass::transform::threadblock::RegularTileIterator
Definition: regular_tile_iterator.h:50

static_assert
#define static_assert(__e, __m)
Definition: platform.h:153

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:254

cutlass::layout::PitchLinear::Index
int32_t Index
Index type used for coordinates.
Definition: pitch_linear.h:172

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_iterator_pitch_linear.h:229

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:372

cutlass::layout::RowMajor
Mapping function for row-major matrices.
Definition: layout/matrix.h:50

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::store_with_pointer_offset
CUTLASS_HOST_DEVICE void store_with_pointer_offset(Fragment const &frag, Index pointer_offset)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:315

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::store
CUTLASS_HOST_DEVICE void store(Fragment const &frag)
Stores a fragment.
Definition: regular_tile_iterator_pitch_linear.h:203

regular_tile_iterator.h
Templates implementing storing of tiles from pitch-linear rank=2 tensors.

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileIterator
CUTLASS_DEVICE RegularTileIterator()
Definition: regular_tile_iterator_pitch_linear.h:108

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_iterator_pitch_linear.h:347

matrix.h
Defines layout functions used by TensorRef and derived classes.

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_iterator_pitch_linear.h:250

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::load
CUTLASS_HOST_DEVICE void load(Fragment &frag)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:309

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:257

pitch_linear.h
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.

cutlass::layout::ColumnMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:151

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:379

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator--
CUTLASS_HOST_DEVICE RegularTileIterator & operator--()
Moves the pointer back by one tile along the advance dimension.
Definition: regular_tile_iterator_pitch_linear.h:461

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_iterator_pitch_linear.h:70

cutlass.h
Basic include for CUTLASS.

cutlass::MatrixCoord
Definition: matrix_coord.h:39

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_iterator_pitch_linear.h:251

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_iterator_pitch_linear.h:258

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::load_with_pointer_offset
CUTLASS_DEVICE void load_with_pointer_offset(Fragment &frag, Index pointer_offset)
Loads a fragment.
Definition: regular_tile_iterator_pitch_linear.h:128

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_iterator_pitch_linear.h:376

cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Fragment
Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > Fragment
Definition: regular_tile_iterator_pitch_linear.h:263