cutlass/regular__tile__access__iterator__pitch__linear_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include "cutlass/cutlass.h"
 #include "cutlass/array.h"
 #include "cutlass/layout/pitch_linear.h"
 #include "cutlass/layout/matrix.h"
 #include "cutlass/matrix_coord.h"
 #include "cutlass/matrix_shape.h"
 #include "cutlass/tensor_ref.h"

 #include "cutlass/transform/threadblock/regular_tile_access_iterator.h"


 namespace cutlass {
 namespace transform {
 namespace threadblock {


 /// Tile access iterator specialized for pitch-linear layouts.
 ///
 /// The iterator only computes addresses: get() returns a pointer to the current
 /// vector access (AccessType) of the calling thread, and operator++ steps through
 /// the ThreadMap::Iterations accesses of one tile. It performs no loads or stores.
 template <typename Shape_, typename Element_, int AdvanceRank,
           typename ThreadMap_, int Alignment>
 class RegularTileAccessIterator<
     Shape_, Element_,
     layout::PitchLinear,
     AdvanceRank, ThreadMap_, Alignment> {
  public:
   static_assert(
       AdvanceRank == 0 || AdvanceRank == 1,
       "Specialization for pitch-linear iterator may only advance along the "
       "contiguous(rank=0) or strided(rank=1) dimension.");

   /// Shape of the tile; Shape::kContiguous and Shape::kStrided are used below.
   using Shape = Shape_;
   /// Element type of the referenced tensor.
   using Element = Element_;
   using Layout = layout::PitchLinear;
   static int const kAdvanceRank = AdvanceRank;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   /// Supplies initial_offset(), Iterations, Delta and kElementsPerAccess.
   using ThreadMap = ThreadMap_;

   /// Element type per access.
   using AccessType = Array<Element, ThreadMap::kElementsPerAccess>;

  private:
   //
   // Data members
   //

   /// ref.stride(0) divided by kElementsPerAccess, i.e. the stride expressed in
   /// units of AccessType (it is applied to an AccessType pointer in get()).
   Index stride_;

   /// Pointer to the first access of this thread within the tile.
   AccessType *pointer_;

   /// Extra offset in bytes, applied on top of pointer_ in get().
   Index byte_offset_;

   /// Current iteration along the contiguous dimension.
   int iteration_contiguous_;

   /// Current iteration along the strided dimension.
   int iteration_strided_;

  public:
   /// Construct a TileIterator with zero threadblock offset.
   ///
   /// \param ref       reference to the tensor the tile lives in
   /// \param thread_id ID of the participating thread; selects the thread's
   ///                  starting coordinate through ThreadMap::initial_offset()
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator(TensorRef ref,
                             int thread_id
                             )
       : stride_(ref.stride(0) / ThreadMap::kElementsPerAccess),
         byte_offset_(0) {

     layout::PitchLinearCoord thread_offset_base = ThreadMap::initial_offset(thread_id);

     // initialize pointer
     pointer_ = reinterpret_cast<AccessType *>(ref.data() + ref.offset(thread_offset_base));

     set_iteration_index(0);
   }

   /// Overrides the internal iteration index.
   ///
   /// The linear index is split so that the contiguous iteration varies fastest:
   /// index = strided * Iterations::kContiguous + contiguous.
   CUTLASS_HOST_DEVICE
   void set_iteration_index(int index) {
     iteration_contiguous_ = index % ThreadMap::Iterations::kContiguous;
     iteration_strided_ = index / ThreadMap::Iterations::kContiguous;
   }

   /// Adds a pointer offset in units of Element.
   ///
   /// NOTE(review): the offset is converted with sizeof(Element), which assumes
   /// Element occupies a whole number of bytes - confirm for sub-byte types.
   /// NOTE(review): pointer_offset is a LongIndex but is accumulated into an
   /// Index, so the value is narrowed - confirm offsets stay within range.
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     byte_offset_ += pointer_offset * sizeof(Element);
   }

   /// Returns a pointer to the access selected by the current iteration state.
   ///
   /// Only pointer arithmetic is performed, so this is host-callable as well;
   /// the matrix-layout adapters forward to it from host/device functions.
   CUTLASS_HOST_DEVICE
   AccessType *get() const {

     AccessType *access_ptr = pointer_;

     // Offset in units of AccessType: the strided part uses stride_ (already in
     // AccessType units), the contiguous part converts elements to accesses.
     int access_offset = iteration_strided_ * ThreadMap::Delta::kStrided * stride_ +
                         iteration_contiguous_ * ThreadMap::Delta::kContiguous /
                             ThreadMap::kElementsPerAccess;

     // byte_offset_ is kept in bytes, so apply it through a char pointer.
     char *access_byte_ptr =
         reinterpret_cast<char *>(access_ptr + access_offset);

     return reinterpret_cast<AccessType *>(access_byte_ptr + byte_offset_);
   }

   /// Advances to the next access within the tile (contiguous iterations first).
   ///
   /// After the last access both iteration counters wrap back to zero; neither
   /// pointer_ nor byte_offset_ is modified here.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator &operator++() {
     ++iteration_contiguous_;

     if (iteration_contiguous_ < ThreadMap::Iterations::kContiguous)
       return *this;

     // Enter here only if (iteration_contiguous_ ==
     // ThreadMap::Iterations::kContiguous)
     iteration_contiguous_ = 0;
     ++iteration_strided_;

     if (iteration_strided_ < ThreadMap::Iterations::kStrided) {
       return *this;
     }

     // Enter here only if (iteration_strided_ == ThreadMap::Iterations::kStrided),
     // i.e. every access of the tile has been visited: wrap around.
     iteration_strided_ = 0;

     return *this;
   }

   /// Post-increment: advances like operator++() and returns the previous state.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator operator++(int) {
     RegularTileAccessIterator prev(*this);
     this->operator++();

     return prev;
   }

   /// Adds a tile offset.
   ///
   /// coord counts whole tiles along (contiguous, strided). It is converted to an
   /// offset in units of Element and handed to add_pointer_offset(); multiplying
   /// stride_ by kElementsPerAccess turns it back into an element stride.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     add_pointer_offset(coord.contiguous() * Shape::kContiguous +
                        coord.strided() * Shape::kStrided * stride_ *
                            ThreadMap::kElementsPerAccess);
   }
 };


 /// Tile access iterator specialized for column-major layouts.
 ///
 /// Thin adapter over the pitch-linear specialization: the matrix row index is
 /// passed where the pitch-linear iterator expects its first coordinate and the
 /// column index where it expects its second, and every operation is forwarded.
 template <typename Shape_, typename Element_, int AdvanceRank,
           typename ThreadMap_, int Alignment>
 class RegularTileAccessIterator<
     Shape_, Element_,
     layout::ColumnMajor,
     AdvanceRank, ThreadMap_, Alignment> {
  public:
   static_assert(
       AdvanceRank == 0 || AdvanceRank == 1,
       "Specialization for column-major iterator may only advance along the "
       "row(rank=0) or column(rank=1) dimension.");

   /// Shape of the tile; Shape::kRow and Shape::kColumn are used below.
   using Shape = Shape_;
   /// Element type of the referenced tensor.
   using Element = Element_;
   using Layout = layout::ColumnMajor;
   static int const kAdvanceRank = AdvanceRank;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   using ThreadMap = ThreadMap_;

   /// Underlying iterator to compute the addresses.
   ///
   /// The tile is re-expressed as PitchLinearShape<kRow, kColumn> and the advance
   /// rank is passed through unchanged (0 -> 0, 1 -> 1).
   /// NOTE(review): Alignment is not forwarded, so the underlying iterator uses
   /// whatever default the primary template declares - confirm this is intended.
   using UnderlyingIterator = RegularTileAccessIterator<
       layout::PitchLinearShape<Shape::kRow, Shape::kColumn>, Element,
       layout::PitchLinear,
       (kAdvanceRank == 0 ? 0 : 1),
       ThreadMap_>;

   /// Element type per access, taken from the underlying iterator.
   using AccessType = typename UnderlyingIterator::AccessType;

  private:

   /// Underlying pitch-linear iterator that holds all of the state.
   UnderlyingIterator iterator_;

  public:
   /// Construct a TileIterator with zero threadblock offset.
   ///
   /// The underlying iterator receives a pitch-linear reference built from the
   /// same data pointer and stride as ref.
   ///
   /// \param ref       reference to the tensor the tile lives in
   /// \param thread_id ID of the participating thread
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator(TensorRef ref,
                             int thread_id
                             )
       : iterator_({ref.data(), ref.stride()}, thread_id) {}

   /// Overrides the internal iteration index.
   CUTLASS_HOST_DEVICE
   void set_iteration_index(int index) { iterator_.set_iteration_index(index); }

   /// Adds a pointer offset in units of Element.
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     iterator_.add_pointer_offset(pointer_offset);
   }

   /// Returns a pointer to the current access of the underlying iterator.
   CUTLASS_HOST_DEVICE
   AccessType *get() const {
     return reinterpret_cast<AccessType *>(iterator_.get());
   }

   /// Adds a tile offset.
   ///
   /// coord counts whole tiles; (row, column) is forwarded in that order to the
   /// underlying pitch-linear iterator.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     iterator_.add_tile_offset({coord.row(), coord.column()});
   }

   /// Advances the underlying iterator to its next access.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator &operator++() {
     ++iterator_;
     return *this;
   }

   /// Post-increment: advances the underlying iterator and returns the
   /// previous state of this iterator.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator operator++(int) {
     RegularTileAccessIterator prev(*this);
     ++iterator_;

     return prev;
   }
 };


 /// Tile access iterator specialized for row-major layouts.
 ///
 /// Thin adapter over the pitch-linear specialization: the matrix column index
 /// is passed where the pitch-linear iterator expects its first coordinate and
 /// the row index where it expects its second, and every operation is forwarded.
 template <typename Shape_, typename Element_, int AdvanceRank,
           typename ThreadMap_, int Alignment>
 class RegularTileAccessIterator<
     Shape_, Element_,
     layout::RowMajor,
     AdvanceRank, ThreadMap_, Alignment> {
  public:
   static_assert(
       AdvanceRank == 0 || AdvanceRank == 1,
       "Specialization for row-major iterator may only advance along the "
       "row(rank=0) or column(rank=1) dimension.");

   /// Shape of the tile; Shape::kRow and Shape::kColumn are used below.
   using Shape = Shape_;
   /// Element type of the referenced tensor.
   using Element = Element_;
   using Layout = layout::RowMajor;
   static int const kAdvanceRank = AdvanceRank;
   static int const kAlignment = Alignment;

   using Index = typename Layout::Index;
   using LongIndex = typename Layout::LongIndex;

   using TensorRef = TensorRef<Element, Layout>;
   using TensorCoord = typename Layout::TensorCoord;

   using ThreadMap = ThreadMap_;

   /// Underlying iterator to compute the addresses.
   ///
   /// The tile is re-expressed as PitchLinearShape<kColumn, kRow> and the advance
   /// rank is swapped accordingly (0 -> 1, 1 -> 0).
   /// NOTE(review): Alignment is not forwarded, so the underlying iterator uses
   /// whatever default the primary template declares - confirm this is intended.
   using UnderlyingIterator = RegularTileAccessIterator<
       layout::PitchLinearShape<Shape::kColumn, Shape::kRow>, Element,
       layout::PitchLinear,
       (kAdvanceRank == 0 ? 1 : 0),
       ThreadMap_>;

   /// Element type per access, taken from the underlying iterator.
   using AccessType = typename UnderlyingIterator::AccessType;

  private:

   /// Underlying pitch-linear iterator that holds all of the state.
   UnderlyingIterator iterator_;

  public:
   /// Construct a TileIterator with zero threadblock offset.
   ///
   /// The underlying iterator receives a pitch-linear reference built from the
   /// same data pointer and stride as ref.
   ///
   /// \param ref       reference to the tensor the tile lives in
   /// \param thread_id ID of the participating thread
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator(TensorRef ref,
                             int thread_id
                             )
       : iterator_({ref.data(), ref.stride()}, thread_id) {}

   /// Overrides the internal iteration index.
   CUTLASS_HOST_DEVICE
   void set_iteration_index(int index) { iterator_.set_iteration_index(index); }

   /// Adds a pointer offset in units of Element.
   CUTLASS_HOST_DEVICE
   void add_pointer_offset(LongIndex pointer_offset) {
     iterator_.add_pointer_offset(pointer_offset);
   }

   /// Returns a pointer to the current access of the underlying iterator.
   CUTLASS_HOST_DEVICE
   AccessType *get() const {
     return reinterpret_cast<AccessType *>(iterator_.get());
   }

   /// Adds a tile offset.
   ///
   /// coord counts whole tiles; it is forwarded as (column, row) to the
   /// underlying pitch-linear iterator.
   CUTLASS_DEVICE
   void add_tile_offset(TensorCoord const &coord) {
     iterator_.add_tile_offset({coord.column(), coord.row()});
   }

   /// Advances the underlying iterator to its next access.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator &operator++() {
     ++iterator_;
     return *this;
   }

   /// Post-increment: advances the underlying iterator and returns the
   /// previous state of this iterator.
   CUTLASS_HOST_DEVICE
   RegularTileAccessIterator operator++(int) {
     RegularTileAccessIterator prev(*this);
     ++iterator_;

     return prev;
   }
 };


 }  // namespace threadblock
 }  // namespace transform
 }  // namespace cutlass

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_access_iterator_pitch_linear.h:76

cutlass::layout::RowMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:62

cutlass
Definition: aligned_buffer.h:35

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_access_iterator_pitch_linear.h:187

cutlass::layout::PitchLinearCoord
Coordinate in pitch-linear space.
Definition: pitch_linear.h:52

tensor_ref.h
Defines a structure containing strides, bounds, and a pointer to tensor data.

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_access_iterator_pitch_linear.h:366

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator operator++(int)
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:178

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::AccessType
Array< Element, ThreadMap::kElementsPerAccess > AccessType
Element type per access.
Definition: regular_tile_access_iterator_pitch_linear.h:84

cutlass::TensorRef::data
CUTLASS_HOST_DEVICE Element * data() const
Returns the pointer to referenced data.
Definition: tensor_ref.h:254

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_access_iterator_pitch_linear.h:322

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::set_iteration_index
CUTLASS_HOST_DEVICE void set_iteration_index(int index)
Overrides the internal iteration index.
Definition: regular_tile_access_iterator_pitch_linear.h:253

cutlass::layout::PitchLinear
Mapping function for pitch-linear memory.
Definition: pitch_linear.h:163

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_access_iterator_pitch_linear.h:216

cutlass::layout::ColumnMajor::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: layout/matrix.h:154

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset
CUTLASS_DEVICE void add_tile_offset(TensorCoord const &coord)
Adds a tile offset.
Definition: regular_tile_access_iterator_pitch_linear.h:269

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_access_iterator_pitch_linear.h:324

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::AccessType
typename UnderlyingIterator::AccessType AccessType
Definition: regular_tile_access_iterator_pitch_linear.h:333

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_access_iterator_pitch_linear.h:354

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_access_iterator_pitch_linear.h:222

cutlass::layout::ColumnMajor
Mapping function for column-major matrices.
Definition: layout/matrix.h:142

cutlass::layout::PitchLinearShape
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43

array.h
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

cutlass::layout::RowMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:59

cutlass::operator++
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694

cutlass::layout::PitchLinear::LongIndex
int64_t LongIndex
Long index type used for offsets.
Definition: pitch_linear.h:175

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator operator++(int)
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:379

cutlass::TensorRef::stride
CUTLASS_HOST_DEVICE Stride stride() const
Returns the layout object's stride vector.
Definition: tensor_ref.h:277

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator & operator++()
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:154

matrix_shape.h
Defines a Shape template for matrix tiles.

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_access_iterator_pitch_linear.h:227

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileAccessIterator
CUTLASS_HOST_DEVICE RegularTileAccessIterator(TensorRef ref, int thread_id)
Construct a TileIterator with zero threadblock offset.
Definition: regular_tile_access_iterator_pitch_linear.h:343

cutlass::TensorRef< Element, Layout >

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_access_iterator_pitch_linear.h:70

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator & operator++()
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:372

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_access_iterator_pitch_linear.h:69

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::AccessType
typename UnderlyingIterator::AccessType AccessType
Definition: regular_tile_access_iterator_pitch_linear.h:236

CUTLASS_HOST_DEVICE
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89

cutlass::TensorRef::offset
CUTLASS_HOST_DEVICE LongIndex offset(TensorCoord const &coord) const
Computes the offset of an index from the origin of the tensor.
Definition: tensor_ref.h:301

static_assert
#define static_assert(__e, __m)
Definition: platform.h:153

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::RegularTileAccessIterator
CUTLASS_HOST_DEVICE RegularTileAccessIterator(TensorRef ref, int thread_id)
Construct a TileIterator with zero threadblock offset.
Definition: regular_tile_access_iterator_pitch_linear.h:109

cutlass::layout::PitchLinear::Index
int32_t Index
Index type used for coordinates.
Definition: pitch_linear.h:172

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::set_iteration_index
CUTLASS_HOST_DEVICE void set_iteration_index(int index)
Overrides the internal iteration index.
Definition: regular_tile_access_iterator_pitch_linear.h:125

regular_tile_access_iterator.h
Templates implementing the address computation of storing of tiles from pitch-linear rank=2 tensors...

cutlass::layout::RowMajor
Mapping function for row-major matrices.
Definition: layout/matrix.h:50

matrix_coord.h
Defines a canonical coordinate for rank=2 matrices offering named indices.

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_access_iterator_pitch_linear.h:215

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Shape
Shape_ Shape
Definition: regular_tile_access_iterator_pitch_linear.h:312

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator & operator++()
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:275

cutlass::transform::threadblock::RegularTileAccessIterator
Definition: regular_tile_access_iterator.h:48

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_access_iterator_pitch_linear.h:75

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_access_iterator_pitch_linear.h:318

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::RegularTileAccessIterator
CUTLASS_HOST_DEVICE RegularTileAccessIterator(TensorRef ref, int thread_id)
Construct a TileIterator with zero threadblock offset.
Definition: regular_tile_access_iterator_pitch_linear.h:246

matrix.h
Defines layout functions used by TensorRef and derived classes.

pitch_linear.h
Defines layout functions used by TensorRef and derived classes for pitch-linear memory.

cutlass::layout::ColumnMajor::Index
int32_t Index
Index type used for coordinates.
Definition: layout/matrix.h:151

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::operator++
CUTLASS_HOST_DEVICE RegularTileAccessIterator operator++(int)
Advances to the next tile in memory.
Definition: regular_tile_access_iterator_pitch_linear.h:282

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_access_iterator_pitch_linear.h:225

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::set_iteration_index
CUTLASS_HOST_DEVICE void set_iteration_index(int index)
Overrides the internal iteration index.
Definition: regular_tile_access_iterator_pitch_linear.h:350

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_access_iterator_pitch_linear.h:132

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::Element
Element_ Element
Definition: regular_tile_access_iterator_pitch_linear.h:313

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset
CUTLASS_HOST_DEVICE void add_pointer_offset(LongIndex pointer_offset)
Adds a pointer offset in units of Element.
Definition: regular_tile_access_iterator_pitch_linear.h:257

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajor, AdvanceRank, ThreadMap_, Alignment >::Index
typename Layout::Index Index
Definition: regular_tile_access_iterator_pitch_linear.h:221

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::RowMajor, AdvanceRank, ThreadMap_, Alignment >::LongIndex
typename Layout::LongIndex LongIndex
Definition: regular_tile_access_iterator_pitch_linear.h:319

cutlass.h
Basic include for CUTLASS.

cutlass::MatrixCoord
Definition: matrix_coord.h:39

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::ThreadMap
ThreadMap_ ThreadMap
Definition: regular_tile_access_iterator_pitch_linear.h:81

cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, Alignment >::TensorCoord
typename Layout::TensorCoord TensorCoord
Definition: regular_tile_access_iterator_pitch_linear.h:79