CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Static Public Attributes | List of all members
cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment > Class Template Reference

#include <regular_tile_access_iterator_tensor_op.h>

Public Types

using Shape = Shape_
 
using Element = Element_
 
using Layout = layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>
 
using Index = typename Layout::Index
 
using LongIndex = typename Layout::LongIndex
 
using TensorRef = TensorRef< Element, Layout >
 
using TensorCoord = typename Layout::TensorCoord
 
using ThreadMap = ThreadMap_
 
using UnderlyingIterator = RegularTileAccessIterator< layout::PitchLinearShape< Shape::kRow, Shape::kColumn >, Element, layout::TensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>,(kAdvanceRank==0?0:1), ThreadMap_ >
 Underlying iterator type. More...
 
using AccessType = typename UnderlyingIterator::AccessType
 

Public Member Functions

CUTLASS_HOST_DEVICE RegularTileAccessIterator (TensorRef ref, int thread_id)
 Construct a TileIterator with zero threadblock offset. More...
 
CUTLASS_HOST_DEVICE void set_iteration_index (int index)
 Overrides the internal iteration index. More...
 
CUTLASS_HOST_DEVICE void add_pointer_offset (LongIndex pointer_offset)
 Adds a pointer offset in units of Element. More...
 
CUTLASS_HOST_DEVICE AccessTypeget () const
 Returns a pointer. More...
 
CUTLASS_DEVICE void add_tile_offset (TensorCoord const &coord)
 Adds a tile offset. More...
 
CUTLASS_HOST_DEVICE RegularTileAccessIteratoroperator++ ()
 Advances to the next tile in memory. More...
 
CUTLASS_HOST_DEVICE RegularTileAccessIterator operator++ (int)
 Advances to the next tile in memory. More...
 

Static Public Attributes

static int const kAdvanceRank = AdvanceRank
 
static int const kAlignment = Alignment
 

Detailed Description

template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, int Alignment>
class cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >

Tile Iterator specialized for column-major congruous TensorOp formats.

Satisfies: ForwardTileIteratorConcept | ReadableContiguousTileIteratorConcept | WriteableContiguousTileIteratorConcept

Member Typedef Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::AccessType = typename UnderlyingIterator::AccessType
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::Element = Element_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::Index = typename Layout::Index
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::Layout = layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits<Element_>::value, int(128 / sizeof(Element_))>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::LongIndex = typename Layout::LongIndex
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::Shape = Shape_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::TensorCoord = typename Layout::TensorCoord
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::TensorRef = TensorRef<Element, Layout>
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::ThreadMap = ThreadMap_
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
using cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::UnderlyingIterator = RegularTileAccessIterator< layout::PitchLinearShape<Shape::kRow, Shape::kColumn>, Element, layout::TensorOpMultiplicandCongruous<sizeof_bits<Element_>::value, int(128 / sizeof(Element_))>, (kAdvanceRank == 0 ? 0 : 1), ThreadMap_>

Constructor & Destructor Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::RegularTileAccessIterator ( TensorRef  ref,
int  thread_id 
)
inline
Parameters
refPointer to start of tensor
thread_idID of each participating thread

Member Function Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::add_pointer_offset ( LongIndex  pointer_offset)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_DEVICE void cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::add_tile_offset ( TensorCoord const &  coord)
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE AccessType* cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::get ( ) const
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE RegularTileAccessIterator& cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::operator++ ( )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE RegularTileAccessIterator cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::operator++ ( int  )
inline
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::set_iteration_index ( int  index)
inline

Member Data Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
int const cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::kAdvanceRank = AdvanceRank
static
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int Alignment>
int const cutlass::transform::threadblock::RegularTileAccessIterator< Shape_, Element_, layout::ColumnMajorTensorOpMultiplicandCongruous< sizeof_bits< Element_ >::value, int(128/sizeof(Element_))>, AdvanceRank, ThreadMap_, Alignment >::kAlignment = Alignment
static

The documentation for this class was generated from the following file: