CUTLASS: cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ > Class Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

#include <predicated_tile_access_iterator_2dthreadtile.h>

Classes
class	Params
	Parameters object is precomputed state and is host-constructible. More...

Public Types
using	Shape = Shape_

using	Element = Element_

using	Layout = layout::PitchLinear

using	ThreadMap = ThreadMap_

using	AccessType = AccessType_

using	Index = typename Layout::Index

using	LongIndex = typename Layout::LongIndex

using	TensorRef = TensorRef< Element, Layout >

using	TensorView = TensorView< Element, Layout >

using	TensorCoord = typename Layout::TensorCoord

using	Pointer = Element *

using	NonConstPointer = typename platform::remove_const< Element >::type *

using	Mask = Array< uint32_t, kPredicateWordCount >
	Predicate vector stores mask to guard accesses. More...

Public Member Functions
CUTLASS_HOST_DEVICE	PredicatedTileAccessIterator2dThreadTile (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset)

CUTLASS_HOST_DEVICE	PredicatedTileAccessIterator2dThreadTile (Params const &params, Pointer pointer, TensorCoord extent, int thread_id)
	Construct a PredicatedTileAccessIterator2dThreadTile with zero threadblock offset. More...

CUTLASS_HOST_DEVICE void	set_iteration_index (int index)
	Overrides the internal iteration index. More...

CUTLASS_HOST_DEVICE void	add_pointer_offset (LongIndex pointer_offset)
	Adds a pointer offset in units of Element. More...

CUTLASS_DEVICE void	add_tile_offset (TensorCoord const &tile_offset)
	Advances an iterator along logical dimensions of matrix in units of whole tiles. More...

CUTLASS_HOST_DEVICE AccessType *	get () const

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile &	operator++ ()
	Pre-increment: advances the iterator and returns a reference to itself. More...

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile	operator++ (int)
	Post-increment: advances the iterator and returns an iterator by value (conventionally a copy of the state prior to the increment). More...

CUTLASS_HOST_DEVICE void	clear_mask ()
	Clears the predicate set efficiently. More...

CUTLASS_HOST_DEVICE void	enable_mask ()
	Enables the predicate set efficiently. More...

CUTLASS_HOST_DEVICE void	set_mask (Mask const &mask)
	Sets the predicate mask, overriding value stored in predicate iterator. More...

CUTLASS_HOST_DEVICE void	get_mask (Mask &mask)
	Gets the mask. More...

CUTLASS_HOST_DEVICE bool	valid ()
	Returns whether access is valid or not. More...

Static Public Attributes
static int const	kAdvanceRank = AdvanceRank

static int const	kPredicatesPerByte = 4

static int const	kPredicatesPerWord = 4 * kPredicatesPerByte

static int const	kPredicateByteCount = (ThreadMap::Iterations::kCount * ThreadMap::ThreadAccessShape::kStrided + kPredicatesPerByte - 1) / kPredicatesPerByte
	Number of bytes containing predicates, rounded up (the number of 32b words is kPredicateWordCount). More...

static int const	kPredicateWordCount = (kPredicateByteCount + 3) / 4

static unsigned const	kPredicateMask = (1u << kPredicatesPerByte) - 1u

Detailed Description

template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, typename AccessType_>
class cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >

Specialization of PredicatedTileAccessIterator2dThreadTile for pitch-linear data.

Member Typedef Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::AccessType = AccessType_

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Element = Element_

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Index = typename Layout::Index

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Layout = layout::PitchLinear

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::LongIndex = typename Layout::LongIndex

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Mask = Array<uint32_t, kPredicateWordCount>

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::NonConstPointer = typename platform::remove_const<Element>::type *

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Pointer = Element *

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::Shape = Shape_

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorCoord = typename Layout::TensorCoord

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorRef = TensorRef<Element, Layout>

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::TensorView = TensorView<Element, Layout>

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

using cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::ThreadMap = ThreadMap_

Constructor & Destructor Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator2dThreadTile	(	Params const &	params,
		Pointer	pointer,
		TensorCoord	extent,
		int	thread_id,
		TensorCoord const &	threadblock_offset
	)

inline

Constructs a TileIterator from its precomputed state, threadblock offset, and thread ID

Parameters

params	Precomputed parameters object
pointer	Pointer to start of tensor
extent	Extent of tensor
thread_id	ID of each participating thread
threadblock_offset	Initial offset of threadblock

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::PredicatedTileAccessIterator2dThreadTile	(	Params const &	params,
		Pointer	pointer,
		TensorCoord	extent,
		int	thread_id
	)

inline

Parameters

params	Precomputed parameters object
pointer	Pointer to start of tensor
extent	Extent of tensor
thread_id	ID of each participating thread

Member Function Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::add_pointer_offset ( LongIndex pointer_offset )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::add_tile_offset ( TensorCoord const & tile_offset )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::clear_mask ( )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::enable_mask ( )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE AccessType* cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::get ( ) const

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::get_mask ( Mask & mask )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile& cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE PredicatedTileAccessIterator2dThreadTile cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::operator++ ( int )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::set_iteration_index ( int index )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE void cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::set_mask ( Mask const & mask )

inline

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

CUTLASS_HOST_DEVICE bool cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::valid ( )

inline

Member Data Documentation

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

int const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kAdvanceRank = AdvanceRank

static

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

int const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateByteCount = (ThreadMap::Iterations::kCount * ThreadMap::ThreadAccessShape::kStrided + kPredicatesPerByte - 1) / kPredicatesPerByte

static

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

unsigned const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateMask = (1u << kPredicatesPerByte) - 1u

static

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

int const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicatesPerByte = 4

static

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

int const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicatesPerWord = 4 * kPredicatesPerByte

static

template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , typename AccessType_ >

int const cutlass::transform::threadblock::PredicatedTileAccessIterator2dThreadTile< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessType_ >::kPredicateWordCount = (kPredicateByteCount + 3) / 4

static

The documentation for this class was generated from the following file:

predicated_tile_access_iterator_2dthreadtile.h