CUTLASS: cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

#include <pitch_linear_thread_map.h>

Classes
struct	Detail
	Internal implementation details. More...

Public Types
using	TensorCoord = layout::PitchLinearCoord
	Tensor coordinate. More...

using	Shape = Shape_
	Tile shape. More...

using	ThreadAccessShape = layout::PitchLinearShape< kElementsPerAccess, 1 >
	Shape of access by each thread. More...

using	Iterations = typename platform::conditional< Threads >= Detail::ShapeVec::kContiguous, layout::PitchLinearShape< 1, (Threads >= Detail::ShapeVec::kContiguous ? Detail::ShapeVec::kStrided / (kThreads / Detail::ShapeVec::kContiguous) : 0) >, layout::PitchLinearShape< Detail::ShapeVec::kContiguous / kThreads, Detail::ShapeVec::kStrided > >::type
	Number of iterations by each thread. More...

using	Delta = typename platform::conditional< Threads >= Detail::ShapeVec::kContiguous, layout::PitchLinearShape< 1, kThreads / Detail::ShapeVec::kContiguous >, layout::PitchLinearShape< kThreads * kElementsPerAccess, 1 > >::type

Static Public Member Functions
static CUTLASS_HOST_DEVICE TensorCoord	initial_offset (int thread_id)

Static Public Attributes
static int const	kThreads = Threads
	Number of threads total. More...

static int const	kElementsPerAccess = ElementsPerAccess
	Number of elements per access (the vector length), set from the ElementsPerAccess template parameter. More...

Detailed Description

template<typename Shape_, int Threads, int ElementsPerAccess = 1>
struct cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >

Strip-mines a pitch-linear tile among a given number of threads, first along the contiguous dimension then along the strided dimension.

The tile must be divisible by the thread count such that all threads may execute the same number of iterations with the same delta to exhaustively cover the tile.

This class satisfies the "RegularThreadMapping" concept.

Member Typedef Documentation

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

using cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::Delta = typename platform::conditional< Threads >= Detail::ShapeVec::kContiguous, layout::PitchLinearShape< 1, kThreads / Detail::ShapeVec::kContiguous >, layout::PitchLinearShape< kThreads * kElementsPerAccess, 1 > >::type

Interval between accesses along each dimension of the tensor's logical coordinate space (in units of Elements)

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

using cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::Iterations = typename platform::conditional< Threads >= Detail::ShapeVec::kContiguous, layout::PitchLinearShape< 1, (Threads >= Detail::ShapeVec::kContiguous ? Detail::ShapeVec::kStrided / (kThreads / Detail::ShapeVec::kContiguous) : 0) >, layout::PitchLinearShape< Detail::ShapeVec::kContiguous / kThreads, Detail::ShapeVec::kStrided > >::type

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

using cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::Shape = Shape_

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

using cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::TensorCoord = layout::PitchLinearCoord

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

using cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::ThreadAccessShape = layout::PitchLinearShape<kElementsPerAccess, 1>

Member Function Documentation

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

static CUTLASS_HOST_DEVICE TensorCoord cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::initial_offset ( int thread_id )

inline static

Maps thread ID to a coordinate offset within the tensor's logical coordinate space (in units of Elements)

Member Data Documentation

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

int const cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::kElementsPerAccess = ElementsPerAccess

static

template<typename Shape_ , int Threads, int ElementsPerAccess = 1>

int const cutlass::transform::PitchLinearStripminedThreadMap< Shape_, Threads, ElementsPerAccess >::kThreads = Threads

static

The documentation for this struct was generated from the following file:

pitch_linear_thread_map.h