CUTLASS: cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap Struct Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Compacted thread map in which the 4D region is contiguous.

#include <output_tile_thread_map.h>

Public Types
using	Shape = Shape_

using	Iterations = OutputTileShape< Detail::RowArrangement::kIterationsColumn, Detail::RowArrangement::kIterationsRow, Detail::kIterationsGroup, Detail::kIterationsCluster, 1 >

using	Delta = OutputTileShape< Detail::RowArrangement::kDeltaColumn, Detail::RowArrangement::kDeltaRow, Detail::kCompactedDeltaGroup, Detail::kCompactedDeltaCluster, 1 >

Static Public Member Functions
static CUTLASS_HOST_DEVICE MatrixCoord	initial_offset (int thread_idx)
	Function to compute each thread's initial offset.

Static Public Attributes
static int const	kElementsPerAccess = ElementsPerAccess
	Number of elements within each vector access.

static int const	kThreads = Threads
	Number of threads.

Member Typedef Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Delta = OutputTileShape< Detail::RowArrangement::kDeltaColumn, Detail::RowArrangement::kDeltaRow, Detail::kCompactedDeltaGroup, Detail::kCompactedDeltaCluster, 1>

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Iterations = OutputTileShape< Detail::RowArrangement::kIterationsColumn, Detail::RowArrangement::kIterationsRow, Detail::kIterationsGroup, Detail::kIterationsCluster, 1>

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Shape = Shape_

Member Function Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

static CUTLASS_HOST_DEVICE MatrixCoord cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::initial_offset ( int thread_idx )

inline static

Member Data Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

int const cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::kElementsPerAccess = ElementsPerAccess

static

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>

int const cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::kThreads = Threads

static

The documentation for this struct was generated from the following file:

output_tile_thread_map.h