CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap Struct Reference

Compacted thread map in which the 4D region is contiguous.

#include <output_tile_thread_map.h>

Public Types

using Shape = Shape_
 
using Iterations = OutputTileShape< Detail::RowArrangement::kIterationsColumn, Detail::RowArrangement::kIterationsRow, Detail::kIterationsGroup, Detail::kIterationsCluster, 1 >
 
using Delta = OutputTileShape< Detail::RowArrangement::kDeltaColumn, Detail::RowArrangement::kDeltaRow, Detail::kCompactedDeltaGroup, Detail::kCompactedDeltaCluster, 1 >
 

Static Public Member Functions

static CUTLASS_HOST_DEVICE MatrixCoord initial_offset (int thread_idx)
 Function to compute each thread's initial offset. More...
 

Static Public Attributes

static int const kElementsPerAccess = ElementsPerAccess
 Number of elements within each vector access. More...
 
static int const kThreads = Threads
 Number of threads. More...
 

Member Typedef Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Delta = OutputTileShape< Detail::RowArrangement::kDeltaColumn, Detail::RowArrangement::kDeltaRow, Detail::kCompactedDeltaGroup, Detail::kCompactedDeltaCluster, 1>
template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Iterations = OutputTileShape< Detail::RowArrangement::kIterationsColumn, Detail::RowArrangement::kIterationsRow, Detail::kIterationsGroup, Detail::kIterationsCluster, 1>
template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::Shape = Shape_

Member Function Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
static CUTLASS_HOST_DEVICE MatrixCoord cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::initial_offset (int thread_idx)
inline static

Member Data Documentation

template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::kElementsPerAccess = ElementsPerAccess
static
template<typename Shape_ , typename Count_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::OutputTileOptimalThreadMap< Shape_, Count_, Threads, ElementsPerAccess, ElementSize >::CompactedThreadMap::kThreads = Threads
static

The documentation for this struct was generated from the following file: