CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize > Struct Template Reference

#include <output_tile_thread_map.h>

Classes

struct  Detail
 

Public Types

using WarpCount = WarpCount_
 
using MmaCount = MmaCount_
 
using Iterations = MmaCount
 
using Delta = layout::PitchLinearShape< kWarpSize * kElementsPerAccess, 1 >
 

Static Public Member Functions

static CUTLASS_HOST_DEVICE layout::PitchLinearCoord initial_offset (int thread_idx)
 Initial offset function.
 

Static Public Attributes

static int const kWarpSize = 32
 
static int const kThreads = Threads
 
static int const kWarpCount = kThreads / kWarpSize
 
static int const kElementsPerAccess = ElementsPerAccess
 
static int const kElementSize = ElementSize
 

Detailed Description

template<typename WarpCount_, typename MmaCount_, int Threads, int ElementsPerAccess, int ElementSize>
struct cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >

Template metaprogram for partitioning a 3D interleaved layout across warps to achieve several performance objectives.

Member Typedef Documentation

template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::Delta = layout::PitchLinearShape<kWarpSize * kElementsPerAccess, 1>
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::Iterations = MmaCount
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::MmaCount = MmaCount_
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
using cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::WarpCount = WarpCount_

Member Function Documentation

template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
static CUTLASS_HOST_DEVICE layout::PitchLinearCoord cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::initial_offset ( int  thread_idx)
inline static

Member Data Documentation

template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kElementSize = ElementSize
static
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kElementsPerAccess = ElementsPerAccess
static
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kThreads = Threads
static
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kWarpCount = kThreads / kWarpSize
static
template<typename WarpCount_ , typename MmaCount_ , int Threads, int ElementsPerAccess, int ElementSize>
int const cutlass::epilogue::threadblock::InterleavedOutputTileThreadMap< WarpCount_, MmaCount_, Threads, ElementsPerAccess, ElementSize >::kWarpSize = 32
static

The documentation for this struct was generated from the following file: output_tile_thread_map.h