CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <pitch_linear_thread_map.h>
Public Types | |
using | TensorCoord = layout::PitchLinearCoord |
using | Iterations = layout::PitchLinearShape< Shape::kContiguous / (kThreads * kElementsPerAccess), Shape::kStrided > |
using | Delta = layout::PitchLinearShape< 1, 1 > |
Static Public Member Functions | |
static CUTLASS_HOST_DEVICE TensorCoord | initial_offset (int thread_id) |
Static Public Attributes | |
static int const | kThreads = Threads |
static int const | kElementsPerAccess = ElementsPerAccess |
using cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< Shape, Threads, ElementsPerAccess >::Delta = layout::PitchLinearShape<1, 1> |
using cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< Shape, Threads, ElementsPerAccess >::Iterations = layout::PitchLinearShape< Shape::kContiguous / (kThreads * kElementsPerAccess), Shape::kStrided> |
using cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< Shape, Threads, ElementsPerAccess >::TensorCoord = layout::PitchLinearCoord |
|
inline static |
|
static |
|
static |