CUTLASS: cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor > Struct Template Reference

CUTLASS

CUDA Templates for Linear Algebra Subroutines and Solvers

Partial specialization for row-major.

#include <volta_tensor_op_policy.h>

Public Types
using	WarpShape = WarpShape_

using	InterleavedTileShape = gemm::GemmShape< 32, 32, 4 >

using	ElementC = half_t

using	Layout = layout::RowMajor

using	InstructionShape = gemm::GemmShape< 16, 16, 4 >
	Shape of one warp-level instruction. More...

using	MmaIterations = MatrixShape< InterleavedTileShape::kM/InstructionShape::kM, InterleavedTileShape::kN/InstructionShape::kN >
	Number of mma operations performed for one 32x32x4 interleaved tile. More...

using	TileIterations = MatrixShape< WarpShape::kM/InterleavedTileShape::kM, WarpShape::kN/InterleavedTileShape::kN >
	Number of 32x32x4 interleaved tiles performed to cover the warp-level GEMM shape. More...

using	AccessType = AlignedArray< ElementC, kElementsPerAccess >
	Array type for aligned memory accesses. More...

using	Fragment = Array< ElementC, kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn >
	This is the fragment size produced by one access of the iterator. More...

using	AccumulatorTile = Array< ElementC, TileIterations::kCount * MmaIterations::kCount * kElementsPerMma >
	This is the complete warp-level accumulator tile. More...

Static Public Attributes
static int const	kElementsPerMma = 8
	Number of accumulator elements owned by each thread per Mma. More...

static int const	kRowsPerIteration = 16

static int const	kElementsPerAccess = 4
	Number of accumulator elements stored per memory instruction to shared memory. More...

static int const	kAccessesPerInterleavedTile = 4
	Number of accesses performed per interleaved tile. More...

static int const	kIterations = TileIterations::kRow * 2
	Total number of iterations needed to cover the entire tile. More...

Member Typedef Documentation

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::AccessType = AlignedArray<ElementC, kElementsPerAccess>

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::AccumulatorTile = Array< ElementC, TileIterations::kCount * MmaIterations::kCount * kElementsPerMma>

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::ElementC = half_t

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Fragment = Array< ElementC, kElementsPerAccess * kAccessesPerInterleavedTile * TileIterations::kColumn>

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::InstructionShape = gemm::GemmShape<16, 16, 4>

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::InterleavedTileShape = gemm::GemmShape<32, 32, 4>

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::Layout = layout::RowMajor

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::MmaIterations = MatrixShape< InterleavedTileShape::kM / InstructionShape::kM, InterleavedTileShape::kN / InstructionShape::kN >

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::TileIterations = MatrixShape< WarpShape::kM / InterleavedTileShape::kM, WarpShape::kN / InterleavedTileShape::kN >

template<typename WarpShape_ >

using cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::WarpShape = WarpShape_

Member Data Documentation

template<typename WarpShape_ >

int const cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::kAccessesPerInterleavedTile = 4

static

template<typename WarpShape_ >

int const cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::kElementsPerAccess = 4

static

template<typename WarpShape_ >

int const cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::kElementsPerMma = 8

static

template<typename WarpShape_ >

int const cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::kIterations = TileIterations::kRow * 2

static

template<typename WarpShape_ >

int const cutlass::epilogue::warp::VoltaTensorOpPolicy< WarpShape_, gemm::GemmShape< 32, 32, 4 >, half_t, layout::RowMajor >::kRowsPerIteration = 16

static

The documentation for this struct was generated from the following file:

volta_tensor_op_policy.h