#include <batched_reduction_traits.h>

Classes
struct	Params

Public Types
typedef BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >	This_

typedef cutlass::reduction::BatchedReduction< This_ >	KernelClass
	The struct that consumes this Traits. More...

typedef OutputTile_	OutputTile

typedef SubTile_	SubTile

typedef ThreadShape_	ThreadShape

typedef ScalarA_	ScalarA
	The input pointer type. More...

typedef ScalarC_	ScalarC

typedef ScalarD_	ScalarD
	The output pointer type. More...

typedef ScalarAlphaBeta_	ScalarAlphaBeta
	The alpha beta type. More...

typedef ScalarAccum_	ScalarAccum
	The type for accumulation. More...

typedef Index_	Index
	The index. More...

typedef BlockSwizzle_	BlockSwizzle
	The thread block swizzle. More...

typedef Functor_	Functor

Static Public Attributes
static const int	ReductionSize = ReductionSize_

static const bool	ThreadShapeMultiple2 = (ThreadShape::kW % 2 == 0)
	check if threadShape is multiple of 2. More...

static int const	kThreads = SubTile::kW / ThreadShape::kW

static int const	maxInReg = maxInReg_

static int const	maxOutReg = maxOutReg_

Member Typedef Documentation

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef BlockSwizzle_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::BlockSwizzle

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef Functor_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Functor

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef Index_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::Index

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef cutlass::reduction::BatchedReduction<This_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::KernelClass

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef OutputTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::OutputTile

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ScalarA_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarA

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ScalarAccum_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAccum

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ScalarAlphaBeta_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarAlphaBeta

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ScalarC_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarC

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ScalarD_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ScalarD

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef SubTile_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::SubTile

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef BatchedReductionTraits<ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_> cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::This_

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

typedef ThreadShape_ cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShape

Member Data Documentation

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::kThreads = SubTile::kW / ThreadShape::kW

static

Parameteres object constructable on the host The number of threads per thread block. can be deduced

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxInReg = maxInReg_

static

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

int const cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::maxOutReg = maxOutReg_

static

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

const int cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ReductionSize = ReductionSize_

static

template<typename ScalarA_ , typename ScalarC_ , typename ScalarD_ , typename ScalarAlphaBeta_ , typename ScalarAccum_ , int ReductionSize_ = 1, typename OutputTile_ = Shape<1, 1, 128>, typename SubTile_ = Shape<1, 1, 64>, typename ThreadShape_ = Shape<1, 1, 2>, typename Index_ = int, typename BlockSwizzle_ = DefaultBlockSwizzle, int maxInReg_ = 160, int maxOutReg_ = 64, typename Functor_ = typename cutlass::gemm::LinearScaling<ScalarAlphaBeta_, typename cutlass::gemm::FragmentMultiplyAdd<ScalarAlphaBeta_, ScalarAccum_, (ThreadShape_::kW % 2 == 0)> >>

const bool cutlass::reduction::BatchedReductionTraits< ScalarA_, ScalarC_, ScalarD_, ScalarAlphaBeta_, ScalarAccum_, ReductionSize_, OutputTile_, SubTile_, ThreadShape_, Index_, BlockSwizzle_, maxInReg_, maxOutReg_, Functor_ >::ThreadShapeMultiple2 = (ThreadShape::kW % 2 == 0)

static

The documentation for this struct was generated from the following file:

batched_reduction_traits.h

Classes

Public Types

Static Public Attributes

Member Typedef Documentation

Member Data Documentation