Partial specialization for column-major output; it exchanges the problem size and the operands.

#include <gemm_batched.h>

Classes
struct	Arguments
	Argument structure. More...

Public Types
using	ElementA = ElementA_

using	LayoutA = LayoutA_

using	TensorRefA = TensorRef< ElementA const, LayoutA >

using	ElementB = ElementB_

using	LayoutB = LayoutB_

using	TensorRefB = TensorRef< ElementB const, LayoutB >

using	ElementC = ElementC_

using	LayoutC = layout::ColumnMajor

using	TensorRefC = TensorRef< ElementC const, LayoutC >

using	TensorRefD = TensorRef< ElementC, LayoutC >

using	ElementAccumulator = ElementAccumulator_

using	OperatorClass = OperatorClass_

using	ArchTag = ArchTag_

using	ThreadblockShape = ThreadblockShape_

using	WarpShape = WarpShape_

using	InstructionShape = InstructionShape_

using	EpilogueOutputOp = EpilogueOutputOp_

using	ThreadblockSwizzle = ThreadblockSwizzle_

using	UnderlyingOperator = GemmBatched< ElementB, typename layout::LayoutTranspose< LayoutB >::type, ElementA, typename layout::LayoutTranspose< LayoutA >::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, kAlignmentB, kAlignmentA >

using	UnderlyingArguments = typename UnderlyingOperator::Arguments

using	GemmKernel = typename UnderlyingOperator::GemmKernel

Public Member Functions
	GemmBatched ()
	Constructs the GEMM. More...

Status	initialize (Arguments const &args, void *workspace=nullptr, cudaStream_t stream=nullptr)
	Initializes GEMM state from arguments. More...

Status	update (Arguments const &args, void *workspace=nullptr)
	Lightweight update given a subset of arguments. More...

Status	run (cudaStream_t stream=nullptr)
	Runs the kernel using initialized state. More...

Status	operator() (cudaStream_t stream=nullptr)
	Runs the kernel using initialized state. More...

Status	operator() (Arguments const &args, void *workspace=nullptr, cudaStream_t stream=nullptr)
	Runs the kernel using initialized state. More...

Static Public Member Functions
static UnderlyingArguments	to_underlying_arguments (Arguments const &args)
	Helper to construct a transposed equivalent for the underlying GEMM operator. More...

static Status	can_implement (Arguments const &args)
	Determines whether the GEMM can execute the given problem. More...

static size_t	get_workspace_size (Arguments const &args)
	Gets the workspace size. More...

Static Public Attributes
static int const	kStages = Stages

static int const	kAlignmentA = AlignmentA

static int const	kAlignmentB = AlignmentB

static int const	kAlignmentC = EpilogueOutputOp::kCount

static bool const	kSplitKSerial = false

Member Typedef Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ArchTag = ArchTag_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementA = ElementA_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementAccumulator = ElementAccumulator_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementB = ElementB_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ElementC = ElementC_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::EpilogueOutputOp = EpilogueOutputOp_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::GemmKernel = typename UnderlyingOperator::GemmKernel

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::InstructionShape = InstructionShape_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutA = LayoutA_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutB = LayoutB_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::LayoutC = layout::ColumnMajor

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::OperatorClass = OperatorClass_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefA = TensorRef<ElementA const, LayoutA>

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefB = TensorRef<ElementB const, LayoutB>

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefC = TensorRef<ElementC const, LayoutC>

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::TensorRefD = TensorRef<ElementC, LayoutC>

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ThreadblockShape = ThreadblockShape_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::ThreadblockSwizzle = ThreadblockSwizzle_

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::UnderlyingArguments = typename UnderlyingOperator::Arguments

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::UnderlyingOperator = GemmBatched< ElementB, typename layout::LayoutTranspose<LayoutB>::type, ElementA, typename layout::LayoutTranspose<LayoutA>::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ThreadblockSwizzle, Stages, kAlignmentB, kAlignmentA >

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

using cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::WarpShape = WarpShape_

Constructor & Destructor Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::GemmBatched ( )

inline

Member Function Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

static Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::can_implement ( Arguments const & args )

inline static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

static size_t cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::get_workspace_size ( Arguments const & args )

inline static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::initialize	(	Arguments const &	args,
		void *	workspace = `nullptr`,
		cudaStream_t	stream = `nullptr`
	)

inline

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::operator() ( cudaStream_t stream = nullptr )

inline

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::operator()	(	Arguments const &	args,
		void *	workspace = `nullptr`,
		cudaStream_t	stream = `nullptr`
	)

inline

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::run ( cudaStream_t stream = nullptr )

inline

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

static UnderlyingArguments cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::to_underlying_arguments ( Arguments const & args )

inline static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

Status cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::update	(	Arguments const &	args,
		void *	workspace = `nullptr`
	)

inline

Member Data Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentA = AlignmentA

static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentB = AlignmentB

static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kAlignmentC = EpilogueOutputOp::kCount

static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

bool const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kSplitKSerial = false

static

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ThreadblockSwizzle_ , int Stages, int AlignmentA, int AlignmentB, typename Operator_ >

int const cutlass::gemm::device::GemmBatched< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ThreadblockSwizzle_, Stages, AlignmentA, AlignmentB, Operator_ >::kStages = Stages

static

The documentation for this class was generated from the following file:

device/gemm_batched.h

Classes

Public Types

Public Member Functions

Static Public Member Functions

Static Public Attributes

Member Typedef Documentation

Constructor & Destructor Documentation

Member Function Documentation

Member Data Documentation