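Partial specialization of cutlass::gemm::device::GemmSplitKParallel for column-major output (LayoutC = layout::ColumnMajor). It defines an UnderlyingOperator, a row-major GemmSplitKParallel in which the A and B operands are exchanged and their layouts transposed.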
#include <gemm_splitk_parallel.h>
|
using | ElementA = ElementA_ |
|
using | LayoutA = LayoutA_ |
|
using | ElementB = ElementB_ |
|
using | LayoutB = LayoutB_ |
|
using | ElementC = ElementC_ |
|
using | LayoutC = layout::ColumnMajor |
|
using | ElementAccumulator = ElementAccumulator_ |
|
using | OperatorClass = OperatorClass_ |
|
using | ArchTag = ArchTag_ |
|
using | ThreadblockShape = ThreadblockShape_ |
|
using | WarpShape = WarpShape_ |
|
using | InstructionShape = InstructionShape_ |
|
using | ConvertScaledOp = ConvertScaledOp_ |
|
using | EpilogueOutputOp = EpilogueOutputOp_ |
|
using | ReductionOp = ReductionOp_ |
|
using | ThreadblockSwizzle = ThreadblockSwizzle_ |
|
using | Operator = Operator_ |
|
using | UnderlyingOperator = GemmSplitKParallel< ElementB, typename layout::LayoutTranspose< LayoutB >::type, ElementA, typename layout::LayoutTranspose< LayoutA >::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ConvertScaledOp, ReductionOp, ThreadblockSwizzle, Stages, kAlignmentA, kAlignmentB, Operator > |
|
using | UnderlyingArguments = typename UnderlyingOperator::Arguments |
|
using | GemmKernel = typename UnderlyingOperator::GemmKernel |
|
using | ReductionKernel = typename UnderlyingOperator::ReductionKernel |
|
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ArchTag = ArchTag_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ConvertScaledOp = ConvertScaledOp_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementA = ElementA_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementAccumulator = ElementAccumulator_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementB = ElementB_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementC = ElementC_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::EpilogueOutputOp = EpilogueOutputOp_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::GemmKernel = typename UnderlyingOperator::GemmKernel |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::InstructionShape = InstructionShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutA = LayoutA_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutB = LayoutB_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutC = layout::ColumnMajor |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::Operator = Operator_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::OperatorClass = OperatorClass_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ReductionKernel = typename UnderlyingOperator::ReductionKernel |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ReductionOp = ReductionOp_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ThreadblockShape = ThreadblockShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ThreadblockSwizzle = ThreadblockSwizzle_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::UnderlyingArguments = typename UnderlyingOperator::Arguments |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::UnderlyingOperator = GemmSplitKParallel< ElementB, typename layout::LayoutTranspose<LayoutB>::type, ElementA, typename layout::LayoutTranspose<LayoutA>::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ConvertScaledOp, ReductionOp, ThreadblockSwizzle, Stages, kAlignmentA, kAlignmentB, Operator > |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::WarpShape = WarpShape_ |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::GemmSplitKParallel |
( |
| ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::can_implement |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static size_t cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::get_workspace_size |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::initialize |
( |
Arguments const & |
args, |
|
|
void * |
workspace |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::operator() |
( |
cudaStream_t |
stream = nullptr | ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::operator() |
( |
Arguments const & |
args, |
|
|
void * |
workspace = nullptr , |
|
|
cudaStream_t |
stream = nullptr |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::run |
( |
cudaStream_t |
stream = nullptr | ) |
|
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static UnderlyingArguments cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::to_underlying_arguments |
( |
Arguments const & |
args | ) |
|
|
inline static |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::update |
( |
Arguments const & |
args, |
|
|
void * |
workspace = nullptr |
|
) |
| |
|
inline |
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
int const cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::kStages = Stages |
|
static |
The documentation for this class was generated from the following file: