CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ > Class Template Reference

Partial specialization for column-major output.

#include <gemm_splitk_parallel.h>

Classes

struct  Arguments
 Argument structure. More...
 

Public Types

using ElementA = ElementA_
 
using LayoutA = LayoutA_
 
using ElementB = ElementB_
 
using LayoutB = LayoutB_
 
using ElementC = ElementC_
 
using LayoutC = layout::ColumnMajor
 
using ElementAccumulator = ElementAccumulator_
 
using OperatorClass = OperatorClass_
 
using ArchTag = ArchTag_
 
using ThreadblockShape = ThreadblockShape_
 
using WarpShape = WarpShape_
 
using InstructionShape = InstructionShape_
 
using ConvertScaledOp = ConvertScaledOp_
 
using EpilogueOutputOp = EpilogueOutputOp_
 
using ReductionOp = ReductionOp_
 
using ThreadblockSwizzle = ThreadblockSwizzle_
 
using Operator = Operator_
 
using UnderlyingOperator = GemmSplitKParallel< ElementB, typename layout::LayoutTranspose< LayoutB >::type, ElementA, typename layout::LayoutTranspose< LayoutA >::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ConvertScaledOp, ReductionOp, ThreadblockSwizzle, Stages, kAlignmentA, kAlignmentB, Operator >
 
using UnderlyingArguments = typename UnderlyingOperator::Arguments
 
using GemmKernel = typename UnderlyingOperator::GemmKernel
 
using ReductionKernel = typename UnderlyingOperator::ReductionKernel
 

Public Member Functions

 GemmSplitKParallel ()
 Constructs the GEMM. More...
 
Status initialize (Arguments const &args, void *workspace)
 Initializes GEMM state from arguments. More...
 
Status update (Arguments const &args, void *workspace=nullptr)
 Lightweight update given a subset of arguments. More...
 
Status run (cudaStream_t stream=nullptr)
 Runs the kernel using initialized state. More...
 
Status operator() (cudaStream_t stream=nullptr)
 Runs the kernel using initialized state. More...
 
Status operator() (Arguments const &args, void *workspace=nullptr, cudaStream_t stream=nullptr)
 Runs the kernel using initialized state. More...
 

Static Public Member Functions

static UnderlyingArguments to_underlying_arguments (Arguments const &args)
 Helper to construct a transposed equivalent for the underlying GEMM operator. More...
 
static Status can_implement (Arguments const &args)
 Determines whether the GEMM can execute the given problem. More...
 
static size_t get_workspace_size (Arguments const &args)
 Gets the workspace size. More...
 

Static Public Attributes

static int const kStages = Stages
 

Member Typedef Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ArchTag = ArchTag_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ConvertScaledOp = ConvertScaledOp_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementA = ElementA_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementAccumulator = ElementAccumulator_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementB = ElementB_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ElementC = ElementC_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::EpilogueOutputOp = EpilogueOutputOp_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::GemmKernel = typename UnderlyingOperator::GemmKernel
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::InstructionShape = InstructionShape_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutA = LayoutA_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutB = LayoutB_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::LayoutC = layout::ColumnMajor
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::Operator = Operator_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::OperatorClass = OperatorClass_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ReductionKernel = typename UnderlyingOperator::ReductionKernel
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ReductionOp = ReductionOp_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ThreadblockShape = ThreadblockShape_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::ThreadblockSwizzle = ThreadblockSwizzle_
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::UnderlyingArguments = typename UnderlyingOperator::Arguments
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::UnderlyingOperator = GemmSplitKParallel< ElementB, typename layout::LayoutTranspose<LayoutB>::type, ElementA, typename layout::LayoutTranspose<LayoutA>::type, ElementC, layout::RowMajor, ElementAccumulator, OperatorClass, ArchTag, ThreadblockShape, WarpShape, InstructionShape, EpilogueOutputOp, ConvertScaledOp, ReductionOp, ThreadblockSwizzle, Stages, kAlignmentA, kAlignmentB, Operator >
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
using cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::WarpShape = WarpShape_

Constructor & Destructor Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::GemmSplitKParallel ( )
inline

Member Function Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::can_implement ( Arguments const &  args)
inline static
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static size_t cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::get_workspace_size ( Arguments const &  args)
inline static
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::initialize ( Arguments const &  args,
void *  workspace 
)
inline
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::operator() ( cudaStream_t  stream = nullptr)
inline
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::operator() ( Arguments const &  args,
void *  workspace = nullptr,
cudaStream_t  stream = nullptr 
)
inline
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::run ( cudaStream_t  stream = nullptr)
inline
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
static UnderlyingArguments cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::to_underlying_arguments ( Arguments const &  args)
inline static
template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
Status cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::update ( Arguments const &  args,
void *  workspace = nullptr 
)
inline

Member Data Documentation

template<typename ElementA_ , typename LayoutA_ , typename ElementB_ , typename LayoutB_ , typename ElementC_ , typename ElementAccumulator_ , typename OperatorClass_ , typename ArchTag_ , typename ThreadblockShape_ , typename WarpShape_ , typename InstructionShape_ , typename EpilogueOutputOp_ , typename ConvertScaledOp_ , typename ReductionOp_ , typename ThreadblockSwizzle_ , int Stages, int kAlignmentA, int kAlignmentB, typename Operator_ >
int const cutlass::gemm::device::GemmSplitKParallel< ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, layout::ColumnMajor, ElementAccumulator_, OperatorClass_, ArchTag_, ThreadblockShape_, WarpShape_, InstructionShape_, EpilogueOutputOp_, ConvertScaledOp_, ReductionOp_, ThreadblockSwizzle_, Stages, kAlignmentA, kAlignmentB, Operator_ >::kStages = Stages
static

The documentation for this class was generated from the following file: