CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Template for GEMM performing a reduction over K partitions in parallel.
#include "cutlass/cutlass.h"
#include "cutlass/numeric_types.h"
#include "cutlass/arch/arch.h"
#include "cutlass/device_kernel.h"
#include "cutlass/gemm/threadblock/threadblock_swizzle.h"
#include "cutlass/gemm/kernel/gemm.h"
#include "cutlass/gemm/kernel/default_gemm_splitk_parallel.h"
#include "cutlass/gemm/device/default_gemm_configuration.h"
#include "cutlass/epilogue/thread/conversion_op.h"
#include "cutlass/reduction/kernel/reduce_split_k.h"
#include "cutlass/reduction/thread/reduction_operators.h"
Go to the source code of this file.
Namespaces:
- cutlass
- cutlass::gemm
- cutlass::gemm::device