CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::thread::detail::Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true > Struct Template Reference

#include <mma_sm60.h>

Public Types

using FragmentA = Array< half_t, Shape::kMK >
 A operand storage. More...
 
using FragmentB = Array< half_t, Shape::kKN >
 B operand storage. More...
 
using FragmentC = Array< half_t, Shape::kMN >
 C operand storage. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE void operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
 Computes a matrix product D = A * B + C. More...
 

Member Typedef Documentation

template<typename Shape >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true >::FragmentA = Array<half_t, Shape::kMK>
template<typename Shape >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true >::FragmentB = Array<half_t, Shape::kKN>
template<typename Shape >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true >::FragmentC = Array<half_t, Shape::kMN>

Member Function Documentation

template<typename Shape >
CUTLASS_HOST_DEVICE void cutlass::gemm::thread::detail::Mma_HFMA2< Shape, layout::RowMajor, layout::RowMajor, layout::ColumnMajor, true >::operator() ( FragmentC &  D,
FragmentA const &  A,
FragmentB const &  B,
FragmentC const &  C 
)
inline

Initialize the output (D) with the input accumulator (C)

Use 1x2x1 HFMA2 sequence for bulk of computation


The documentation for this struct was generated from the following file: