CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::thread::detail::Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false > Struct Template Reference

#include <mma_sm60.h>

Public Types

using FragmentA = Array< half_t, Shape::kMK >
 A operand storage. More...
 
using FragmentB = Array< half_t, Shape::kKN >
 B operand storage. More...
 
using FragmentC = Array< half_t, Shape::kMN >
 C operand storage. More...
 

Public Member Functions

CUTLASS_HOST_DEVICE void operator() (FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
 Computes a matrix product D = A * B + C. More...
 

Member Typedef Documentation

template<typename Shape , typename LayoutA , typename LayoutB >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false >::FragmentA = Array<half_t, Shape::kMK>
template<typename Shape , typename LayoutA , typename LayoutB >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false >::FragmentB = Array<half_t, Shape::kKN>
template<typename Shape , typename LayoutA , typename LayoutB >
using cutlass::gemm::thread::detail::Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false >::FragmentC = Array<half_t, Shape::kMN>

Member Function Documentation

template<typename Shape , typename LayoutA , typename LayoutB >
CUTLASS_HOST_DEVICE void cutlass::gemm::thread::detail::Mma_HFMA2< Shape, LayoutA, LayoutB, layout::RowMajor, false >::operator() ( FragmentC &  D,
FragmentA const &  A,
FragmentB const &  B,
FragmentC const &  C 
)
inline

Initialize output with input

Use 1x1x2 HFMA2 sequence for bulk of computation


The documentation for this struct was generated from the following file: