cutlass/gemm_2thread_2mma__sm50_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include "cutlass/cutlass.h"
 #include "cutlass/tensor_ref.h"
 #include "cutlass/layout/matrix.h"
 #include "cutlass/arch/mma.h"
 #include "cutlass/gemm/gemm.h"
 #include "cutlass/gemm/thread/mma.h"


 namespace cutlass {
 namespace gemm {
 namespace thread {


 /// Template that handles all packed matrix layouts.
 ///
 /// Computes D = A * B + C on fragments by invoking a 1x1x1 arch::Mma once for every
 /// (m, n, k) combination of the problem shape.
 template <
   /// Size of the Gemm problem - concept: gemm::GemmShape<>
   typename Shape_,
   /// Data type of operand A
   typename ElementA_,
   /// Layout of A matrix (concept: layout::MapFunc)
   typename LayoutA_,
   /// Data type of operand B
   typename ElementB_,
   /// Layout of B matrix (concept: layout::MapFunc)
   typename LayoutB_,
   /// Element type of operand C
   typename ElementC_,
   /// Layout of C matrix (concept: layout::MapFunc)
   typename LayoutC_,
   /// Underlying mathematical operator
   typename Operator_
 >
 struct MmaGeneric {

   /// Size of the Gemm problem - concept: gemm::GemmShape<>
   using Shape = Shape_;

   /// Data type of operand A
   using ElementA = ElementA_;

   /// Layout of A matrix (concept: layout::MapFunc)
   using LayoutA = LayoutA_;

   /// Data type of operand B
   using ElementB = ElementB_;

   /// Layout of B matrix (concept: layout::MapFunc)
   using LayoutB = LayoutB_;

   /// Element type of operand C
   using ElementC = ElementC_;

   /// Layout of C matrix (concept: layout::MapFunc)
   using LayoutC = LayoutC_;

   /// Underlying mathematical operator
   using Operator = Operator_;

   /// A operand storage (Shape::kMK elements)
   using FragmentA = Array<ElementA, Shape::kMK>;

   /// B operand storage (Shape::kKN elements)
   using FragmentB = Array<ElementB, Shape::kKN>;

   /// C operand storage (Shape::kMN elements)
   using FragmentC = Array<ElementC, Shape::kMN>;

   /// Scalar (1x1x1) multiply-add operation applied to one element triple at a time
   using MmaOp = arch::Mma<
     gemm::GemmShape<1,1,1>,
     1,
     ElementA, LayoutA,
     ElementB, LayoutB,
     ElementC, LayoutC,
     Operator>;

   //
   // Methods
   //

   /// Computes a matrix product D = A * B + C
   ///
   /// \param D  destination fragment; overwritten with C, then accumulated into in place
   /// \param A  left-hand operand fragment, addressed as an M-by-K matrix via LayoutA::packed
   /// \param B  right-hand operand fragment, addressed as a K-by-N matrix via LayoutB::packed
   /// \param C  source accumulator fragment
   CUTLASS_HOST_DEVICE
   void operator()(
     FragmentC & D,
     FragmentA const & A,
     FragmentB const & B,
     FragmentC const & C) {

     // Seed the destination with the incoming accumulators; every product below is
     // added on top of this copy.
     D = C;

     // Address the destination storage as an M-by-N matrix
     TensorRef<ElementC, LayoutC> accum_view(
       reinterpret_cast<ElementC *>(&D), LayoutC::packed({ Shape::kM, Shape::kN }));

     // Address the A storage as an M-by-K matrix
     TensorRef<ElementA const, LayoutA> lhs_view(
       reinterpret_cast<ElementA const *>(&A), LayoutA::packed({Shape::kM, Shape::kK}));

     // Address the B storage as a K-by-N matrix
     TensorRef<ElementB const, LayoutB> rhs_view(
       reinterpret_cast<ElementB const *>(&B), LayoutB::packed({Shape::kK, Shape::kN}));

     MmaOp scalar_mma;

     // Loop nest: k outermost, then n, then m
     CUTLASS_PRAGMA_UNROLL
     for (int k = 0; k < Shape::kK; ++k) {

       CUTLASS_PRAGMA_UNROLL
       for (int n = 0; n < Shape::kN; ++n) {

         CUTLASS_PRAGMA_UNROLL
         for (int m = 0; m < Shape::kM; ++m) {

           // Serpentine traversal: even columns visit rows first-to-last, odd columns
           // visit them last-to-first.
           int row = m;
           if (n % 2) {
             row = Shape::kM - 1 - m;
           }

           MatrixCoord out_coord(row, n);
           MatrixCoord lhs_coord(row, k);
           MatrixCoord rhs_coord(k, n);

           // Stage the three scalars in single-element fragments for the 1x1x1 operation
           Array<ElementC, 1> acc;
           Array<ElementA, 1> lhs;
           Array<ElementB, 1> rhs;

           acc[0] = accum_view.at(out_coord);
           lhs[0] = lhs_view.at(lhs_coord);
           rhs[0] = rhs_view.at(rhs_coord);

           // The accumulator fragment is passed as both the destination and the addend
           scalar_mma(acc, lhs, rhs, acc);

           accum_view.at(out_coord) = acc[0];
         }
       }
     }
   }
 };


 /// Structure to compute the matrix product - partial specialization for
 /// arch::OpMultiplyAdd that delegates all work to MmaGeneric.
 ///
 /// NOTE(review): the trailing `bool` argument fills the last parameter of the primary
 /// Mma template, which is declared outside this file - confirm its meaning there.
 template <
   /// Size of the Gemm problem - concept: gemm::GemmShape<>
   typename Shape_,
   /// Data type of operand A
   typename ElementA_,
   /// Layout of A matrix (concept: layout::MapFunc)
   typename LayoutA_,
   /// Data type of operand B
   typename ElementB_,
   /// Layout of B matrix (concept: layout::MapFunc)
   typename LayoutB_,
   /// Element type of operand C
   typename ElementC_,
   /// Layout of C matrix (concept: layout::MapFunc)
   typename LayoutC_
 >
 struct Mma<
   Shape_,
   ElementA_,
   LayoutA_,
   ElementB_,
   LayoutB_,
   ElementC_,
   LayoutC_,
   arch::OpMultiplyAdd,
   bool> {

   /// Size of the Gemm problem - concept: gemm::GemmShape<>
   using Shape = Shape_;

   /// Data type of operand A
   using ElementA = ElementA_;

   /// Layout of A matrix (concept: layout::MapFunc)
   using LayoutA = LayoutA_;

   /// Data type of operand B
   using ElementB = ElementB_;

   /// Layout of B matrix (concept: layout::MapFunc)
   using LayoutB = LayoutB_;

   /// Element type of operand C
   using ElementC = ElementC_;

   /// Layout of C matrix (concept: layout::MapFunc)
   using LayoutC = LayoutC_;

   /// Underlying mathematical operator
   using Operator = arch::OpMultiplyAdd;

   /// A operand storage (Shape::kMK elements)
   using FragmentA = Array<ElementA, Shape::kMK>;

   /// B operand storage (Shape::kKN elements)
   using FragmentB = Array<ElementB, Shape::kKN>;

   /// C operand storage (Shape::kMN elements)
   using FragmentC = Array<ElementC, Shape::kMN>;

   //
   // Methods
   //

   /// Computes a matrix product D = A * B + C
   ///
   /// \param D  destination fragment written by the generic implementation
   /// \param A  left-hand operand fragment
   /// \param B  right-hand operand fragment
   /// \param C  source accumulator fragment
   CUTLASS_HOST_DEVICE
   void operator()(
     FragmentC & D,
     FragmentA const & A,
     FragmentB const & B,
     FragmentC const & C) {

     // Instantiate the generic implementation with exactly this specialization's types
     using GenericMma = MmaGeneric<
       Shape,
       ElementA, LayoutA,
       ElementB, LayoutB,
       ElementC, LayoutC,
       Operator>;

     GenericMma generic_mma;

     generic_mma(D, A, B, C);
   }
 };


 } // namespace thread
 } // namespace gemm
 } // namespace cutlass

cutlass::gemm::thread::MmaGeneric::Operator
Operator_ Operator
Underlying mathematical operator.
Definition: gemm/thread/mma_sm50.h:89

cutlass
Definition: aligned_buffer.h:35

cutlass::gemm::thread::MmaGeneric::FragmentB
Array< ElementB, Shape::kKN > FragmentB
B operand storage.
Definition: gemm/thread/mma_sm50.h:95

tensor_ref.h
Defines a structure containing strides, bounds, and a pointer to tensor data.

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::ElementA
ElementA_ ElementA
Data type of operand A.
Definition: gemm/thread/mma_sm50.h:203

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::ElementB
ElementB_ ElementB
Data type of operand B.
Definition: gemm/thread/mma_sm50.h:209

cutlass::gemm::thread::MmaGeneric::FragmentC
Array< ElementC, Shape::kMN > FragmentC
C operand storage.
Definition: gemm/thread/mma_sm50.h:98

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::operator()
CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
Computes a matrix product D = A * B + C.
Definition: gemm/thread/mma_sm50.h:238

cutlass::gemm::thread::MmaGeneric::LayoutA
LayoutA_ LayoutA
Layout of A matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:74

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentC
Array< ElementC, Shape::kMN > FragmentC
C operand storage.
Definition: gemm/thread/mma_sm50.h:230

cutlass::gemm::thread::MmaGeneric::LayoutC
LayoutC_ LayoutC
Layout of C matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:86

gemm.h
Defines common types used for all GEMM-like operators.

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::ElementC
ElementC_ ElementC
Element type of operand C.
Definition: gemm/thread/mma_sm50.h:215

cutlass::gemm::thread::MmaGeneric::ElementA
ElementA_ ElementA
Data type of operand A.
Definition: gemm/thread/mma_sm50.h:71

cutlass::gemm::thread::MmaGeneric::LayoutB
LayoutB_ LayoutB
Layout of B matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:80

CUTLASS_PRAGMA_UNROLL
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110

mma.h
Templates exposing architecture support for multiply-add operations.

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutB
LayoutB_ LayoutB
Layout of B matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:212

cutlass::gemm::thread::MmaGeneric
Template that handles all packed matrix layouts.
Definition: gemm/thread/mma_sm50.h:65

cutlass::TensorRef< ElementA const, LayoutA >

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentA
Array< ElementA, Shape::kMK > FragmentA
A operand storage.
Definition: gemm/thread/mma_sm50.h:224

cutlass::gemm::thread::MmaGeneric::FragmentA
Array< ElementA, Shape::kMK > FragmentA
A operand storage.
Definition: gemm/thread/mma_sm50.h:92

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::Operator
arch::OpMultiplyAdd Operator
Underlying mathematical operator.
Definition: gemm/thread/mma_sm50.h:221

CUTLASS_HOST_DEVICE
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89

cutlass::gemm::thread::MmaGeneric::Shape
Shape_ Shape
Size of the Gemm problem - concept: gemm::GemmShape<>
Definition: gemm/thread/mma_sm50.h:68

mma.h
Templates exposing architecture support for warp-level multiply-add operations.

cutlass::gemm::GemmShape
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::FragmentB
Array< ElementB, Shape::kKN > FragmentB
B operand storage.
Definition: gemm/thread/mma_sm50.h:227

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::Shape
Shape_ Shape
Size of the Gemm problem - concept: gemm::GemmShape<>
Definition: gemm/thread/mma_sm50.h:200

cutlass::gemm::thread::MmaGeneric::operator()
CUTLASS_HOST_DEVICE void operator()(FragmentC &D, FragmentA const &A, FragmentB const &B, FragmentC const &C)
Computes a matrix product D = A * B + C.
Definition: gemm/thread/mma_sm50.h:115

cutlass::TensorRef::at
CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const
Returns a reference to the element at a given Coord.
Definition: tensor_ref.h:307

cutlass::gemm::thread::Mma
Structure to compute the matrix product.
Definition: gemm/thread/mma.h:66

matrix.h
Defines layout functions used by TensorRef and derived classes.

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutA
LayoutA_ LayoutA
Layout of A matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:206

cutlass::arch::Mma
Matrix multiply-add operation.
Definition: arch/mma.h:92

cutlass::gemm::thread::Mma< Shape_, ElementA_, LayoutA_, ElementB_, LayoutB_, ElementC_, LayoutC_, arch::OpMultiplyAdd, bool >::LayoutC
LayoutC_ LayoutC
Layout of C matrix (concept: layout::MapFunc)
Definition: gemm/thread/mma_sm50.h:218

cutlass.h
Basic include for CUTLASS.

cutlass::MatrixCoord
Definition: matrix_coord.h:39

cutlass::gemm::thread::MmaGeneric::ElementB
ElementB_ ElementB
Data type of operand B.
Definition: gemm/thread/mma_sm50.h:77

cutlass::gemm::thread::MmaGeneric::ElementC
ElementC_ ElementC
Element type of operand C.
Definition: gemm/thread/mma_sm50.h:83

cutlass::arch::Mma< gemm::GemmShape< 1, 1, 1 >, 1, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, Operator >
Matrix multiply-add operation - specialized for 1x1x1x1 matrix multiply operation.
Definition: arch/mma.h:113