cutlass/mma__simt_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include "cutlass/cutlass.h"
 #include "cutlass/array.h"
 #include "cutlass/numeric_types.h"
 #include "cutlass/matrix_shape.h"
 #include "cutlass/gemm/gemm.h"
 #include "cutlass/gemm/warp/mma.h"

 #include "cutlass/gemm/thread/mma.h"

 #include "cutlass/gemm/warp/mma_simt_tile_iterator.h"
 #include "cutlass/gemm/warp/mma_simt_policy.h"


 namespace cutlass {
 namespace gemm {
 namespace warp {


 template <
   typename Shape_,
   typename ElementA_,
   typename LayoutA_,
   typename ElementB_,
   typename LayoutB_,
   typename ElementC_,
   typename LayoutC_,
   typename Policy_,
   int PartitionsK = 1,
   typename Enable = bool
 >
 class MmaSimt {
 public:
   using Shape = Shape_;

   using ElementA = ElementA_;

   using LayoutA = LayoutA_;

   using ElementB = ElementB_;

   using LayoutB = LayoutB_;

   using ElementC = ElementC_;

   using LayoutC = LayoutC_;

   using Policy = Policy_;

   using OperatorClass = arch::OpClassSimt;

   using ThreadLayoutA = typename platform::conditional< platform::is_same< layout::ColumnMajorInterleaved<4>, LayoutA >::value,
                   layout::ColumnMajor,
                   typename platform::conditional < platform::is_same< layout::RowMajorInterleaved<4>, LayoutA >::value,
                       layout::RowMajor,
                       LayoutA>::type
                  >::type;

   using ThreadLayoutB = typename platform::conditional< platform::is_same< layout::ColumnMajorInterleaved<4>, LayoutB >::value,
                   layout::ColumnMajor,
                   typename platform::conditional < platform::is_same< layout::RowMajorInterleaved<4>, LayoutB >::value,
                       layout::RowMajor,
                       LayoutB>::type
                  >::type;

   static constexpr bool use_dp4a = (platform::is_same< layout::ColumnMajorInterleaved<4>, LayoutA>::value ||
                                     platform::is_same< layout::RowMajorInterleaved<4>, LayoutA >::value) &&
                                     platform::is_same< ElementA, int8_t >::value &&
                                     platform::is_same< ElementB, int8_t >::value;

   using dp4a_type = typename platform::conditional< use_dp4a , int8_t, bool >::type;

   using ThreadMma = thread::Mma<
     GemmShape<
       Shape::kM / Policy::WarpShape::kRow,
       Shape::kN / Policy::WarpShape::kColumn,
       Policy::LaneMmaShape::kK>,
     ElementA,
     ThreadLayoutA,
     ElementB,
     ThreadLayoutB,
     ElementC,
     LayoutC,
     arch::OpMultiplyAdd,
     dp4a_type
   >;

 public:

   using IteratorA = MmaSimtTileIterator<
     MatrixShape<Shape::kM, Policy::LaneMmaShape::kK>,
     Operand::kA,
     ElementA,
     LayoutA,
     Policy,
     PartitionsK,
     Shape::kK
   >;

   using FragmentA = typename IteratorA::Fragment;

   using IteratorB = MmaSimtTileIterator<
     MatrixShape<Policy::LaneMmaShape::kK, Shape::kN>,
     Operand::kB,
     ElementB,
     LayoutB,
     Policy,
     PartitionsK,
     Shape::kK
   >;

   using FragmentB = typename IteratorB::Fragment;

   using IteratorC = MmaSimtTileIterator<
     MatrixShape<Shape::kM, Shape::kN>,
     Operand::kC,
     ElementC,
     LayoutC,
     Policy
   >;

   using FragmentC = typename ThreadMma::FragmentC;

 public:

   //
   // Methods
   //

   CUTLASS_DEVICE
   MmaSimt() {}

   CUTLASS_DEVICE
   void operator()(
     FragmentC &d,
     FragmentA const &a,
     FragmentB const &b,
     FragmentC const &c, int group_idx = 0) const {

     ThreadMma mma;

     mma(d, a, b, c);
   }
 };


 } // namespace warp
 } // namespace gemm
 } // namespace cutlass
mma_simt_policy.h
Describes the lane policy used by warp-level matrix multiply operators targeting SIMT instructions...

cutlass::MatrixShape
Describes the size of a matrix tile.
Definition: matrix_shape.h:42

cutlass::gemm::warp::MmaSimt::ElementC
ElementC_ ElementC
Data type of accumulator matrix C.
Definition: mma_simt.h:92

cutlass
Definition: aligned_buffer.h:35

constexpr
#define constexpr
Definition: platform.h:137

mma_simt_tile_iterator.h
Defines the tile iterators used by warp-level matrix multiply operators targeting SIMT instructions.

cutlass::platform::conditional::type
T type
Definition: platform.h:326

cutlass::platform::is_same
std::is_same (false specialization)
Definition: platform.h:394

cutlass::gemm::warp::MmaSimt::FragmentC
typename ThreadMma::FragmentC FragmentC
Storage for C tile.
Definition: mma_simt.h:180

cutlass::gemm::warp::MmaSimt::Shape
Shape_ Shape
Shape of warp-level matrix operation (concept: GemmShape)
Definition: mma_simt.h:77

cutlass::gemm::warp::MmaSimt
Structure to compute the matrix product targeting CUDA cores and SIMT math instructions.
Definition: mma_simt.h:74

gemm.h
Defines common types used for all GEMM-like operators.

cutlass::gemm::warp::MmaSimt::operator()
CUTLASS_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c, int group_idx=0) const
Performs a warp-level matrix multiply-accumulate operation.
Definition: mma_simt.h:194

cutlass::gemm::warp::MmaSimt::use_dp4a
static constexpr bool use_dp4a
Definition: mma_simt.h:117

cutlass::gemm::warp::MmaSimt::LayoutC
LayoutC_ LayoutC
Layout of accumulator matrix C.
Definition: mma_simt.h:95

cutlass::layout::ColumnMajor
Mapping function for column-major matrices.
Definition: layout/matrix.h:142

array.h
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

cutlass::gemm::Operand::kC
C operand (accumulator).

mma.h
Templates exposing architecture support for warp-level multiply-add operations.

cutlass::gemm::warp::MmaSimtTileIterator
Definition: mma_simt_tile_iterator.h:69

cutlass::gemm::Operand::kA

matrix_shape.h
Defines a Shape template for matrix tiles.

cutlass::gemm::warp::MmaSimt::OperatorClass
arch::OpClassSimt OperatorClass
Indicates class of matrix operator.
Definition: mma_simt.h:101

cutlass::gemm::warp::MmaSimt::ThreadLayoutB
typename platform::conditional< platform::is_same< layout::ColumnMajorInterleaved< 4 >, LayoutB >::value, layout::ColumnMajor, typename platform::conditional< platform::is_same< layout::RowMajorInterleaved< 4 >, LayoutB >::value, layout::RowMajor, LayoutB >::type >::type ThreadLayoutB
Definition: mma_simt.h:115

cutlass::gemm::warp::MmaSimt::LayoutA
LayoutA_ LayoutA
Layout of multiplicand A.
Definition: mma_simt.h:83

mma.h
Templates exposing architecture support for warp-level multiply-add operations.

numeric_types.h
Top-level include for all CUTLASS numeric types.

cutlass::gemm::GemmShape
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57

cutlass::platform::conditional
std::conditional (true specialization)
Definition: platform.h:325

cutlass::gemm::warp::MmaSimt::Policy
Policy_ Policy
Shape of the warp in units of thread (concept: MmaLanePolicySimt)
Definition: mma_simt.h:98

cutlass::gemm::warp::MmaSimt::FragmentA
typename IteratorA::Fragment FragmentA
Storage for A tile.
Definition: mma_simt.h:154

cutlass::gemm::warp::MmaSimt::dp4a_type
typename platform::conditional< use_dp4a, int8_t, bool >::type dp4a_type
Definition: mma_simt.h:122

cutlass::layout::RowMajor
Mapping function for row-major matrices.
Definition: layout/matrix.h:50

cutlass::gemm::thread::Mma
Structure to compute the matrix product.
Definition: gemm/thread/mma.h:66

cutlass::gemm::warp::MmaSimt::ElementA
ElementA_ ElementA
Data type of multiplicand A.
Definition: mma_simt.h:80

cutlass::gemm::Operand::kB
B multiplicand.

cutlass::gemm::warp::MmaSimt::ThreadLayoutA
typename platform::conditional< platform::is_same< layout::ColumnMajorInterleaved< 4 >, LayoutA >::value, layout::ColumnMajor, typename platform::conditional< platform::is_same< layout::RowMajorInterleaved< 4 >, LayoutA >::value, layout::RowMajor, LayoutA >::type >::type ThreadLayoutA
Definition: mma_simt.h:108

cutlass::gemm::warp::MmaSimt::ElementB
ElementB_ ElementB
Data type of multiplicand B.
Definition: mma_simt.h:86

cutlass.h
Basic include for CUTLASS.

cutlass::gemm::warp::MmaSimt::LayoutB
LayoutB_ LayoutB
Layout of multiplicand B.
Definition: mma_simt.h:89

cutlass::gemm::warp::MmaSimt::MmaSimt
CUTLASS_DEVICE MmaSimt()
Ctor.
Definition: mma_simt.h:190

cutlass::gemm::warp::MmaSimt::FragmentB
typename IteratorB::Fragment FragmentB
Storage for B tile.
Definition: mma_simt.h:168