CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
wmma_sm72.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <assert.h>
32 #include "cutlass/layout/matrix.h"
33 
35 namespace cutlass {
36 namespace arch {
37 
/////////////////////////////////////////////////////////////////////////////////////////////////
//
// WMMA template structure defines nvcuda::wmma::fragments and static assertions for
// the wmma native instruction shapes supported for int8_t multiplicands.
//
/////////////////////////////////////////////////////////////////////////////////////////////////
template <
typename Shape_,      ///< Size of the matrix product (concept: GemmShape)
typename LayoutA_,    ///< Layout of A matrix (concept: MatrixLayout)
typename LayoutB_,    ///< Layout of B matrix (concept: MatrixLayout)
typename LayoutC_>    ///< Layout of C matrix (concept: MatrixLayout)
struct Wmma<
  Shape_,
  int8_t,
  LayoutA_,
  int8_t,
  LayoutB_,
  int32_t,
  LayoutC_,
  cutlass::arch::OpMultiplyAdd
> {
#if defined(CUTLASS_ARCH_WMMA_SM72_ENABLED)
  using Shape = Shape_;
  using ElementA = int8_t;
  using LayoutA = LayoutA_;
  using ElementB = int8_t;
  using LayoutB = LayoutB_;
  using ElementC = int32_t;
  using LayoutC = LayoutC_;
  using Operator = cutlass::arch::OpMultiplyAdd;

  // Check supported wmma shapes for the given multiplicand data types.
  // NOTE(review): the condition below was reconstructed to match the message string;
  // the documentation extraction dropped these lines — confirm against upstream.
  static_assert(
    platform::is_same<cutlass::gemm::GemmShape<16, 16, 16>, Shape>::value ||
    platform::is_same<cutlass::gemm::GemmShape< 8, 32, 16>, Shape>::value ||
    platform::is_same<cutlass::gemm::GemmShape<32,  8, 16>, Shape>::value,
    "Supported list of wmma operator shape for s8 multiplicands are: 16x16x16, 8x32x16, and 32x8x16");

  // Wmma fragment types mapping CUTLASS element and layout types onto the
  // corresponding nvcuda::wmma fragment template parameters.
  using FragmentA = nvcuda::wmma::fragment<
          nvcuda::wmma::matrix_a,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementA>::Type,
          typename CutlassToWmmaLayout<LayoutA>::Layout>;

  using FragmentB = nvcuda::wmma::fragment<
          nvcuda::wmma::matrix_b,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementB>::Type,
          typename CutlassToWmmaLayout<LayoutB>::Layout>;

  using FragmentC = nvcuda::wmma::fragment<
          nvcuda::wmma::accumulator,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementC>::Type>;

  /// Performs an nvcuda::wmma matrix multiply-accumulate operation: D = A * B + C
  CUTLASS_DEVICE
  void operator()(
    FragmentC &D,
    FragmentA const &A,
    FragmentB const &B,
    FragmentC const &C) const {

    nvcuda::wmma::mma_sync(D, A, B, C);
  }

#else
  static_assert(false, "wmma.mma.sync integer type multiplicands are available only for SM72 and beyond");
#endif

};
117 
/////////////////////////////////////////////////////////////////////////////////////////////////
//
// WMMA template structure defines nvcuda::wmma::fragments and static assertions for
// the wmma native instruction shapes supported for uint8_t multiplicands.
//
/////////////////////////////////////////////////////////////////////////////////////////////////
template <
typename Shape_,      ///< Size of the matrix product (concept: GemmShape)
typename LayoutA_,    ///< Layout of A matrix (concept: MatrixLayout)
typename LayoutB_,    ///< Layout of B matrix (concept: MatrixLayout)
typename LayoutC_>    ///< Layout of C matrix (concept: MatrixLayout)
struct Wmma<
  Shape_,
  uint8_t,
  LayoutA_,
  uint8_t,
  LayoutB_,
  int32_t,
  LayoutC_,
  cutlass::arch::OpMultiplyAdd
> {
#if defined(CUTLASS_ARCH_WMMA_SM72_ENABLED)
  using Shape = Shape_;
  using ElementA = uint8_t;
  using LayoutA = LayoutA_;
  using ElementB = uint8_t;
  using LayoutB = LayoutB_;
  using ElementC = int32_t;
  using LayoutC = LayoutC_;
  using Operator = cutlass::arch::OpMultiplyAdd;

  // Check supported wmma shapes for the given multiplicand data types.
  // NOTE(review): the condition below was reconstructed to match the message string;
  // the documentation extraction dropped these lines — confirm against upstream.
  static_assert(
    platform::is_same<cutlass::gemm::GemmShape<16, 16, 16>, Shape>::value ||
    platform::is_same<cutlass::gemm::GemmShape< 8, 32, 16>, Shape>::value ||
    platform::is_same<cutlass::gemm::GemmShape<32,  8, 16>, Shape>::value,
    "Supported list of wmma operator shape for u8 multiplicands are: 16x16x16, 8x32x16, and 32x8x16");

  // Wmma fragment types mapping CUTLASS element and layout types onto the
  // corresponding nvcuda::wmma fragment template parameters.
  using FragmentA = nvcuda::wmma::fragment<
          nvcuda::wmma::matrix_a,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementA>::Type,
          typename CutlassToWmmaLayout<LayoutA>::Layout>;

  using FragmentB = nvcuda::wmma::fragment<
          nvcuda::wmma::matrix_b,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementB>::Type,
          typename CutlassToWmmaLayout<LayoutB>::Layout>;

  using FragmentC = nvcuda::wmma::fragment<
          nvcuda::wmma::accumulator,
          Shape::kM,
          Shape::kN,
          Shape::kK,
          typename CutlassToWmmaDataType<ElementC>::Type>;

  /// Performs an nvcuda::wmma matrix multiply-accumulate operation: D = A * B + C
  CUTLASS_DEVICE
  void operator()(
    FragmentC &D,
    FragmentA const &A,
    FragmentB const &B,
    FragmentC const &C) const {

    nvcuda::wmma::mma_sync(D, A, B, C);
  }

#else
  static_assert(false, "wmma.mma.sync integer type multiplicands are available only for SM72 and beyond");
#endif

};
196 
197 } // namespace arch
198 } // namespace cutlass
Definition: aligned_buffer.h:35
std::is_same (false specialization)
Definition: platform.h:394
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
#define static_assert(__e, __m)
Definition: platform.h:153
Defines layout functions used by TensorRef and derived classes.