cutlass/wmma_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 // CUTLASS WMMA does not support clang at present, so none of the feature macros below are
 // defined when __clang__ is set.
 #ifndef __clang__

 // Baseline WMMA support: requires __CUDACC_VER_MAJOR__ >= 9 and a __CUDA_ARCH__ that is
 // either undefined or at least 700.
 #if (__CUDACC_VER_MAJOR__ >= 9) && (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700))
 #define CUTLASS_ARCH_WMMA_ENABLED
 #define CUTLASS_ARCH_WMMA_SM70_ENABLED
 #endif

 // Integer matrix multiply: requires __CUDACC_VER_MAJOR__ >= 10 and a __CUDA_ARCH__ that is
 // either undefined or at least 720.
 #if (__CUDACC_VER_MAJOR__ >= 10) && (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 720))
 #define CUTLASS_ARCH_INTEGER_MATRIX_MULTIPLY_ENABLED
 #define CUTLASS_ARCH_WMMA_SM72_ENABLED
 #endif

 // Sub-byte integer matrix multiply: requires __CUDACC_VER_MAJOR__ >= 10 and a __CUDA_ARCH__
 // that is either undefined or at least 750.
 #if (__CUDACC_VER_MAJOR__ >= 10) && (!defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 750))
 #define CUTLASS_SUBBYTE_INTEGER_MATRIX_MULTIPLY_ENABLED
 #define CUTLASS_ARCH_WMMA_SM75_ENABLED
 #endif

 #endif // !defined(__clang__)

 #if defined(CUTLASS_ARCH_WMMA_ENABLED)

 #include <mma.h>
 #include "cutlass/arch/mma.h"
 #include "cutlass/array.h"
 #include "cutlass/numeric_types.h"
 #include "cutlass/gemm/gemm.h"


 namespace cutlass {
 namespace arch {

 /// Tags the memory space in which data resides.
 enum class MemoryKind {
   /// Data resides in shared memory
   kShared,
   /// Data resides in global memory
   kGlobal
 };


 /// Compile-time constants describing how a warp is partitioned into quads.
 struct WarpParams {
   /// Number of threads in one quad
   static const int kThreadsPerQuad = 4;
   /// Number of quads in one warp
   static const int kQuadsPerWarp = 8;
   /// Number of threads in one warp (8 quads of 4 threads = 32)
   static const int kThreadsPerWarp = kQuadsPerWarp * kThreadsPerQuad;
 };

 /// Translates a CUTLASS element type into the type used with WMMA.
 /// The generic case is the identity mapping: `Type` is the template argument itself.
 template <typename Element>
 struct CutlassToWmmaDataType {
   using Type = Element;
 };

 /// Specialization: cutlass::half_t is translated to __half.
 template <>
 struct CutlassToWmmaDataType<cutlass::half_t> {
   using Type = __half;
 };


 /// Specialization: int8_t is translated to signed char.
 template <>
 struct CutlassToWmmaDataType<int8_t> {
   using Type = signed char;
 };

 /// Specialization: uint8_t is translated to unsigned char.
 template <>
 struct CutlassToWmmaDataType<uint8_t> {
   using Type = unsigned char;
 };

 /// Specialization: int32_t is translated to int.
 template <>
 struct CutlassToWmmaDataType<int32_t> {
   using Type = int;
 };

 // Sub-byte element types are mapped only when sub-byte integer matrix multiply is enabled.
 #ifdef CUTLASS_SUBBYTE_INTEGER_MATRIX_MULTIPLY_ENABLED

 /// Specialization: cutlass::int4b_t is translated to the experimental WMMA precision tag s4.
 template <>
 struct CutlassToWmmaDataType<cutlass::int4b_t> {
   using Type = nvcuda::wmma::experimental::precision::s4;
 };

 /// Specialization: cutlass::uint4b_t is translated to the experimental WMMA precision tag u4.
 template <>
 struct CutlassToWmmaDataType<cutlass::uint4b_t> {
   using Type = nvcuda::wmma::experimental::precision::u4;
 };

 /// Specialization: cutlass::uint1b_t is translated to the experimental WMMA precision tag b1.
 template <>
 struct CutlassToWmmaDataType<cutlass::uint1b_t> {
   using Type = nvcuda::wmma::experimental::precision::b1;
 };

 #endif // CUTLASS_SUBBYTE_INTEGER_MATRIX_MULTIPLY_ENABLED

 /// Translates a CUTLASS layout tag into its WMMA counterpart.
 /// The generic template has no members; only the specializations provide `Layout` and `value`.
 template <typename LayoutTag>
 struct CutlassToWmmaLayout {};

 /// Specialization: cutlass::layout::RowMajor corresponds to the WMMA row-major layout.
 template <>
 struct CutlassToWmmaLayout<cutlass::layout::RowMajor> {
   /// WMMA layout tag type
   using Layout = nvcuda::wmma::row_major;
   /// WMMA layout enumerant
   static const nvcuda::wmma::layout_t value = nvcuda::wmma::layout_t::mem_row_major;
 };

 /// Specialization: cutlass::layout::ColumnMajor corresponds to the WMMA column-major layout.
 template <>
 struct CutlassToWmmaLayout<cutlass::layout::ColumnMajor> {
   /// WMMA layout tag type
   using Layout = nvcuda::wmma::col_major;
   /// WMMA layout enumerant
   static const nvcuda::wmma::layout_t value = nvcuda::wmma::layout_t::mem_col_major;
 };

 /// Translates a type used with WMMA back into the CUTLASS element type.
 /// The generic case is the identity mapping: `Type` is the template argument itself.
 template <typename WmmaType>
 struct WmmaToCutlassDataType {
   using Type = WmmaType;
 };

 /// Specialization: __half is translated back to cutlass::half_t.
 template <>
 struct WmmaToCutlassDataType<__half> {
   using Type = cutlass::half_t;
 };

 /// WMMA template structure defines nvcuda::wmma::fragments and static assertion checks
 /// for a specific template parameterized data type (Element[A|B|C]), layout (Layout[A|B|C]),
 /// and native wmma size (Shape).
 ///
 /// Only the declaration appears here. The operator defaults to
 /// cutlass::arch::OpMultiplyAdd when not given explicitly.
 template <typename Shape_,
           typename ElementA_,
           typename LayoutA_,
           typename ElementB_,
           typename LayoutB_,
           typename ElementC_,
           typename LayoutC_,
           typename Operator_ = cutlass::arch::OpMultiplyAdd>
 struct Wmma;


 } // namespace arch
 } // namespace cutlass


 //
 // Specializations for each compute capability
 //
 #ifdef CUTLASS_ARCH_WMMA_SM70_ENABLED
 #include "cutlass/arch/wmma_sm70.h"
 #endif

 #ifdef CUTLASS_ARCH_WMMA_SM72_ENABLED
 #include "cutlass/arch/wmma_sm72.h"
 #endif

 #ifdef CUTLASS_ARCH_WMMA_SM75_ENABLED
 #include "cutlass/arch/wmma_sm75.h"
 #endif


 #endif //CUTLASS_ARCH_WMMA_ENABLED
cutlass::uint4b_t
integer_subbyte< 4, false > uint4b_t
4-bit Unsigned integer type
Definition: integer_subbyte.h:158

cutlass
Definition: aligned_buffer.h:35

wmma_sm75.h
Matrix multiply.

cutlass::uint1b_t
integer_subbyte< 1, false > uint1b_t
1-bit Unsigned integer type
Definition: integer_subbyte.h:152

wmma_sm70.h
Matrix multiply.

cutlass::half_t
IEEE half-precision floating-point type.
Definition: half.h:126

gemm.h
Defines common types used for all GEMM-like operators.

wmma_sm72.h
Matrix multiply.

array.h
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

mma.h
Templates exposing architecture support for multiply-add operations.

numeric_types.h
Top-level include for all CUTLASS numeric types.

cutlass::int4b_t
integer_subbyte< 4, true > int4b_t
4-bit Integer type
Definition: integer_subbyte.h:155