CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
mma_base.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include "cutlass/aligned_buffer.h"
32 #include "cutlass/arch/memory.h"
33 #include "cutlass/array.h"
34 #include "cutlass/cutlass.h"
35 #include "cutlass/gemm/gemm.h"
36 #include "cutlass/matrix_shape.h"
37 #include "cutlass/numeric_types.h"
39 
40 namespace cutlass {
41 namespace gemm {
42 namespace threadblock {
43 
45 
/// Policy object describing details of the warp-level MMA used by a
/// threadblock-scoped GEMM (concept consumed by MmaBase below).
template <
    /// Warp-level GEMM operator (concept: gemm::warp::MmaTensorOp or gemm::warp::MmaSimt)
    typename Operator_,
    /// Padding used for A operand in shared memory (concept: MatrixShape)
    typename SmemPaddingA_,
    /// Padding used for B operand in shared memory (concept: MatrixShape)
    typename SmemPaddingB_,
    /// Number of partitions of the K dimension of the GEMM
    int PartitionsK = 1>
struct MmaPolicy {
  /// Warp-level GEMM operator (concept: gemm::warp::MmaTensorOp or gemm::warp::MmaSimt)
  using Operator = Operator_;

  /// Padding used for A operand in shared memory
  using SmemPaddingA = SmemPaddingA_;

  /// Padding used for B operand in shared memory
  using SmemPaddingB = SmemPaddingB_;

  /// Number of partitions of K dimension
  static int const kPartitionsK = PartitionsK;
};
69 
71 
74 template <
76  typename Shape_,
78  typename Policy_,
80  int Stages,
82  typename Enable = bool>
83 class MmaBase {
84  public:
86  using Shape = Shape_;
87 
89  using Policy = Policy_;
90 
91  //
92  // Dependent types
93  //
94 
96  using Operator = typename Policy::Operator;
97 
100  using WarpGemm = typename Policy::Operator::Shape;
101 
103  using WarpCount = GemmShape<Shape::kM / WarpGemm::kM,
104  Shape::kN / WarpGemm::kN,
105  Shape::kK / WarpGemm::kK>;
106 
108  static int const kWarpGemmIterations =
109  (WarpGemm::kK / Operator::Policy::MmaShape::kK);
110 
112  static int const kStages = Stages;
113 
116 
119 
120  //
121  // Nested structs
122  //
123 
126  public:
127  //
128  // Type definitions
129  //
130 
132  using ShapeA = MatrixShape<Shape::kM + Policy::SmemPaddingA::kRow,
133  Shape::kK * kStages +
134  Policy::SmemPaddingA::kColumn>;
135 
137  using ShapeB =
138  MatrixShape<Shape::kK * kStages + Policy::SmemPaddingB::kRow,
139  Shape::kN + Policy::SmemPaddingB::kColumn>;
140 
141  public:
142  //
143  // Data members
144  //
145 
148 
151 
152  public:
153 
154  //
155  // Methods
156  //
157 
159  CUTLASS_DEVICE
160  static typename Operator::LayoutA LayoutA() {
161  return Operator::LayoutA::packed({ShapeA::kRow, ShapeA::kColumn});
162  }
163 
166  static typename Operator::LayoutB LayoutB() {
167  return Operator::LayoutB::packed({ShapeB::kRow, ShapeB::kColumn});
168  }
169 
173  return TensorRefA{operand_A.data(), LayoutA()};
174  }
175 
179  return TensorRefB{operand_B.data(), LayoutB()};
180  }
181  };
182 
183  protected:
184 
185  //
186  // Data members
187  //
188 
190  typename Operator::IteratorA warp_tile_iterator_A_;
191 
193  typename Operator::IteratorB warp_tile_iterator_B_;
194 
195 public:
196 
198  CUTLASS_DEVICE
201  SharedStorage &shared_storage,
203  int thread_idx,
205  int warp_idx,
207  int lane_idx
208  ):
209  warp_tile_iterator_A_(shared_storage.operand_A_ref(), lane_idx),
210  warp_tile_iterator_B_(shared_storage.operand_B_ref(), lane_idx) {
211 
212  }
213 };
214 
216 
217 } // namespace threadblock
218 } // namespace gemm
219 } // namespace cutlass
220 
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
Architecture-specific operators on memory.
AlignedBuffer< typename Operator::ElementB, ShapeB::kCount > operand_B
Buffer for B operand.
Definition: mma_base.h:150
Operator::IteratorB warp_tile_iterator_B_
Iterator to load a warp-scoped tile of B operand from shared memory.
Definition: mma_base.h:193
typename Policy::Operator::Shape WarpGemm
Definition: mma_base.h:100
Defines common types used for all GEMM-like operators.
Shared storage object needed by threadblock-scoped GEMM.
Definition: mma_base.h:125
Shape_ Shape
Policy describing tuning details.
Definition: mma_base.h:88
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
Operator_ Operator
Warp-level GEMM operator (concept: gemm::warp::MmaTensorOp or gemm::warp::MmaSimt) ...
Definition: mma_base.h:58
SmemPaddingA_ SmemPaddingA
Padding used for A operand in shared memory.
Definition: mma_base.h:61
Defines a Shape template for matrix tiles.
static CUTLASS_HOST_DEVICE Operator::LayoutB LayoutB()
Returns a layout object for the B matrix.
Definition: mma_base.h:166
Policy object describing MmaTensorOp.
Definition: mma_base.h:56
Definition: tensor_ref.h:146
AlignedBuffer is a container for trivially copyable elements suitable for use in unions and shared me...
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Top-level include for all CUTLASS numeric types.
Modifies semantics of cutlass::Array<> to provide guaranteed alignment.
Definition: aligned_buffer.h:45
CUTLASS_HOST_DEVICE TensorRefA operand_A_ref()
Returns a TensorRef to the A operand.
Definition: mma_base.h:172
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
CUTLASS_HOST_DEVICE pointer data()
Definition: aligned_buffer.h:84
CUTLASS_DEVICE MmaBase(SharedStorage &shared_storage, int thread_idx, int warp_idx, int lane_idx)
Construct from tensor references.
Definition: mma_base.h:199
Definition: mma_base.h:83
typename Policy::Operator Operator
Warp-level Mma.
Definition: mma_base.h:96
Operator::IteratorA warp_tile_iterator_A_
Iterator to load a warp-scoped tile of A operand from shared memory.
Definition: mma_base.h:190
AlignedBuffer< typename Operator::ElementA, ShapeA::kCount > operand_A
Buffer for A operand.
Definition: mma_base.h:147
static CUTLASS_DEVICE Operator::LayoutA LayoutA()
Returns a layout object for the A matrix.
Definition: mma_base.h:160
SmemPaddingB_ SmemPaddingB
Padding used for B operand in shared memory.
Definition: mma_base.h:64
static int const kPartitionsK
Number of partitions of K dimension.
Definition: mma_base.h:67
CUTLASS_HOST_DEVICE TensorRefB operand_B_ref()
Returns a TensorRef to the B operand.
Definition: mma_base.h:178
Basic include for CUTLASS.