cutlass/default__gemv__core_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include "cutlass/cutlass.h"
 #include "cutlass/array.h"
 #include "cutlass/numeric_types.h"
 #include "cutlass/matrix_shape.h"

 #include "cutlass/layout/matrix.h"

 #include "cutlass/platform/platform.h"

 #include "cutlass/gemm/gemm.h"
 #include "cutlass/gemm/thread/mma.h"

 #include "cutlass/transform/threadblock/predicated_tile_iterator.h"
 #include "cutlass/transform/pitch_linear_thread_map.h"

 #include "cutlass/gemm/threadblock/gemv.h"

 namespace cutlass {
 namespace gemm {
 namespace threadblock {

 /// Collects the iterator, thread-map policy and thread-level operator types that follow
 /// from one GEMV tile configuration.
 ///
 /// The struct carries no data; every member is a compile-time definition. Instantiation
 /// is rejected unless Shape::kM == 1, ThreadShape::kM == 1, Shape::kK is a multiple of
 /// ThreadShape::kK, and ThreadShape::kK is one of 1, 2, 4, 8, 16 or 32.
 template <
   typename Shape_,        ///< Tile shape; its kM, kN and kK constants are read below
   typename ThreadShape_,  ///< Shape given to the thread-level operator; kM, kN and kK are read
   typename ElementA_,     ///< Element type of operand A
   typename LayoutA_,      ///< Layout of operand A
   typename ElementB_,     ///< Element type of operand B
   typename LayoutB_,      ///< Layout of operand B
   typename ElementC_,     ///< Element type of operand C
   typename LayoutC_       ///< Layout of operand C
 >
 struct DefaultGemvCore {

   //
   // Template arguments re-exported under their public names
   //

   using Shape = Shape_;
   using ThreadShape = ThreadShape_;

   using ElementA = ElementA_;
   using LayoutA = LayoutA_;

   using ElementB = ElementB_;
   using LayoutB = LayoutB_;

   using ElementC = ElementC_;
   using LayoutC = LayoutC_;

   //
   // Preconditions on the two shapes
   //

   static_assert((Shape::kM == 1), "M=1 is required for GEMV");

   static_assert((ThreadShape::kM == 1), "M=1 is required for GEMV");

   static_assert(Shape::kK % ThreadShape::kK == 0, "Shape::K must be a multiple of ThreadShape::K");

   static_assert(((ThreadShape::kK == 1) || (ThreadShape::kK == 2) || (ThreadShape::kK == 4) ||
                  (ThreadShape::kK == 8) || (ThreadShape::kK == 16) || (ThreadShape::kK == 32)),
                "ThreadShape::K must be a 1, 2, 4, 8, 16 or 32");

   /// Thread count passed to the B and C policies. This is an integer division, so any
   /// remainder of Shape::kN / ThreadShape::kN is discarded.
   static int const kThreadsPerN = Shape::kN / ThreadShape::kN;

   //
   // Operand A
   //

   /// Policy for A: the thread-contiguous variant when LayoutA is exactly layout::RowMajor,
   /// the thread-strided variant for every other layout. Both use a thread count of 1.
   using IteratorPolicyA = typename cutlass::platform::conditional<
       cutlass::platform::is_same<LayoutA, cutlass::layout::RowMajor>::value,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous<
           cutlass::layout::PitchLinearShape<Shape::kK, Shape::kM>,
           1,
           ThreadShape::kK>,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided<
           cutlass::layout::PitchLinearShape<Shape::kM, Shape::kK>,
           1,
           ThreadShape::kM>
     >::type;

   /// Iterator over the (kM x kK) tile of A.
   // NOTE(review): the literal 1 is presumably the iterator's advance rank - confirm
   // against predicated_tile_iterator.h.
   using IteratorA = cutlass::transform::threadblock::PredicatedTileIterator<
       cutlass::MatrixShape<Shape::kM, Shape::kK>,
       ElementA,
       LayoutA,
       1,
       IteratorPolicyA>;

   //
   // Operand B
   //

   /// Policy for B: selected on LayoutB the same way as for A, but with kThreadsPerN threads.
   using IteratorPolicyB = typename cutlass::platform::conditional<
       cutlass::platform::is_same<LayoutB, cutlass::layout::RowMajor>::value,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous<
           cutlass::layout::PitchLinearShape<Shape::kN, Shape::kK>,
           kThreadsPerN,
           ThreadShape::kN>,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided<
           cutlass::layout::PitchLinearShape<Shape::kK, Shape::kN>,
           kThreadsPerN,
           ThreadShape::kK>
     >::type;

   /// Iterator over the (kK x kN) tile of B.
   using IteratorB = cutlass::transform::threadblock::PredicatedTileIterator<
       cutlass::MatrixShape<Shape::kK, Shape::kN>,
       ElementB,
       LayoutB,
       0,
       IteratorPolicyB>;

   //
   // Operand C
   //

   /// Policy for C: selected on LayoutC, again with kThreadsPerN threads.
   using IteratorPolicyC = typename cutlass::platform::conditional<
       cutlass::platform::is_same<LayoutC, cutlass::layout::RowMajor>::value,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous<
           cutlass::layout::PitchLinearShape<Shape::kN, Shape::kM>,
           kThreadsPerN,
           ThreadShape::kN>,
       cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided<
           cutlass::layout::PitchLinearShape<Shape::kM, Shape::kN>,
           kThreadsPerN,
           ThreadShape::kM>
     >::type;

   /// Iterator over the (kM x kN) tile of C.
   using IteratorC = cutlass::transform::threadblock::PredicatedTileIterator<
       cutlass::MatrixShape<Shape::kM, Shape::kN>,
       ElementC,
       LayoutC,
       0,
       IteratorPolicyC>;

   //
   // Thread-level operator
   //

   /// Thread-level multiply-add. Its shape takes M and N from ThreadShape but K from
   /// Shape (the full Shape::kK, not ThreadShape::kK).
   using MmaSimtOp = cutlass::gemm::thread::Mma<
       cutlass::gemm::GemmShape<ThreadShape::kM, ThreadShape::kN, Shape::kK>,
       ElementA,
       LayoutA,
       ElementB,
       LayoutB,
       ElementC,
       LayoutC>;

   /// Alias of MmaSimtOp.
   using Operator = MmaSimtOp;
 };


 } // namespace threadblock
 } // namespace gemm
 } // namespace cutlass
cutlass::MatrixShape
Describes the size of a matrix tile.
Definition: matrix_shape.h:42

cutlass
Definition: aligned_buffer.h:35

cutlass::gemm::threadblock::DefaultGemvCore::Shape
Shape_ Shape
Definition: default_gemv_core.h:70

cutlass::platform::is_same
std::is_same (false specialization)
Definition: platform.h:394

pitch_linear_thread_map.h
Templates implementing how threads are mapped to a given tile.

cutlass::gemm::threadblock::DefaultGemvCore::ThreadShape
ThreadShape_ ThreadShape
Definition: default_gemv_core.h:71

gemm.h
Defines common types used for all GEMM-like operators.

cutlass::gemm::threadblock::DefaultGemvCore::ElementA
ElementA_ ElementA
Definition: default_gemv_core.h:77

platform.h
C++ features that may be otherwise unimplemented for CUDA device functions.

cutlass::gemm::threadblock::DefaultGemvCore::IteratorPolicyC
typename platform::conditional< platform::is_same< LayoutC, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kM >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kN >, kThreadsPerN, ThreadShape::kM >>::type IteratorPolicyC
Definition: default_gemv_core.h:108

cutlass::layout::PitchLinearShape
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43

array.h
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...

matrix_shape.h
Defines a Shape template for matrix tiles.

cutlass::gemm::threadblock::DefaultGemvCore::IteratorPolicyA
typename platform::conditional< platform::is_same< LayoutA, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kK, Shape::kM >, 1, ThreadShape::kK >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kK >, 1, ThreadShape::kM >>::type IteratorPolicyA
Definition: default_gemv_core.h:88

cutlass::gemm::threadblock::DefaultGemvCore::IteratorPolicyB
typename platform::conditional< platform::is_same< LayoutB, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kK >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kK, Shape::kN >, kThreadsPerN, ThreadShape::kK >>::type IteratorPolicyB
Definition: default_gemv_core.h:98

cutlass::gemm::threadblock::DefaultGemvCore::ElementC
ElementC_ ElementC
Definition: default_gemv_core.h:79

cutlass::gemm::threadblock::DefaultGemvCore::Operator
MmaSimtOp Operator
Definition: default_gemv_core.h:122

cutlass::gemm::threadblock::DefaultGemvCore::MmaSimtOp
typename cutlass::gemm::thread::Mma< cutlass::gemm::GemmShape< ThreadShape::kM, ThreadShape::kN, Shape::kK >, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC > MmaSimtOp
Definition: default_gemv_core.h:120

mma.h
Templates exposing architecture support for warp-level multiply-add operations.

numeric_types.h
Top-level include for all CUTLASS numeric types.

cutlass::gemm::GemmShape
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57

cutlass::gemm::threadblock::DefaultGemvCore
Definition: default_gemv_core.h:68

cutlass::platform::conditional
std::conditional (true specialization)
Definition: platform.h:325

static_assert
#define static_assert(__e, __m)
Definition: platform.h:153

cutlass::gemm::threadblock::DefaultGemvCore::LayoutB
LayoutB_ LayoutB
Definition: default_gemv_core.h:74

cutlass::transform::threadblock::PredicatedTileIterator
Definition: transform/threadblock/predicated_tile_iterator.h:133

cutlass::gemm::threadblock::DefaultGemvCore::kThreadsPerN
static int const kThreadsPerN
Definition: default_gemv_core.h:81

cutlass::gemm::thread::Mma
Structure to compute the matrix product.
Definition: gemm/thread/mma.h:66

matrix.h
Defines layout functions used by TensorRef and derived classes.

gemv.h
Template for a threadblock-scoped GEMV kernel.

predicated_tile_iterator.h
Templates implementing loading of tiles from pitch-linear rank=2 tensors.

cutlass::gemm::threadblock::DefaultGemvCore::ElementB
ElementB_ ElementB
Definition: default_gemv_core.h:78

cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided
Definition: pitch_linear_thread_map.h:168

cutlass::gemm::threadblock::DefaultGemvCore::LayoutC
LayoutC_ LayoutC
Definition: default_gemv_core.h:75

cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous
Definition: pitch_linear_thread_map.h:140

cutlass.h
Basic include for CUTLASS.

cutlass::gemm::threadblock::DefaultGemvCore::LayoutA
LayoutA_ LayoutA
Definition: default_gemv_core.h:73