54 namespace threadblock {
60 typename ThreadShape_,
127 static_assert((ThreadShape::kM == 1),
"M=1 is required for GEMV");
129 static_assert(Shape::kK % ThreadShape::kK == 0,
"Shape::K must be a multiple of ThreadShape::K");
132 (ThreadShape::kK == 2) ||
133 (ThreadShape::kK == 4) ||
134 (ThreadShape::kK == 8) ||
135 (ThreadShape::kK == 16) ||
136 (ThreadShape::kK == 32)
138 "ThreadShape::K must be a 1, 2, 4, 8, 16 or 32");
Describes the size of a matrix tile.
Definition: matrix_shape.h:42
Definition: aligned_buffer.h:35
Shape_ Shape
Definition: default_gemv_core.h:70
Templates implementing how threads are mapped to a given tile.
ThreadShape_ ThreadShape
Definition: default_gemv_core.h:71
Defines common types used for all GEMM-like operators.
ElementA_ ElementA
Definition: default_gemv_core.h:77
typename platform::conditional< platform::is_same< LayoutC, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kM >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kN >, kThreadsPerN, ThreadShape::kM >>::type IteratorPolicyC
Definition: default_gemv_core.h:108
Template defining a shape used by pitch-linear operators.
Definition: pitch_linear.h:43
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
Defines a Shape template for matrix tiles.
typename platform::conditional< platform::is_same< LayoutA, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kK, Shape::kM >, 1, ThreadShape::kK >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kM, Shape::kK >, 1, ThreadShape::kM >>::type IteratorPolicyA
Definition: default_gemv_core.h:88
typename platform::conditional< platform::is_same< LayoutB, layout::RowMajor >::value, cutlass::transform::PitchLinearTilePolicyStripminedThreadContiguous< layout::PitchLinearShape< Shape::kN, Shape::kK >, kThreadsPerN, ThreadShape::kN >, cutlass::transform::PitchLinearTilePolicyStripminedThreadStrided< layout::PitchLinearShape< Shape::kK, Shape::kN >, kThreadsPerN, ThreadShape::kK >>::type IteratorPolicyB
Definition: default_gemv_core.h:98
ElementC_ ElementC
Definition: default_gemv_core.h:79
MmaSimtOp Operator
Definition: default_gemv_core.h:122
typename cutlass::gemm::thread::Mma< cutlass::gemm::GemmShape< ThreadShape::kM, ThreadShape::kN, Shape::kK >, ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC > MmaSimtOp
Definition: default_gemv_core.h:120
Templates exposing architecture support for warp-level multiply-add operations.
Top-level include for all CUTLASS numeric types.
Shape of a matrix multiply-add operation.
Definition: include/cutlass/gemm/gemm.h:57
Definition: default_gemv_core.h:68
LayoutB_ LayoutB
Definition: default_gemv_core.h:74
static int const kThreadsPerN
Definition: default_gemv_core.h:81
Structure to compute the matrix product.
Definition: gemm/thread/mma.h:66
Defines layout functions used by TensorRef and derived classes.
Template for a threadblock-scoped GEMV kernel.
ElementB_ ElementB
Definition: default_gemv_core.h:78
LayoutC_ LayoutC
Definition: default_gemv_core.h:75
Basic include for CUTLASS.
LayoutA_ LayoutA
Definition: default_gemv_core.h:73