CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
include
cutlass
arch
simd_sm61.h
Go to the documentation of this file.
1
/***************************************************************************************************
2
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3
*
4
* Redistribution and use in source and binary forms, with or without modification, are permitted
5
* provided that the following conditions are met:
6
* * Redistributions of source code must retain the above copyright notice, this list of
7
* conditions and the following disclaimer.
8
* * Redistributions in binary form must reproduce the above copyright notice, this list of
9
* conditions and the following disclaimer in the documentation and/or other materials
10
* provided with the distribution.
11
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12
* to endorse or promote products derived from this software without specific prior written
13
* permission.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
*
24
**************************************************************************************************/
29
#pragma once
30
31
#include "
simd.h
"
32
33
namespace
cutlass
{
34
namespace
arch {
35
37
39
/// Explicit specialization of `dot` for two 4-element `int8_t` arrays and an `int32_t`
/// accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm the
/// intended semantics before relying on the result.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int8_t, 4> const &a, Array<int8_t, 4> const &b, int32_t accum) {
  return accum;
}
45
47
/// Explicit specialization of `dot` for a 4-element `uint8_t` array, a 4-element `int8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint8_t, 4> const &a, Array<int8_t, 4> const &b, int32_t accum) {
  return accum;
}
53
55
/// Explicit specialization of `dot` for a 4-element `int8_t` array, a 4-element `uint8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int8_t, 4> const &a, Array<uint8_t, 4> const &b, int32_t accum) {
  return accum;
}
61
63
/// Explicit specialization of `dot` for two 4-element `uint8_t` arrays and an `int32_t`
/// accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm the
/// intended semantics before relying on the result.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint8_t, 4> const &a, Array<uint8_t, 4> const &b, int32_t accum) {
  return accum;
}
69
71
73
/// Explicit specialization of `dot` for a 2-element `int16_t` array, a 2-element `int8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int16_t, 2> const &a, Array<int8_t, 2> const &b, int32_t accum) {
  return accum;
}
79
81
/// Explicit specialization of `dot` for a 2-element `uint16_t` array, a 2-element `int8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint16_t, 2> const &a, Array<int8_t, 2> const &b, int32_t accum) {
  return accum;
}
87
89
/// Explicit specialization of `dot` for a 2-element `int16_t` array, a 2-element `uint8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int16_t, 2> const &a, Array<uint8_t, 2> const &b, int32_t accum) {
  return accum;
}
95
97
/// Explicit specialization of `dot` for a 2-element `uint16_t` array, a 2-element `uint8_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint16_t, 2> const &a, Array<uint8_t, 2> const &b, int32_t accum) {
  return accum;
}
103
105
107
/// Explicit specialization of `dot` for two 2-element `int16_t` arrays and an `int32_t`
/// accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm the
/// intended semantics before relying on the result.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int16_t, 2> const &a, Array<int16_t, 2> const &b, int32_t accum) {
  return accum;
}
113
115
/// Explicit specialization of `dot` for a 2-element `uint16_t` array, a 2-element `int16_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint16_t, 2> const &a, Array<int16_t, 2> const &b, int32_t accum) {
  return accum;
}
121
123
/// Explicit specialization of `dot` for a 2-element `int16_t` array, a 2-element `uint16_t`
/// array and an `int32_t` accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm.
/// NOTE(review): the operands have different element types; verify that simd.h declares a
/// primary `dot` template this specialization can actually match.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<int16_t, 2> const &a, Array<uint16_t, 2> const &b, int32_t accum) {
  return accum;
}
129
131
/// Explicit specialization of `dot` for two 2-element `uint16_t` arrays and an `int32_t`
/// accumulator.
///
/// NOTE(review): the body is a placeholder. Neither `a` nor `b` is read and `accum` is
/// returned unchanged. Presumably this should yield accum + sum(a[i] * b[i]); confirm the
/// intended semantics before relying on the result.
///
/// @param a      first operand (not read by the current body)
/// @param b      second operand (not read by the current body)
/// @param accum  incoming accumulator value
/// @return `accum`, unmodified
// `template <>` has to open an explicit specialization, so the macro (and whatever
// specifiers it expands to) is placed after it rather than before.
template <>
CUTLASS_HOST_DEVICE
int32_t dot(Array<uint16_t, 2> const &a, Array<uint16_t, 2> const &b, int32_t accum) {
  return accum;
}
137
139
140
}
// namespace arch
141
}
// namespace cutlass
cutlass
Definition:
aligned_buffer.h:35
cutlass::arch::dot
CUTLASS_HOST_DEVICE Accumulator dot(Array< T, N > const &a, Array< T, N > const &b, Accumulator accum)
Definition:
simd.h:101
CUTLASS_HOST_DEVICE
#define CUTLASS_HOST_DEVICE
Definition:
cutlass.h:89
simd.h
Templates exposing SIMD operators.
Generated by
1.8.11