CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
half.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #ifndef CUTLASS_ENABLE_F16C
33 #define CUTLASS_ENABLE_F16C 0
34 #endif
35 
36 #if defined(__CUDACC_RTC__)
37 /* All floating-point numbers can be put in one of these categories. */
38 enum
39  {
40  FP_NAN =
41 # define FP_NAN 0
42  FP_NAN,
43  FP_INFINITE =
44 # define FP_INFINITE 1
45  FP_INFINITE,
46  FP_ZERO =
47 # define FP_ZERO 2
48  FP_ZERO,
49  FP_SUBNORMAL =
50 # define FP_SUBNORMAL 3
51  FP_SUBNORMAL,
52  FP_NORMAL =
53 # define FP_NORMAL 4
54  FP_NORMAL
55  };
56 
57 // F16C extensions are not meaningful when compiling for NVRTC which only accommodates device code.
58 #undef CUTLASS_ENABLE_F16C
59 #define CUTLASS_ENABLE_F16C 0
60 
61 #else
62 #include <cmath>
63 #include <limits>
64 #include <cstdint>
65 #endif
66 
68 
69 #include <cuda_fp16.h>
70 
71 #include "cutlass/cutlass.h"
72 
74 
75 // Optionally target F16C extentions to accelerate half-precision conversion.
76 #if !defined(__CUDA_ARCH__) && (CUTLASS_ENABLE_F16C)
77 #if defined(_MSC_VER)
78 
79 #include <immintrin.h>
80 
81 #define F16C_ROUND_NEAREST 0
82 
83 #if !defined(__CUDA_ARCH__)
84 extern __inline float _cvtsh_ss (unsigned short __S) {
85  __m128i packed;
86  std::memcpy(&packed, &__S, sizeof(__S));
87 
88  __m128 result = _mm_cvtph_ps(packed);
89 
90  float flt;
91  std::memcpy(&flt, &result, sizeof(flt));
92 
93  return flt;
94 }
95 
96 __inline unsigned short _cvtss_sh (float __F, const int) {
97  __m128 packed;
98  std::memcpy(&packed, &__F, sizeof(__F));
99 
100  __m128i result = _mm_cvtps_ph(packed, F16C_ROUND_NEAREST);
101 
102  unsigned short u;
103  std::memcpy(&u, &result, sizeof(u));
104 
105  return u;
106 }
107 #endif
108 
109 #else
110 
111 // Linux
112 #include <x86intrin.h>
113 #define F16C_ROUND_NEAREST (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
114 
115 #endif
116 #endif // !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
117 
119 
120 
121 namespace cutlass {
122 
124 
126 struct alignas(2) half_t {
127 
128  //
129  // Data members
130  //
131 
133  uint16_t storage;
134 
135  //
136  // Static conversion operators
137  //
138 
141  static half_t bitcast(uint16_t x) {
142  half_t h;
143  h.storage = x;
144  return h;
145  }
146 
148  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
149  // Avoid inlining in device code if no hardware support
150  __device__ __noinline__
151  #else
153  #endif
154  static half_t convert(float const& flt) {
155  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
156  return half_t(__float2half_rn(flt));
157  #elif !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
158  unsigned short u = _cvtss_sh(flt, F16C_ROUND_NEAREST);
159  return bitcast(u);
160  #else
161  // software implementation rounds toward nearest even
162  unsigned const& s = reinterpret_cast<unsigned const &>(flt);
163  uint16_t sign = uint16_t((s >> 16) & 0x8000);
164  int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);
165  int mantissa = s & 0x7fffff;
166  uint16_t u = 0;
167 
168  if ((s & 0x7fffffff) == 0) {
169  // sign-preserving zero
170  return bitcast(sign);
171  }
172 
173  if (exp > 15) {
174  if (exp == 128 && mantissa) {
175  // not a number
176  u = 0x7fff;
177  } else {
178  // overflow to infinity
179  u = sign | 0x7c00;
180  }
181  return bitcast(u);
182  }
183 
184  int sticky_bit = 0;
185 
186  if (exp >= -14) {
187  // normal fp32 to normal fp16
188  exp = uint16_t(exp + uint16_t(15));
189  u = uint16_t(((exp & 0x1f) << 10));
190  u = uint16_t(u | (mantissa >> 13));
191  } else {
192  // normal single-precision to subnormal half_t-precision representation
193  int rshift = (-14 - exp);
194  if (rshift < 32) {
195  mantissa |= (1 << 23);
196 
197  sticky_bit = ((mantissa & ((1 << rshift) - 1)) != 0);
198 
199  mantissa = (mantissa >> rshift);
200  u = (uint16_t(mantissa >> 13) & 0x3ff);
201  } else {
202  mantissa = 0;
203  u = 0;
204  }
205  }
206 
207  // round to nearest even
208  int round_bit = ((mantissa >> 12) & 1);
209  sticky_bit |= ((mantissa & ((1 << 12) - 1)) != 0);
210 
211  if ((round_bit && sticky_bit) || (round_bit && (u & 1))) {
212  u = uint16_t(u + 1);
213  }
214 
215  u |= sign;
216 
217  return bitcast(u);
218  #endif
219  }
220 
223  static half_t convert(int const& n) {
224  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
225  return half_t(__int2half_rn(n));
226  #else
227  return convert(float(n));
228  #endif
229  }
230 
233  static half_t convert(unsigned const& n) {
234  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
235  return half_t(__uint2half_rn(n));
236  #else
237  return convert(float(n));
238  #endif
239  }
240 
242  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530)
243  // Avoid inlining in device code if no hardware support
244  __device__ __noinline__
245  #else
247  #endif
248  static float convert(half_t const& x) {
249  #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
250  return __half2float(x.to_half());
251  #elif !defined(__CUDA_ARCH__) && CUTLASS_ENABLE_F16C
252  unsigned short u = x.storage;
253  return _cvtsh_ss(u);
254  #else
255  uint16_t const &h = x.storage;
256  int sign = ((h >> 15) & 1);
257  int exp = ((h >> 10) & 0x1f);
258  int mantissa = (h & 0x3ff);
259  unsigned f = 0;
260 
261  if (exp > 0 && exp < 31) {
262  // normal
263  exp += 112;
264  f = (sign << 31) | (exp << 23) | (mantissa << 13);
265  } else if (exp == 0) {
266  if (mantissa) {
267  // subnormal
268  exp += 113;
269  while ((mantissa & (1 << 10)) == 0) {
270  mantissa <<= 1;
271  exp--;
272  }
273  mantissa &= 0x3ff;
274  f = (sign << 31) | (exp << 23) | (mantissa << 13);
275  } else {
276  // sign-preserving zero
277  f = (sign << 31);
278  }
279  } else if (exp == 31) {
280  if (mantissa) {
281  f = 0x7fffffff; // not a number
282  } else {
283  f = (0xff << 23) | (sign << 31); // inf
284  }
285  }
286  return reinterpret_cast<float const&>(f);
287  #endif
288  }
289 
290  //
291  // Methods
292  //
293 
296  half_t() { }
297 
300  explicit half_t(half const & x): storage(reinterpret_cast<uint16_t const &>(x)) {
301 
302  }
303 
306  explicit half_t(float x) {
307  storage = convert(x).storage;
308  }
309 
312  explicit half_t(double x): half_t(float(x)) {
313 
314  }
315 
318  explicit half_t(int x) {
319  storage = convert(x).storage;
320  }
321 
324  explicit half_t(unsigned x) {
325  storage = convert(x).storage;
326  }
327 
330  half_t & operator=(half const &x) {
331  storage = reinterpret_cast<uint16_t const &>(x);
332  return *this;
333  }
334 
337  operator float() const {
338  return convert(*this);
339  }
340 
343  operator double() const {
344  return double(convert(*this));
345  }
346 
349  explicit operator int() const {
350  return int(convert(*this));
351  }
352 
355  operator bool() const {
356  return (convert(*this) != 0.0f);
357  }
358 
361  half to_half() const {
362  return reinterpret_cast<half const &>(storage);
363  }
364 
367  uint16_t& raw() {
368  return storage;
369  }
370 
373  uint16_t raw() const {
374  return storage;
375  }
376 
379  bool signbit() const {
380  return ((storage & 0x8000) != 0);
381  }
382 
385  int exponent_biased() const {
386  return int((storage >> 10) & 0x1f);
387  }
388 
391  int exponent() const {
392  return exponent_biased() - 15;
393  }
394 
397  int mantissa() const {
398  return int(storage & 0x3ff);
399  }
400 };
401 
403 
405 bool signbit(cutlass::half_t const& h) {
406  return ((h.raw() & 0x8000) != 0);
407 }
408 
411  return cutlass::half_t::bitcast(h.raw() & 0x7fff);
412 }
413 
415 bool isnan(cutlass::half_t const& h) {
416  return (h.exponent_biased() == 0x1f) && h.mantissa();
417 }
418 
420 bool isfinite(cutlass::half_t const& h) {
421  return (h.exponent_biased() != 0x1f);
422 }
423 
425 cutlass::half_t nanh(const char*) {
426  // NVIDIA canonical NaN
427  return cutlass::half_t::bitcast(0x7fff);
428 }
429 
431 bool isinf(cutlass::half_t const& h) {
432  return (h.exponent_biased() == 0x1f) && !h.mantissa();
433 }
434 
436 bool isnormal(cutlass::half_t const& h) {
437  return h.exponent_biased() && h.exponent_biased() != 0x1f;
438 }
439 
442  int exp = h.exponent_biased();
443  int mantissa = h.mantissa();
444  if (exp == 0x1f) {
445  if (mantissa) {
446  return FP_NAN;
447  }
448  else {
449  return FP_INFINITE;
450  }
451  }
452  else if (!exp) {
453  if (mantissa) {
454  return FP_SUBNORMAL;
455  }
456  else {
457  return FP_ZERO;
458  }
459  }
460  return FP_NORMAL;
461 }
462 
465 #if defined(__CUDACC_RTC__)
466  return cutlass::half_t(sqrtf(float(h)));
467 #else
468  return cutlass::half_t(std::sqrt(float(h)));
469 #endif
470 }
471 
473 half_t copysign(half_t const& a, half_t const& b) {
474 
475  uint16_t a_mag = (reinterpret_cast<uint16_t const &>(a) & 0x7fff);
476  uint16_t b_sign = (reinterpret_cast<uint16_t const &>(b) & 0x8000);
477  uint16_t result = (a_mag | b_sign);
478 
479  return reinterpret_cast<cutlass::half_t const &>(result);
480 }
481 
483 
484 } // namespace cutlass
485 
487 //
488 // Standard Library operations and definitions
489 //
491 
492 namespace std {
493 
494 #if !defined(__CUDACC_RTC__)
495 template <>
497 struct numeric_limits<cutlass::half_t> {
498  static bool const is_specialized = true;
499  static bool const is_signed = true;
500  static bool const is_integer = false;
501  static bool const is_exact = false;
502  static bool const has_infinity = true;
503  static bool const has_quiet_NaN = true;
504  static bool const has_signaling_NaN = false;
505  static std::float_denorm_style const has_denorm = std::denorm_present;
506  static bool const has_denorm_loss = true;
507  static std::float_round_style const round_style = std::round_to_nearest;
508  static bool const is_iec559 = true;
509  static bool const is_bounded = true;
510  static bool const is_modulo = false;
511  static int const digits = 10;
512 
514  static cutlass::half_t min() { return cutlass::half_t::bitcast(0x0001); }
515 
517  static cutlass::half_t lowest() { return cutlass::half_t::bitcast(0xfbff); }
518 
520  static cutlass::half_t max() { return cutlass::half_t::bitcast(0x7bff); }
521 
523  static cutlass::half_t epsilon() { return cutlass::half_t::bitcast(0x1800); }
524 
526  static cutlass::half_t round_error() { return cutlass::half_t(0.5f); }
527 
529  static cutlass::half_t infinity() { return cutlass::half_t::bitcast(0x7c00); }
530 
533 
536 
539 };
540 #endif
541 
542 } // namespace std
543 
545 //
546 // Arithmetic operators
547 //
549 
550 namespace cutlass {
551 
553 
555 bool operator==(half_t const& lhs, half_t const& rhs) {
556 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
557  return __heq(lhs.to_half(), rhs.to_half());
558 #else
559  return float(lhs) == float(rhs);
560 #endif
561 }
562 
564 bool operator!=(half_t const& lhs, half_t const& rhs) {
565 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
566  return __hne(lhs.to_half(), rhs.to_half());
567 #else
568  return float(lhs) != float(rhs);
569 #endif
570 }
571 
573 bool operator<(half_t const& lhs, half_t const& rhs) {
574 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
575  return __hlt(lhs.to_half(), rhs.to_half());
576 #else
577  return float(lhs) < float(rhs);
578 #endif
579 }
580 
582 bool operator<=(half_t const& lhs, half_t const& rhs) {
583 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
584  return __hle(lhs.to_half(), rhs.to_half());
585 #else
586  return float(lhs) <= float(rhs);
587 #endif
588 }
589 
591 bool operator>(half_t const& lhs, half_t const& rhs) {
592 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
593  return __hgt(lhs.to_half(), rhs.to_half());
594 #else
595  return float(lhs) > float(rhs);
596 #endif
597 }
598 
600 bool operator>=(half_t const& lhs, half_t const& rhs) {
601 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
602  return __hge(lhs.to_half(), rhs.to_half());
603 #else
604  return float(lhs) >= float(rhs);
605 #endif
606 }
607 
609 half_t operator+(half_t const& lhs, half_t const& rhs) {
610 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
611  return half_t(__hadd(lhs.to_half(), rhs.to_half()));
612 #else
613  return half_t(float(lhs) + float(rhs));
614 #endif
615 }
616 
618 half_t operator-(half_t const& lhs) {
619 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
620  return half_t(__hneg(lhs.to_half()));
621 #else
622  return half_t(-float(lhs));
623 #endif
624 }
625 
627 half_t operator-(half_t const& lhs, half_t const& rhs) {
628 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
629  return half_t(__hsub(lhs.to_half(), rhs.to_half()));
630 #else
631  return half_t(float(lhs) - float(rhs));
632 #endif
633 }
634 
636 half_t operator*(half_t const& lhs, half_t const& rhs) {
637 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
638  return half_t(__hmul(lhs.to_half(), rhs.to_half()));
639 #else
640  return half_t(float(lhs) * float(rhs));
641 #endif
642 }
643 
645 half_t operator/(half_t const& lhs, half_t const& rhs) {
646 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
647  return half_t(__hdiv(lhs.to_half(), rhs.to_half()));
648 #else
649  return half_t(float(lhs) / float(rhs));
650 #endif
651 }
652 
654 half_t& operator+=(half_t & lhs, half_t const& rhs) {
655 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
656  lhs = half_t(__hadd(lhs.to_half(), rhs.to_half()));
657 #else
658  lhs = half_t(float(lhs) + float(rhs));
659 #endif
660  return lhs;
661 }
662 
664 half_t& operator-=(half_t & lhs, half_t const& rhs) {
665 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
666  lhs = half_t(__hsub(lhs.to_half(), rhs.to_half()));
667 #else
668  lhs = half_t(float(lhs) - float(rhs));
669 #endif
670  return lhs;
671 }
672 
674 half_t& operator*=(half_t & lhs, half_t const& rhs) {
675 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
676  lhs = half_t(__hmul(lhs.to_half(), rhs.to_half()));
677 #else
678  lhs = half_t(float(lhs) * float(rhs));
679 #endif
680  return lhs;
681 }
682 
684 half_t& operator/=(half_t & lhs, half_t const& rhs) {
685 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
686  lhs = half_t(__hdiv(lhs.to_half(), rhs.to_half()));
687 #else
688  lhs = half_t(float(lhs) / float(rhs));
689 #endif
690  return lhs;
691 }
692 
695 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
696  lhs = half_t(__hadd(lhs.to_half(), half_t(1.0f).to_half()));
697 #else
698  float tmp(lhs);
699  ++tmp;
700  lhs = half_t(tmp);
701 #endif
702  return lhs;
703 }
704 
707 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
708  lhs = half_t(__hsub(lhs.to_half(), half_t(1.0f).to_half()));
709 #else
710  float tmp(lhs);
711  --tmp;
712  lhs = half_t(tmp);
713 #endif
714  return lhs;
715 }
716 
718 half_t operator++(half_t & lhs, int) {
719  half_t ret(lhs);
720 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
721  lhs = half_t(__hadd(lhs.to_half(), half_t(1.0f).to_half()));
722 #else
723  float tmp(lhs);
724  tmp++;
725  lhs = half_t(tmp);
726 #endif
727  return ret;
728 }
729 
731 half_t operator--(half_t & lhs, int) {
732  half_t ret(lhs);
733 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
734  lhs = half_t(__hsub(lhs.to_half(), half_t(1.0f).to_half()));
735 #else
736  float tmp(lhs);
737  tmp--;
738  lhs = half_t(tmp);
739 #endif
740  return ret;
741 }
742 
744 
745 } // namespace cutlass
746 
748 
749 //
750 // User-defined literals
751 //
752 
754 cutlass::half_t operator "" _hf(long double x) {
755  return cutlass::half_t(float(x));
756 }
757 
759 cutlass::half_t operator "" _hf(unsigned long long int x) {
760  return cutlass::half_t(int(x));
761 }
762 
static cutlass::half_t max()
Maximum finite value.
Definition: half.h:520
static CUTLASS_HOST_DEVICE half_t bitcast(uint16_t x)
Constructs from an unsigned short.
Definition: half.h:141
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE half_t(int x)
Integer conversion - round to nearest even.
Definition: half.h:318
CUTLASS_HOST_DEVICE T abs(complex< T > const &z)
Returns the magnitude of the complex number.
Definition: complex.h:313
static cutlass::half_t signaling_NaN()
Returns smallest finite value.
Definition: half.h:535
static cutlass::half_t infinity()
Returns smallest finite value.
Definition: half.h:529
CUTLASS_HOST_DEVICE half_t()
Default constructor.
Definition: half.h:296
static CUTLASS_HOST_DEVICE half_t convert(float const &flt)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:154
CUTLASS_HOST_DEVICE half_t & operator/=(half_t &lhs, half_t const &rhs)
Definition: half.h:684
uint16_t storage
Storage type.
Definition: half.h:133
IEEE half-precision floating-point type.
Definition: half.h:126
static CUTLASS_HOST_DEVICE half_t convert(int const &n)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:223
CUTLASS_HOST_DEVICE bool isnormal(cutlass::half_t const &h)
Definition: half.h:436
STL namespace.
CUTLASS_HOST_DEVICE bool operator<=(half_t const &lhs, half_t const &rhs)
Definition: half.h:582
static cutlass::half_t denorm_min()
Returns smallest finite value.
Definition: half.h:538
CUTLASS_HOST_DEVICE complex< T > exp(complex< T > const &z)
Computes the complex exponential of z.
Definition: complex.h:375
static CUTLASS_HOST_DEVICE float convert(half_t const &x)
Converts a half-precision value stored as a uint16_t to a float.
Definition: half.h:248
CUTLASS_HOST_DEVICE half_t & operator+=(half_t &lhs, half_t const &rhs)
Definition: half.h:654
CUTLASS_HOST_DEVICE half_t(unsigned x)
Integer conversion - round toward zero.
Definition: half.h:324
CUTLASS_HOST_DEVICE half_t operator+(half_t const &lhs, half_t const &rhs)
Definition: half.h:609
CUTLASS_HOST_DEVICE half_t & operator-=(half_t &lhs, half_t const &rhs)
Definition: half.h:664
static cutlass::half_t round_error()
Returns smallest finite value.
Definition: half.h:526
CUTLASS_HOST_DEVICE half_t & operator++(half_t &lhs)
Definition: half.h:694
CUTLASS_HOST_DEVICE bool signbit() const
Returns the sign bit.
Definition: half.h:379
CUTLASS_HOST_DEVICE cutlass::half_t sqrt(cutlass::half_t const &h)
Definition: half.h:464
CUTLASS_HOST_DEVICE int fpclassify(cutlass::half_t const &h)
Definition: half.h:441
CUTLASS_HOST_DEVICE bool operator!=(half_t const &lhs, half_t const &rhs)
Definition: half.h:564
CUTLASS_HOST_DEVICE half to_half() const
Bitcasts to CUDA&#39;s half type.
Definition: half.h:361
CUTLASS_HOST_DEVICE half_t & operator=(half const &x)
Assignment.
Definition: half.h:330
CUTLASS_HOST_DEVICE half_t(half const &x)
Reinterpret cast from CUDA&#39;s half type.
Definition: half.h:300
CUTLASS_HOST_DEVICE uint16_t raw() const
Accesses raw internal state.
Definition: half.h:373
CUTLASS_HOST_DEVICE bool isinf(cutlass::half_t const &h)
Definition: half.h:431
CUTLASS_HOST_DEVICE half_t copysign(half_t const &a, half_t const &b)
Definition: half.h:473
CUTLASS_HOST_DEVICE half_t & operator--(half_t &lhs)
Definition: half.h:706
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_HOST_DEVICE cutlass::half_t nanh(const char *)
Definition: half.h:425
CUTLASS_HOST_DEVICE half_t(float x)
Floating point conversion.
Definition: half.h:306
CUTLASS_HOST_DEVICE bool operator>(half_t const &lhs, half_t const &rhs)
Definition: half.h:591
CUTLASS_HOST_DEVICE half_t operator-(half_t const &lhs)
Definition: half.h:618
CUTLASS_HOST_DEVICE bool isfinite(cutlass::half_t const &h)
Definition: half.h:420
CUTLASS_HOST_DEVICE half_t & operator*=(half_t &lhs, half_t const &rhs)
Definition: half.h:674
static cutlass::half_t min()
Least positive value.
Definition: half.h:514
CUTLASS_HOST_DEVICE int mantissa() const
Returns the mantissa.
Definition: half.h:397
static cutlass::half_t lowest()
Minimum finite value.
Definition: half.h:517
static cutlass::half_t epsilon()
Returns smallest finite value.
Definition: half.h:523
CUTLASS_HOST_DEVICE bool operator==(half_t const &lhs, half_t const &rhs)
Definition: half.h:555
CUTLASS_HOST_DEVICE Coord< Rank, Index > operator/(Index s, Coord< Rank, Index > coord)
Scalar division.
Definition: coord.h:360
CUTLASS_HOST_DEVICE bool operator>=(half_t const &lhs, half_t const &rhs)
Definition: half.h:600
CUTLASS_HOST_DEVICE half_t(double x)
Floating point conversion.
Definition: half.h:312
static CUTLASS_HOST_DEVICE half_t convert(unsigned const &n)
FP32 -> FP16 conversion - rounds to nearest even.
Definition: half.h:233
CUTLASS_HOST_DEVICE bool operator<(half_t const &lhs, half_t const &rhs)
Definition: half.h:573
Basic include for CUTLASS.
CUTLASS_HOST_DEVICE complex< T > sqrt(complex< T > const &z)
Computes the square root of complex number z.
Definition: complex.h:393
CUTLASS_HOST_DEVICE uint16_t & raw()
Accesses raw internal state.
Definition: half.h:367
static cutlass::half_t quiet_NaN()
Returns smallest finite value.
Definition: half.h:532
CUTLASS_HOST_DEVICE int exponent() const
Returns the unbiased exponent.
Definition: half.h:391
CUTLASS_HOST_DEVICE int exponent_biased() const
Returns the biased exponent.
Definition: half.h:385
CUTLASS_HOST_DEVICE half_t operator*(half_t const &lhs, half_t const &rhs)
Definition: half.h:636
CUTLASS_HOST_DEVICE bool isnan(cutlass::half_t const &h)
Definition: half.h:415