CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
device/tensor_fill.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 /* \file
26  \brief Defines device-side elementwise operations on TensorView. Note, the operations defined
27  in this header are not specialized for any particular data layout and are therefore not
28  intended to offer the best possible performance. Rather, they are intended to be generic
29  reference implementations to support the CUTLASS unit tests.
30 */
31 
32 #pragma once
33 
34 #if !defined(__CUDACC_RTC__)
35 
36 // Standard Library includes
37 #include <utility>
38 #include <cstdlib>
39 #include <cmath>
40 #include <type_traits>
41 #include <cstdint>
42 
43 #endif
44 
45 // CUDA includes
46 #include <cublas_v2.h>
47 #include <curand_kernel.h>
48 
49 // Cutlass includes
50 #include "cutlass/cutlass.h"
51 #include "cutlass/array.h"
52 #include "cutlass/tensor_view.h"
53 
56 
58 
59 namespace cutlass {
60 namespace reference {
61 namespace device {
62 
65 
66 namespace detail {
67 
/// Draws one normally distributed sample from the cuRAND generator `state`.
///
/// The generic version uses the single-precision generator `curand_normal` and converts
/// its result to `FloatType`; a specialization covers `double`.
template <typename FloatType>
CUTLASS_DEVICE
FloatType random_normal_float(curandState_t *state) {
  FloatType const sample = curand_normal(state);
  return sample;
}
73 
/// Double-precision specialization: draws the sample with `curand_normal_double`.
template <>
CUTLASS_DEVICE
double random_normal_float<double>(curandState_t *state) {
  double const sample = curand_normal_double(state);
  return sample;
}
79 
/// Draws one uniformly distributed sample from the cuRAND generator `state`.
///
/// The generic version uses the single-precision generator `curand_uniform` and converts
/// its result to `FloatType`; a specialization covers `double`.
template <typename FloatType>
CUTLASS_DEVICE
FloatType random_uniform_float(curandState_t *state) {
  FloatType const sample = curand_uniform(state);
  return sample;
}
85 
/// Double-precision specialization: draws the sample with `curand_uniform_double`.
template <>
CUTLASS_DEVICE
double random_uniform_float<double>(curandState_t *state) {
  double const sample = curand_uniform_double(state);
  return sample;
}
91 
92 template <typename Element>
94 
95  using FloatType = typename std::conditional<(sizeof(Element) > 4), double, float>::type;
96  using IntType = typename std::conditional<(sizeof(Element) > 4), int64_t, int>::type;
97 
99  struct Params {
100 
101  //
102  // Data members
103  //
104 
105  uint64_t seed;
109 
110  //
111  // Methods
112  //
113 
116  uint64_t seed_ = 0,
117  Element mean_ = 0,
118  Element stddev_ = 1,
119  int int_scale_ = -1
120  ):
121  seed(seed_),
122  mean(static_cast<FloatType>(mean_)),
123  stddev(static_cast<FloatType>(stddev_)),
124  int_scale(int_scale_) {
125 
126  }
127  };
128 
129  //
130  // Data members
131  //
132 
135 
137  curandState_t rng_state;
138 
139  //
140  // Methods
141  //
142 
/// Device-side initialization of RNG.
///
/// Initializes this thread's cuRAND state from `params.seed`, passing the thread's global
/// index along x as the sequence number and 0 as the offset.
CUTLASS_DEVICE
RandomGaussianFunc(Params const &params): params(params) {

  // Widen before multiplying: blockIdx.x * blockDim.x would otherwise be evaluated in
  // 32-bit unsigned arithmetic and wrap for very large launches, handing distinct
  // threads the same sequence number.
  uint64_t gtid = uint64_t(blockIdx.x) * blockDim.x + threadIdx.x;

  curand_init(params.seed, gtid, 0, &rng_state);
}
151 
/// Compute random value and update RNG state.
///
/// Draws a normal sample, applies `params.mean` and `params.stddev`, and converts the
/// result to `Element`. When `params.int_scale` is non-negative the sample is first
/// truncated to a multiple of 2^-int_scale.
CUTLASS_DEVICE
Element operator()() {

  FloatType const sample =
    params.mean + params.stddev * random_normal_float<FloatType>(&rng_state);

  // A negative int_scale disables the quantization step.
  if (params.int_scale < 0) {
    return Element(sample);
  }

  // Scale up by 2^int_scale, truncate through the integer type, then scale back down.
  FloatType const scale = FloatType(IntType(1) << params.int_scale);
  FloatType const quantized = FloatType(IntType(sample * scale));

  return Element(quantized / scale);
}
170 };
171 
173 template <
174  typename Element,
175  typename Layout>
177 
180 
182  typedef typename TensorView::Element T;
183 
186 
188 
190  struct Params {
191 
192  //
193  // Data members
194  //
195 
198 
199  //
200  // Methods
201  //
202 
205  TensorView view_ = TensorView(),
206  typename RandomFunc::Params random_ = typename RandomFunc::Params()
207  ):
208  view(view_), random(random_) {
209 
210  }
211  };
212 
213  //
214  // Data members
215  //
216 
219 
220  //
221  // Methods
222  //
223 
/// Device-side initialization of RNG: stores the parameters and constructs the random
/// functor from `params.random`.
CUTLASS_DEVICE
TensorFillRandomGaussianFunc(Params const &params)
  : params(params),
    random(params.random) { }
229 
/// Draws the next random value and stores it in the view element at `coord`.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {
  Element const value = random();
  params.view.at(coord) = value;
}
236 };
237 
238 } // namespace detail
239 
241 
243 template <
244  typename Element,
245  typename Layout>
248  uint64_t seed,
249  Element mean = Element(0),
250  Element stddev = Element(1),
251  int bits = -1) {
252 
255  using RandomFunc = detail::RandomGaussianFunc<Element>;
257  using Params = typename Func::Params;
258 
260  view.extent(),
261  Params(view, typename RandomFunc::Params(seed, mean, stddev, bits))
262  );
263 }
264 
266 
268 template <typename Element>
270  Element *ptr,
271  size_t capacity,
272  uint64_t seed,
273  Element mean = Element(0),
274  Element stddev = Element(1),
275  int bits = -1) {
276 
279  using RandomFunc = detail::RandomGaussianFunc<Element>;
280 
281  typename RandomFunc::Params params(seed, mean, stddev, bits);
282 
284 }
285 
288 
289 namespace detail {
290 
292 template <typename Element>
294 
295  using FloatType = typename std::conditional<
296  (sizeof(Element) > 4),
297  double,
298  float>::type;
299 
300  using IntType = typename std::conditional<
301  (sizeof(Element) > 4),
302  int64_t,
303  int>::type;
304 
306  struct Params {
307 
308  //
309  // Data members
310  //
311 
312  uint64_t seed;
316 
319  Params() { }
320 
321  //
322  // Methods
323  //
324 
327  uint64_t seed_ = 0,
328  Element max = 1,
329  Element min_ = 0,
330  int int_scale_ = -1
331  ):
332  seed(seed_),
333  range(static_cast<FloatType>(max - min_)),
334  min(static_cast<FloatType>(min_)),
335  int_scale(int_scale_) {
336 
337  }
338  };
339 
340  //
341  // Data members
342  //
343 
346 
348  curandState_t rng_state;
349 
350  //
351  // Methods
352  //
353 
/// Device-side initialization of RNG.
///
/// Initializes this thread's cuRAND state from `params.seed`, passing the thread's global
/// index along x as the sequence number and 0 as the offset.
CUTLASS_DEVICE
RandomUniformFunc(Params const &params): params(params) {

  // Widen before multiplying: blockIdx.x * blockDim.x would otherwise be evaluated in
  // 32-bit unsigned arithmetic and wrap for very large launches, handing distinct
  // threads the same sequence number.
  uint64_t gtid = uint64_t(blockIdx.x) * blockDim.x + threadIdx.x;

  curand_init(params.seed, gtid, 0, &rng_state);
}
362 
/// Compute random value and update RNG state.
///
/// Draws a uniform sample, maps it through `params.min + params.range * sample`, and
/// converts the result to `Element`. When `params.int_scale` is non-negative the value is
/// first truncated to a multiple of 2^-int_scale.
CUTLASS_DEVICE
Element operator()() {

  FloatType const sample =
    params.min + params.range * random_uniform_float<FloatType>(&rng_state);

  // A negative int_scale disables the quantization step.
  if (params.int_scale < 0) {
    return Element(sample);
  }

  // Random values are cast to integer after scaling by a power of two to facilitate
  // error testing, then scaled back down.
  FloatType const scale = FloatType(IntType(1) << params.int_scale);
  FloatType const quantized = FloatType(IntType(sample * scale));

  return Element(quantized / scale);
}
384 };
385 
387 template <
388  typename Element,
389  typename Layout>
391 
394 
396  typedef typename TensorView::Element T;
397 
400 
402 
404  struct Params {
405 
406  //
407  // Data members
408  //
409 
412 
415  Params() { }
416 
417  //
418  // Methods
419  //
420 
423  TensorView view_ = TensorView(),
424  typename RandomFunc::Params random_ = RandomFunc::Params()
425  ):
426  view(view_), random(random_) {
427 
428  }
429  };
430 
431  //
432  // Data members
433  //
434 
437 
438  //
439  // Methods
440  //
441 
/// Device-side initialization of RNG: stores the parameters and constructs the random
/// functor from `params.random`.
CUTLASS_DEVICE
TensorFillRandomUniformFunc(Params const &params)
  : params(params),
    random(params.random) { }
446 
/// Draws the next random value and stores it in the view element at `coord`.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {
  Element const value = random();
  params.view.at(coord) = value;
}
453 };
454 
455 } // namespace detail
456 
458 
460 template <
461  typename Element,
462  typename Layout>
465  uint64_t seed,
466  Element max = Element(1),
467  Element min = Element(0),
468  int bits = -1) {
469 
472  using RandomFunc = detail::RandomUniformFunc<Element>;
474  using Params = typename Func::Params;
475 
476  typename RandomFunc::Params random(seed, max, min, bits);
477 
479  view.size(),
480  Params(view, random)
481  );
482 }
483 
485 
487 template <typename Element>
489  Element *ptr,
490  size_t capacity,
491  uint64_t seed,
492  Element max = Element(1),
493  Element min = Element(0),
494  int bits = -1) {
495 
498  using RandomFunc = detail::RandomUniformFunc<Element>;
499  typename RandomFunc::Params params(seed, max, min, bits);
500 
502 }
503 
506 
507 namespace detail {
508 
510 template <
511  typename Element,
512  typename Layout>
514 
517 
519  typedef typename TensorView::Element T;
520 
523 
525  struct Params {
526 
527  //
528  // Data members
529  //
530 
532  Element diag;
533  Element other;
534 
537  Params() { }
538 
539  //
540  // Methods
541  //
542 
545  TensorView view_ = TensorView(),
546  Element diag_ = Element(1),
547  Element other_ = Element(0)
548  ):
549  view(view_), diag(diag_), other(other_) {
550 
551  }
552  };
553 
554  //
555  // Data members
556  //
557 
560 
561  //
562  // Methods
563  //
564 
/// Constructs the functor from its parameters (view, diagonal value, off-diagonal value).
CUTLASS_DEVICE
TensorFillDiagonalFunc(Params const &params)
  : params(params) { }
570 
/// Writes `params.diag` to the element at `coord` when all indices of `coord` are equal,
/// and `params.other` otherwise.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  // Advance past every index that matches its predecessor; stopping early means the
  // coordinate is off the diagonal.
  int idx = 1;
  while (idx < Layout::kRank && coord[idx] == coord[idx - 1]) {
    ++idx;
  }

  bool const on_diagonal = !(idx < Layout::kRank);

  params.view.at(coord) = (on_diagonal ? params.diag : params.other);
}
587 };
588 
589 } // namespace detail
590 
592 
594 template <
595  typename Element,
596  typename Layout>
599  Element diag = Element(1),
600  Element other = Element(0)) {
601 
603  typedef typename Func::Params Params;
604 
606  view.size(),
607  Params(view, diag, other)
608  );
609 }
610 
612 
614 template <
615  typename Element,
616  typename Layout>
619  Element val = Element(0)) {
620 
621  TensorFillDiagonal(view, val, val);
622 }
623 
625 
627 template <
628  typename Element,
629  typename Layout>
632 
633  TensorFillDiagonal(view, Element(1), Element(0));
634 }
635 
638 
639 namespace detail {
640 
642 template <
643  typename Element,
644  typename Layout>
646 
649 
651  typedef typename TensorView::Element T;
652 
655 
657  struct Params {
658 
659  //
660  // Data members
661  //
662 
664  Element diag;
665 
668  Params() { }
669 
670  //
671  // Methods
672  //
673 
676  TensorView view_ = TensorView(),
677  Element diag_ = Element(1)
678  ):
679  view(view_), diag(diag_) {
680 
681  }
682  };
683 
684  //
685  // Data members
686  //
687 
690 
691  //
692  // Methods
693  //
694 
/// Constructs the functor from its parameters (view and diagonal value).
CUTLASS_DEVICE
TensorUpdateDiagonalFunc(Params const &params)
  : params(params) { }
700 
/// Writes `params.diag` to the element at `coord` when all indices of `coord` are equal;
/// other elements are left untouched.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  for (int idx = 1; idx < Layout::kRank; ++idx) {
    if (coord[idx] != coord[idx - 1]) {
      // Off the diagonal: nothing to update.
      return;
    }
  }

  params.view.at(coord) = params.diag;
}
719 };
720 
721 } // namespace detail
722 
724 
726 template <
727  typename Element,
728  typename Layout>
731  Element diag = Element(1)) {
732 
734  typedef typename Func::Params Params;
735 
737  view.size(),
738  Params(view, diag)
739  );
740 }
741 
744 
745 namespace detail {
746 
748 template <
749  typename Element,
750  typename Layout>
752 
755 
757  typedef typename TensorView::Element T;
758 
761 
763  struct Params {
764 
765  //
766  // Data members
767  //
768 
770  Element other;
771 
774  Params() { }
775 
776  //
777  // Methods
778  //
779 
782  TensorView view_ = TensorView(),
783  Element other_ = Element(0)
784  ):
785  view(view_), other(other_) {
786 
787  }
788  };
789 
790  //
791  // Data members
792  //
793 
796 
797  //
798  // Methods
799  //
800 
/// Constructs the functor from its parameters (view and off-diagonal value).
CUTLASS_DEVICE
TensorUpdateOffDiagonalFunc(Params const &params)
  : params(params) { }
806 
/// Writes `params.other` to the element at `coord` unless all indices of `coord` are
/// equal; diagonal elements are left untouched.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  for (int idx = 1; idx < Layout::kRank; ++idx) {
    if (coord[idx] != coord[idx - 1]) {
      // First mismatch proves the coordinate is off the diagonal.
      params.view.at(coord) = params.other;
      return;
    }
  }
}
825 };
826 
827 } // namespace detail
828 
830 
832 template <
833  typename Element,
834  typename Layout>
837  Element other = Element(1)) {
838 
840  typedef typename Func::Params Params;
841 
843  view.size(),
844  Params(view, other)
845  );
846 }
847 
850 
851 namespace detail {
852 
854 template <
855  typename Element,
856  typename Layout>
858 
861 
863  typedef typename TensorView::Element T;
864 
867 
869  struct Params {
870 
871  //
872  // Data members
873  //
874 
876  Array<Element, Layout::kRank> v;
877  Element s;
878 
881  Params() { }
882 
883  //
884  // Methods
885  //
886 
889  TensorView view_,
890  Array<Element, Layout::kRank> const & v_,
891  Element s_ = Element(0)
892  ):
893  view(view_), v(v_), s(s_) {
894 
895  }
896  };
897 
898  //
899  // Data members
900  //
901 
904 
905  //
906  // Methods
907  //
908 
/// Constructs the functor from its parameters (view, coefficient vector `v`, offset `s`).
CUTLASS_DEVICE
TensorFillLinearFunc(Params const &params)
  : params(params) { }
914 
/// Stores `s + sum_i v[i] * coord[i]` in the element at `coord`, accumulating in `Element`.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  // Start from the constant offset and add one term per rank.
  Element acc = params.s;

  int dim = 0;
  while (dim < Layout::kRank) {
    acc += params.v[dim] * Element(coord[dim]);
    ++dim;
  }

  params.view.at(coord) = acc;
}
927 };
928 
929 } // namespace detail
930 
932 
934 template <
935  typename Element,
936  typename Layout>
939  Array<Element, Layout::kRank> const & v,
940  Element s = Element(0)) {
941 
943  using Params = typename Func::Params;
944 
946  view.size(),
947  Params(view, v, s)
948  );
949 }
950 
953 
955 template <
956  typename Element
957 >
959  Element *ptr,
960  int64_t capacity,
961  Element v = Element(1),
962  Element s = Element(0)) {
963 
964 }
965 
968 
970 template <
971  typename Element
972 >
974  Element *ptr,
975  size_t capacity,
976  uint64_t seed,
977  Distribution dist) {
978 
979  if (dist.kind == Distribution::Gaussian) {
980  BlockFillRandomGaussian<Element>(
981  ptr,
982  capacity,
983  seed,
984  static_cast<Element>(dist.gaussian.mean),
985  static_cast<Element>(dist.gaussian.stddev),
986  dist.int_scale);
987  }
988  else if (dist.kind == Distribution::Uniform) {
989  BlockFillRandomUniform<Element>(
990  ptr,
991  capacity,
992  seed,
993  static_cast<Element>(dist.uniform.max),
994  static_cast<Element>(dist.uniform.min),
995  dist.int_scale);
996  }
997 }
998 
1001 
1002 namespace detail {
1003 
1005 template <
1006  typename Element,
1007  typename Layout>
1009 
1012 
1014  typedef typename TensorView::Element T;
1015 
1018 
1020  struct Params {
1021 
1022  //
1023  // Data members
1024  //
1025 
1027  Element const *ptr;
1028 
1031  Params() { }
1032 
1033  //
1034  // Methods
1035  //
1036 
1039  TensorView view_,
1040  Element const *ptr_
1041  ):
1042  view(view_), ptr(ptr_) {
1043 
1044  }
1045  };
1046 
1047  //
1048  // Data members
1049  //
1050 
1053 
1054  //
1055  // Methods
1056  //
1057 
/// Constructs the functor from its parameters (destination view and source pointer).
CUTLASS_DEVICE
TensorCopyDiagonalInFunc(Params const &params)
  : params(params) { }
1063 
/// Only update the diagonal element: when every index of `coord` equals `coord[0]`,
/// copies `params.ptr[coord[0]]` into the view at `coord`.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  bool on_diagonal = true;

  for (int idx = 1; idx < Layout::kRank; ++idx) {
    on_diagonal = on_diagonal && (coord[idx] == coord[0]);
  }

  if (!on_diagonal) {
    return;
  }

  params.view.at(coord) = params.ptr[coord[0]];
}
1079 };
1080 
1081 } // namespace detail
1082 
1084 
1086 template <
1087  typename Element,
1088  typename Layout>
1091  Element const *ptr) {
1092 
1094  using Params = typename Func::Params;
1095 
1097  view.size(),
1098  Params(view, ptr)
1099  );
1100 }
1101 
1104 
1105 
1106 namespace detail {
1107 
1109 template <
1110  typename Element,
1111  typename Layout>
1113 
1116 
1118  typedef typename TensorView::Element T;
1119 
1122 
1124  struct Params {
1125 
1126  //
1127  // Data members
1128  //
1129 
1131  Element *ptr;
1132 
1135  Params() { }
1136 
1137  //
1138  // Methods
1139  //
1140 
1143  TensorView view_,
1144  Element *ptr_
1145  ):
1146  view(view_), ptr(ptr_) {
1147 
1148  }
1149  };
1150 
1151  //
1152  // Data members
1153  //
1154 
1157 
1158  //
1159  // Methods
1160  //
1161 
/// Constructs the functor from its parameters (source view and destination pointer).
CUTLASS_DEVICE
TensorCopyDiagonalOutFunc(Params const &params)
  : params(params) { }
1167 
/// Only reads the diagonal element: when every index of `coord` equals `coord[0]`,
/// copies the view element at `coord` into `params.ptr[coord[0]]`.
CUTLASS_DEVICE
void operator()(TensorCoord const &coord) {

  bool on_diagonal = true;

  for (int idx = 1; idx < Layout::kRank; ++idx) {
    on_diagonal = on_diagonal && (coord[idx] == coord[0]);
  }

  if (!on_diagonal) {
    return;
  }

  params.ptr[coord[0]] = params.view.at(coord);
}
1183 };
1184 
1185 } // namespace detail
1186 
1188 
1190 template <
1191  typename Element,
1192  typename Layout>
1194  Element *ptr,
1196 
1198  using Params = typename Func::Params;
1199 
1201  view.size(),
1202  Params(view, ptr)
1203  );
1204 }
1205 
1208 
1209 } // namespace device
1210 } // namespace reference
1211 } // namespace cutlass
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:866
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:751
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:881
Params(TensorView view_=TensorView(), Element diag_=Element(1))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:675
CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
std::max
Definition: platform.h:189
CUTLASS_DEVICE RandomGaussianFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:145
CUTLASS_DEVICE TensorFillRandomUniformFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:444
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:1031
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:760
Definition: aligned_buffer.h:35
Definition: distribution.h:40
RandomFunc random
Definition: device/tensor_fill.h:436
void TensorCopyDiagonalOut(Element *ptr, TensorView< Element, Layout > view)
Copies the diagonal of a tensor into a dense buffer in host memory.
Definition: device/tensor_fill.h:1193
typename std::conditional< (sizeof(Element) > 4), int64_t, int >::type IntType
Definition: device/tensor_fill.h:303
CUTLASS_DEVICE Element operator()()
Compute random value and update RNG state.
Definition: device/tensor_fill.h:365
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:415
Definition: distribution.h:40
TensorView view
Definition: device/tensor_fill.h:663
struct cutlass::Distribution::@18::@20 uniform
Uniform distribution.
Parameters structure.
Definition: device/tensor_fill.h:1020
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:863
Element const * ptr
Definition: device/tensor_fill.h:1027
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:1017
Parameters structure.
Definition: device/tensor_fill.h:99
Kind kind
Active variant kind.
Definition: distribution.h:64
Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=RandomFunc::Params())
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:422
void TensorFillIdentity(TensorView< Element, Layout > view)
Fills a tensor's diagonal with 1 and 0 everywhere else.
Definition: device/tensor_fill.h:630
CUTLASS_HOST_DEVICE TensorCoord const & extent() const
Returns the extent of the view (the size along each logical dimension).
Definition: tensor_view.h:167
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:645
int int_scale
Definition: device/tensor_fill.h:315
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:396
Params(TensorView view_, Element *ptr_)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:1142
Params params
Parameters object.
Definition: device/tensor_fill.h:1052
RandomFunc::Params random
Definition: device/tensor_fill.h:411
struct cutlass::Distribution::@18::@21 gaussian
Gaussian distribution.
FloatType min
Definition: device/tensor_fill.h:314
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:651
CUTLASS_DEVICE TensorUpdateDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:697
CUTLASS_DEVICE TensorFillLinearFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:911
void TensorCopyDiagonalIn(TensorView< Element, Layout > view, Element const *ptr)
Copies a diagonal in from host memory without modifying off-diagonal elements.
Definition: device/tensor_fill.h:1089
curandState_t rng_state
RNG state object.
Definition: device/tensor_fill.h:137
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:917
Params(uint64_t seed_=0, Element max=1, Element min_=0, int int_scale_=-1)
Construction of uniform RNG functor.
Definition: device/tensor_fill.h:326
Defines a structure containing strides and a pointer to tensor data.
RandomFunc random
Definition: device/tensor_fill.h:218
uint64_t seed
Definition: device/tensor_fill.h:105
CUTLASS_DEVICE double random_normal_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:76
Defines a floating-point type based on the number of exponent and mantissa bits.
Definition: numeric_types.h:144
typename std::conditional<(sizeof(Element) > 4), double, float >::type FloatType
Definition: device/tensor_fill.h:95
TensorView view
Definition: device/tensor_fill.h:875
Element Element
Data type of individual access.
Definition: tensor_view.h:72
Params(TensorView view_, Element const *ptr_)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:1038
Params(TensorView view_=TensorView(), Element diag_=Element(1), Element other_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:544
uint64_t seed
Definition: device/tensor_fill.h:312
void BlockFillSequential(Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
Fills a block of data with sequential elements.
Definition: device/tensor_fill.h:958
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Parameters structure.
Definition: device/tensor_fill.h:306
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:857
int int_scale
Definition: device/tensor_fill.h:108
void TensorFillRandomGaussian(TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution.
Definition: device/tensor_fill.h:246
CUTLASS_DEVICE TensorFillDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:567
Params params
Definition: device/tensor_fill.h:217
void BlockFillRandomUniform(Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution.
Definition: device/tensor_fill.h:488
Parameters structure.
Definition: device/tensor_fill.h:190
TensorView view
Definition: device/tensor_fill.h:1026
Parameters structure.
Definition: device/tensor_fill.h:869
Params(TensorView view_, Array< Element, Layout::kRank > const &v_, Element s_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:888
Params params
Definition: device/tensor_fill.h:435
void TensorFillDiagonal(TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0))
Fills a tensor everywhere with a unique value for its diagonal.
Definition: device/tensor_fill.h:597
typename Layout::TensorCoord TensorCoord
Coordinate in logical tensor space.
Definition: tensor_view.h:87
Element s
Definition: device/tensor_fill.h:877
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:1118
Params params
Parameters object.
Definition: device/tensor_fill.h:795
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:513
Parameters structure.
Definition: device/tensor_fill.h:525
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:1008
FloatType mean
Definition: device/tensor_fill.h:106
CUTLASS_DEVICE TensorCopyDiagonalInFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:1060
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:654
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:573
void TensorFill(TensorView< Element, Layout > view, Element val=Element(0))
Fills a tensor with a uniform value.
Definition: device/tensor_fill.h:617
This header contains a class to parametrize a statistical distribution function.
Computes a random uniform distribution.
Definition: device/tensor_fill.h:293
Params params
Parameters object.
Definition: device/tensor_fill.h:134
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:703
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:449
void BlockFillRandomGaussian(Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution.
Definition: device/tensor_fill.h:269
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:1121
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:1014
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:185
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:232
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Only update the diagonal element.
Definition: device/tensor_fill.h:1066
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:1112
CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
std::min
Definition: platform.h:183
TensorView view
Definition: device/tensor_fill.h:1130
Element other
Definition: device/tensor_fill.h:533
Parameters structure.
Definition: device/tensor_fill.h:763
typename std::conditional< (sizeof(Element) > 4), double, float >::type FloatType
Definition: device/tensor_fill.h:298
Launches a kernel calling a functor for each element in a tensor's index space.
Definition: device/tensor_foreach.h:39
Parameters structure.
Definition: device/tensor_fill.h:657
Array< Element, Layout::kRank > v
Definition: device/tensor_fill.h:876
void TensorUpdateDiagonal(TensorView< Element, Layout > view, Element diag=Element(1))
Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements...
Definition: device/tensor_fill.h:729
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:390
CUTLASS_DEVICE double random_uniform_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:88
CUTLASS_DEVICE FloatType random_normal_float(curandState_t *state)
Definition: device/tensor_fill.h:70
CUTLASS_DEVICE TensorUpdateOffDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:803
FloatType range
Definition: device/tensor_fill.h:313
void BlockFillRandom(Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
Fills a block of data with random elements drawn from the given distribution (Gaussian or uniform).
Definition: device/tensor_fill.h:973
Params(uint64_t seed_=0, Element mean_=0, Element stddev_=1, int int_scale_=-1)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:115
CUTLASS_DEVICE TensorCopyDiagonalOutFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:1164
void TensorFillLinear(TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0))
Fills tensor with a linear combination of its coordinate and another vector.
Definition: device/tensor_fill.h:937
CUTLASS_DEVICE RandomUniformFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:356
Element * ptr
Definition: device/tensor_fill.h:1131
Params params
Parameters object.
Definition: device/tensor_fill.h:559
Params params
Parameters object.
Definition: device/tensor_fill.h:689
Parameters structure.
Definition: device/tensor_fill.h:1124
void TensorUpdateOffDiagonal(TensorView< Element, Layout > view, Element other=Element(1))
Writes a uniform value to all elements in the tensor without modifying diagonal elements.
Definition: device/tensor_fill.h:835
Params params
Parameters object.
Definition: device/tensor_fill.h:345
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:757
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:399
Element diag
Definition: device/tensor_fill.h:532
typename std::conditional<(sizeof(Element) > 4), int64_t, int >::type IntType
Definition: device/tensor_fill.h:96
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:319
CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const
Returns a reference to the element at a given Coord.
Definition: tensor_ref.h:307
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:1135
TensorView view
Definition: device/tensor_fill.h:531
CUTLASS_DEVICE FloatType random_uniform_float(curandState_t *state)
Definition: device/tensor_fill.h:82
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:668
void TensorFillRandomUniform(TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution.
Definition: device/tensor_fill.h:463
Params params
Parameters object.
Definition: device/tensor_fill.h:1156
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:522
CUTLASS_DEVICE Element operator()()
Compute random value and update RNG state.
Definition: device/tensor_fill.h:154
Definition: device/tensor_foreach.h:92
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:1170
Distribution type.
Definition: distribution.h:38
curandState_t rng_state
RNG state object.
Definition: device/tensor_fill.h:348
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:176
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:774
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:537
int int_scale
Random values are cast to integer after scaling by this power of two.
Definition: distribution.h:67
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:519
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:182
Basic include for CUTLASS.
Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=typename RandomFunc::Params())
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:204
CUTLASS_DEVICE TensorFillRandomGaussianFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:226
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:809
Params params
Parameters object.
Definition: device/tensor_fill.h:903
Params(TensorView view_=TensorView(), Element other_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:781
Parameters structure.
Definition: device/tensor_fill.h:404
RandomFunc::Params random
Definition: device/tensor_fill.h:197
FloatType stddev
Definition: device/tensor_fill.h:107