34 #if !defined(__CUDACC_RTC__) 40 #include <type_traits> 46 #include <cublas_v2.h> 47 #include <curand_kernel.h> 68 template <
typename FloatType>
71 return curand_normal(state);
77 return curand_normal_double(state);
80 template <
typename FloatType>
83 return curand_uniform(state);
89 return curand_uniform_double(state);
92 template <
typename Element>
95 using FloatType =
typename std::conditional<(sizeof(Element) > 4),
double,
float>::type;
96 using IntType =
typename std::conditional<(sizeof(Element) > 4), int64_t,
int>::type;
124 int_scale(int_scale_) {
147 uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;
149 curand_init(params.
seed, gtid, 0, &rng_state);
165 result = Element(rnd);
208 view(view_), random(random_) {
234 params.
view.
at(coord) = random();
249 Element
mean = Element(0),
250 Element
stddev = Element(1),
257 using Params =
typename Func::Params;
268 template <
typename Element>
273 Element
mean = Element(0),
274 Element
stddev = Element(1),
292 template <
typename Element>
295 using FloatType =
typename std::conditional<
296 (
sizeof(Element) > 4),
300 using IntType =
typename std::conditional<
301 (
sizeof(Element) > 4),
335 int_scale(int_scale_) {
358 uint64_t gtid = threadIdx.x + blockIdx.x * blockDim.x;
360 curand_init(params.
seed, gtid, 0, &rng_state);
368 rnd = params.
min + params.
range * rnd;
379 result = Element(rnd);
426 view(view_), random(random_) {
451 params.
view.
at(coord) = random();
466 Element
max = Element(1),
467 Element
min = Element(0),
474 using Params =
typename Func::Params;
476 typename RandomFunc::Params random(seed,
max,
min, bits);
487 template <
typename Element>
492 Element
max = Element(1),
493 Element
min = Element(0),
499 typename RandomFunc::Params
params(seed,
max,
min, bits);
546 Element diag_ = Element(1),
547 Element other_ = Element(0)
549 view(view_), diag(diag_), other(other_) {
578 for (
int i = 1; i < Layout::kRank; ++i) {
579 if (coord[i] != coord[i - 1]) {
599 Element diag = Element(1),
600 Element other = Element(0)) {
603 typedef typename Func::Params
Params;
619 Element val = Element(0)) {
677 Element diag_ = Element(1)
679 view(view_), diag(diag_) {
708 for (
int i = 1; i < Layout::kRank; ++i) {
709 if (coord[i] != coord[i - 1]) {
731 Element diag = Element(1)) {
734 typedef typename Func::Params
Params;
783 Element other_ = Element(0)
785 view(view_), other(other_) {
814 for (
int i = 1; i < Layout::kRank; ++i) {
815 if (coord[i] != coord[i - 1]) {
837 Element other = Element(1)) {
840 typedef typename Func::Params
Params;
876 Array<Element, Layout::kRank>
v;
890 Array<Element, Layout::kRank>
const & v_,
891 Element s_ = Element(0)
893 view(view_), v(v_), s(s_) {
918 Element sum = params.
s;
921 for (
int i = 0; i < Layout::kRank; ++i) {
922 sum += params.
v[i] * Element(coord[i]);
925 params.
view.
at(coord) = sum;
939 Array<Element, Layout::kRank>
const & v,
940 Element s = Element(0)) {
943 using Params =
typename Func::Params;
961 Element v = Element(1),
962 Element s = Element(0)) {
980 BlockFillRandomGaussian<Element>(
984 static_cast<Element
>(dist.
gaussian.mean),
985 static_cast<Element>(dist.
gaussian.stddev),
989 BlockFillRandomUniform<Element>(
993 static_cast<Element
>(dist.
uniform.max),
994 static_cast<Element>(dist.
uniform.min),
1042 view(view_), ptr(ptr_) {
1067 bool is_diagonal =
true;
1070 for (
int i = 1; i < Layout::kRank; ++i) {
1071 if (coord[i] != coord[0]) {
1072 is_diagonal =
false;
1076 params.
view.
at(coord) = params.
ptr[coord[0]];
1091 Element
const *ptr) {
1094 using Params =
typename Func::Params;
1146 view(view_), ptr(ptr_) {
1171 bool is_diagonal =
true;
1174 for (
int i = 1; i < Layout::kRank; ++i) {
1175 if (coord[i] != coord[0]) {
1176 is_diagonal =
false;
1180 params.
ptr[coord[0]] = params.
view.
at(coord);
1198 using Params =
typename Func::Params;
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:866
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:751
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:881
Params(TensorView view_=TensorView(), Element diag_=Element(1))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:675
CUTLASS_DEVICE RandomGaussianFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:145
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:1031
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:760
Definition: aligned_buffer.h:35
Definition: distribution.h:40
void TensorCopyDiagonalOut(Element *ptr, TensorView< Element, Layout > view)
Copies the diagonal of a tensor into a dense buffer in host memory.
Definition: device/tensor_fill.h:1193
Definition: distribution.h:40
TensorView view
Definition: device/tensor_fill.h:663
struct cutlass::Distribution::@18::@20 uniform
Uniform distribution.
Parameters structure.
Definition: device/tensor_fill.h:1020
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:863
Element const * ptr
Definition: device/tensor_fill.h:1027
TensorView view
Definition: device/tensor_fill.h:196
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:1017
Parameters structure.
Definition: device/tensor_fill.h:99
Kind kind
Active variant kind.
Definition: distribution.h:64
void TensorFillIdentity(TensorView< Element, Layout > view)
Fills a tensor's diagonal with 1 and 0 everywhere else.
Definition: device/tensor_fill.h:630
CUTLASS_HOST_DEVICE TensorCoord const & extent() const
Returns the extent of the view (the size along each logical dimension).
Definition: tensor_view.h:167
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:645
Params(TensorView view_, Element *ptr_)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:1142
Params params
Parameters object.
Definition: device/tensor_fill.h:1052
struct cutlass::Distribution::@18::@21 gaussian
Gaussian distribution.
Definition: device/tensor_fill.h:93
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:651
CUTLASS_DEVICE TensorUpdateDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:697
CUTLASS_DEVICE TensorFillLinearFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:911
void TensorCopyDiagonalIn(TensorView< Element, Layout > view, Element const *ptr)
Copies a diagonal in from host memory without modifying off-diagonal elements.
Definition: device/tensor_fill.h:1089
curandState_t rng_state
RNG state object.
Definition: device/tensor_fill.h:137
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:917
Defines a structure containing strides and a pointer to tensor data.
RandomFunc random
Definition: device/tensor_fill.h:218
uint64_t seed
Definition: device/tensor_fill.h:105
CUTLASS_DEVICE double random_normal_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:76
Defines a floating-point type based on the number of exponent and mantissa bits.
Definition: numeric_types.h:144
typename std::conditional<(sizeof(Element) > 4), double, float >::type FloatType
Definition: device/tensor_fill.h:95
TensorView view
Definition: device/tensor_fill.h:875
Element Element
Data type of individual access.
Definition: tensor_view.h:72
Params(TensorView view_, Element const *ptr_)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:1038
Params(TensorView view_=TensorView(), Element diag_=Element(1), Element other_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:544
void BlockFillSequential(Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0))
Fills a block of data with sequential elements.
Definition: device/tensor_fill.h:958
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:110
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:857
int int_scale
Definition: device/tensor_fill.h:108
void TensorFillRandomGaussian(TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution.
Definition: device/tensor_fill.h:246
CUTLASS_DEVICE TensorFillDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:567
Params params
Definition: device/tensor_fill.h:217
void BlockFillRandomUniform(Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution.
Definition: device/tensor_fill.h:488
Parameters structure.
Definition: device/tensor_fill.h:190
TensorView view
Definition: device/tensor_fill.h:1026
Parameters structure.
Definition: device/tensor_fill.h:869
Params(TensorView view_, Array< Element, Layout::kRank > const &v_, Element s_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:888
void TensorFillDiagonal(TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0))
Fills a tensor everywhere with a unique value for its diagonal.
Definition: device/tensor_fill.h:597
typename Layout::TensorCoord TensorCoord
Coordinate in logical tensor space.
Definition: tensor_view.h:87
Element s
Definition: device/tensor_fill.h:877
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:1118
Params params
Parameters object.
Definition: device/tensor_fill.h:795
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:513
Parameters structure.
Definition: device/tensor_fill.h:525
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:1008
FloatType mean
Definition: device/tensor_fill.h:106
Element diag
Definition: device/tensor_fill.h:664
CUTLASS_DEVICE TensorCopyDiagonalInFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:1060
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:654
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:573
void TensorFill(TensorView< Element, Layout > view, Element val=Element(0))
Fills a tensor with a uniform value.
Definition: device/tensor_fill.h:617
This header contains a class to parametrize a statistical distribution function.
Params params
Parameters object.
Definition: device/tensor_fill.h:134
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:703
void BlockFillRandomGaussian(Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1)
Fills a tensor with random values with a Gaussian distribution.
Definition: device/tensor_fill.h:269
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:1121
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:1014
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:185
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Element other
Definition: device/tensor_fill.h:770
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:232
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Only update the diagonal element.
Definition: device/tensor_fill.h:1066
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:1112
TensorView view
Definition: device/tensor_fill.h:1130
Element other
Definition: device/tensor_fill.h:533
Parameters structure.
Definition: device/tensor_fill.h:763
Launches a kernel calling a functor for each element in a tensor's index space.
Definition: device/tensor_foreach.h:39
Parameters structure.
Definition: device/tensor_fill.h:657
Array< Element, Layout::kRank > v
Definition: device/tensor_fill.h:876
void TensorUpdateDiagonal(TensorView< Element, Layout > view, Element diag=Element(1))
Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements...
Definition: device/tensor_fill.h:729
CUTLASS_DEVICE double random_uniform_float< double >(curandState_t *state)
Definition: device/tensor_fill.h:88
TensorView view
Definition: device/tensor_fill.h:769
CUTLASS_DEVICE FloatType random_normal_float(curandState_t *state)
Definition: device/tensor_fill.h:70
CUTLASS_DEVICE TensorUpdateOffDiagonalFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:803
void BlockFillRandom(Element *ptr, size_t capacity, uint64_t seed, Distribution dist)
Fills a block of data with sequential elements.
Definition: device/tensor_fill.h:973
Params(uint64_t seed_=0, Element mean_=0, Element stddev_=1, int int_scale_=-1)
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:115
CUTLASS_DEVICE TensorCopyDiagonalOutFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:1164
void TensorFillLinear(TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0))
Fills tensor with a linear combination of its coordinate and another vector.
Definition: device/tensor_fill.h:937
Element * ptr
Definition: device/tensor_fill.h:1131
Params params
Parameters object.
Definition: device/tensor_fill.h:559
Params params
Parameters object.
Definition: device/tensor_fill.h:689
Parameters structure.
Definition: device/tensor_fill.h:1124
void TensorUpdateOffDiagonal(TensorView< Element, Layout > view, Element other=Element(1))
Writes a uniform value to all elements in the tensor without modifying diagonal elements.
Definition: device/tensor_fill.h:835
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:757
Element diag
Definition: device/tensor_fill.h:532
typename std::conditional<(sizeof(Element) > 4), int64_t, int >::type IntType
Definition: device/tensor_fill.h:96
CUTLASS_HOST_DEVICE Reference at(TensorCoord const &coord) const
Returns a reference to the element at a given Coord.
Definition: tensor_ref.h:307
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:1135
TensorView view
Definition: device/tensor_fill.h:531
CUTLASS_DEVICE FloatType random_uniform_float(curandState_t *state)
Definition: device/tensor_fill.h:82
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:668
void TensorFillRandomUniform(TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1)
Fills a tensor with random values with a uniform random distribution.
Definition: device/tensor_fill.h:463
Params params
Parameters object.
Definition: device/tensor_fill.h:1156
TensorView::TensorCoord TensorCoord
Coordinate in tensor's index space.
Definition: device/tensor_fill.h:522
CUTLASS_DEVICE Element operator()()
Compute random value and update RNG state.
Definition: device/tensor_fill.h:154
Definition: device/tensor_foreach.h:92
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:1170
Distribution type.
Definition: distribution.h:38
Computes a random Gaussian distribution.
Definition: device/tensor_fill.h:176
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:774
CUTLASS_HOST_DEVICE Params()
Default ctor.
Definition: device/tensor_fill.h:537
int int_scale
Random values are cast to integer after scaling by this power of two.
Definition: distribution.h:67
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:519
TensorView::Element T
Scalar type.
Definition: device/tensor_fill.h:182
Basic include for CUTLASS.
Params(TensorView view_=TensorView(), typename RandomFunc::Params random_=typename RandomFunc::Params())
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:204
CUTLASS_DEVICE TensorFillRandomGaussianFunc(Params const &params)
Device-side initialization of RNG.
Definition: device/tensor_fill.h:226
CUTLASS_DEVICE void operator()(TensorCoord const &coord)
Compute random value and update RNG state.
Definition: device/tensor_fill.h:809
Params params
Parameters object.
Definition: device/tensor_fill.h:903
Params(TensorView view_=TensorView(), Element other_=Element(0))
Construction of Gaussian RNG functor.
Definition: device/tensor_fill.h:781
RandomFunc::Params random
Definition: device/tensor_fill.h:197
FloatType stddev
Definition: device/tensor_fill.h:107