47 static bool const value = ((N & (N - 1)) == 0);
53 template <
int N,
int CurrentVal = N,
int Count = 0>
60 template <
int N,
int Count>
68 template <
int N,
int CurrentVal = N,
int Count = 0>
75 template <
int N,
int Count>
77 enum {
value = ((1 << Count) < N) ? Count + 1 : Count };
92 template <
int Dividend,
int Divisor>
94 enum {
value = Dividend / Divisor };
96 static_assert((Dividend % Divisor == 0),
"Not an even multiple");
106 template <
typename dividend_t,
typename divisor_t>
108 return ((dividend + divisor - 1) / divisor) * divisor;
114 template <
typename value_t>
117 if (a == 0)
return b;
119 if (b == 0)
return a;
127 template <
typename value_t>
129 value_t temp =
gcd(a, b);
131 return temp ? (a / temp * b) : 0;
139 template <
typename value_t>
141 for (
int i = 31; i >= 0; --i) {
142 if ((1 << i) & x)
return 31 - i;
147 template <
typename value_t>
149 int a = int(31 -
clz(x));
150 a += (x & (x - 1)) != 0;
159 void find_divisor(
unsigned int& mul,
unsigned int& shr,
unsigned int denom) {
165 unsigned m = unsigned(((1ull << p) +
unsigned(denom) - 1) /
unsigned(denom));
176 void fast_divmod(
int& quo,
int& rem,
int src,
int div,
unsigned int mul,
unsigned int shr) {
178 #if defined(__CUDA_ARCH__)
180 quo = (div != 1) ? __umulhi(src, mul) >> shr : src;
182 quo = int((div != 1) ?
int(src * mul) >> shr : src);
186 rem = src - (quo * div);
192 void fast_divmod(
int& quo, int64_t& rem, int64_t src,
int div,
unsigned int mul,
unsigned int shr) {
194 #if defined(__CUDA_ARCH__)
196 quo = (div != 1) ? __umulhi(src, mul) >> shr : src;
198 quo = int((div != 1) ? (src * mul) >> shr : src);
201 rem = src - (quo * div);
208 template <
int A,
int B>
210 static int const kValue = (A < B) ? A : B;
213 template <
int A,
int B>
215 static int const kValue = (A > B) ? A : B;
220 return (b < a ? b : a);
225 return (b > a ? b : a);
Definition: aligned_buffer.h:35
CUTLASS_HOST_DEVICE void fast_divmod(int &quo, int &rem, int src, int div, unsigned int mul, unsigned int shr)
Definition: fast_math.h:176
CUTLASS_HOST_DEVICE value_t find_log2(value_t x)
Definition: fast_math.h:148
Definition: fast_math.h:54
Definition: fast_math.h:209
CUTLASS_HOST_DEVICE constexpr int const_max(int a, int b)
Definition: fast_math.h:224
static bool const value
Definition: fast_math.h:47
CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b)
Definition: fast_math.h:128
CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor)
Definition: fast_math.h:107
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
Definition: fast_math.h:214
CUTLASS_HOST_DEVICE void find_divisor(unsigned int &mul, unsigned int &shr, unsigned int denom)
Definition: fast_math.h:159
CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b)
Definition: fast_math.h:115
Definition: fast_math.h:93
Definition: fast_math.h:69
CUTLASS_HOST_DEVICE value_t clz(value_t x)
Definition: fast_math.h:140
Definition: fast_math.h:46
CUTLASS_HOST_DEVICE constexpr int const_min(int a, int b)
Definition: fast_math.h:219
Basic include for CUTLASS.
Definition: fast_math.h:84