shr

shr.b16

// shr.b16 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename B16, enable_if_t<sizeof(B16) == 2, bool> = true>
__device__ static inline B16 shr(
  B16 a_reg,
  uint32_t b_reg);

shr.b32

// shr.b32 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename B32, enable_if_t<sizeof(B32) == 4, bool> = true>
__device__ static inline B32 shr(
  B32 a_reg,
  uint32_t b_reg);

shr.b64

// shr.b64 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename B64, enable_if_t<sizeof(B64) == 8, bool> = true>
__device__ static inline B64 shr(
  B64 a_reg,
  uint32_t b_reg);

shr.s16

// shr.s16 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename = void>
__device__ static inline int16_t shr(
  int16_t a_reg,
  uint32_t b_reg);

shr.s32

// shr.s32 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename = void>
__device__ static inline int32_t shr(
  int32_t a_reg,
  uint32_t b_reg);

shr.s64

// shr.s64 dest, a_reg, b_reg; // PTX ISA 10, SM_50
template <typename = void>
__device__ static inline int64_t shr(
  int64_t a_reg,
  uint32_t b_reg);