red.async
PTX ISA: red.async
red.async
red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u64
// red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes{.op}.u64 [dest], value, [remote_bar]; // .u64 intentional PTX ISA 81, SM_90
// .op = { .add }
template <typename=void>
__device__ static inline void red_async(
cuda::ptx::op_add_t,
int64_t* dest,
const int64_t& value,
int64_t* remote_bar);
red.async .s64 emulation
PTX does not currently (CTK 12.3) expose `red.async.add.s64`. This exposure is emulated in `cuda::ptx` using:
// red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes{.op}.u64 [dest], value, [remote_bar]; // .u64 intentional PTX ISA 81, SM_90
// .op = { .add }
template <typename=void>
__device__ static inline void red_async(
cuda::ptx::op_add_t,
int64_t* dest,
const int64_t& value,
int64_t* remote_bar);