mbarrier.try_wait
PTX ISA: mbarrier.try_wait
mbarrier.try_wait
mbarrier.try_wait.shared::cta.b64
// mbarrier.try_wait.shared::cta.b64 waitComplete, [addr], state, suspendTimeHint; // 5b. PTX ISA 78, SM_90
template <typename=void>
__device__ static inline bool mbarrier_try_wait(
uint64_t* addr,
const uint64_t& state,
const uint32_t& suspendTimeHint);
mbarrier.try_wait.acquire.cta.shared::cta.b64
// mbarrier.try_wait{.sem}{.scope}.shared::cta.b64 waitComplete, [addr], state , suspendTimeHint; // 6b. PTX ISA 80, SM_90
// .sem = { .acquire }
// .scope = { .cta, .cluster }
template <cuda::ptx::dot_scope Scope>
__device__ static inline bool mbarrier_try_wait(
cuda::ptx::sem_acquire_t,
cuda::ptx::scope_t<Scope> scope,
uint64_t* addr,
const uint64_t& state,
const uint32_t& suspendTimeHint);
mbarrier.try_wait.acquire.cluster.shared::cta.b64
// mbarrier.try_wait{.sem}{.scope}.shared::cta.b64 waitComplete, [addr], state , suspendTimeHint; // 6b. PTX ISA 80, SM_90
// .sem = { .acquire }
// .scope = { .cta, .cluster }
template <cuda::ptx::dot_scope Scope>
__device__ static inline bool mbarrier_try_wait(
cuda::ptx::sem_acquire_t,
cuda::ptx::scope_t<Scope> scope,
uint64_t* addr,
const uint64_t& state,
const uint32_t& suspendTimeHint);
mbarrier.try_wait.parity
mbarrier.try_wait.parity.shared::cta.b64
// mbarrier.try_wait.parity.shared::cta.b64 waitComplete, [addr], phaseParity, suspendTimeHint; // 7b. PTX ISA 78, SM_90
template <typename=void>
__device__ static inline bool mbarrier_try_wait_parity(
uint64_t* addr,
const uint32_t& phaseParity,
const uint32_t& suspendTimeHint);
mbarrier.try_wait.parity.acquire.cta.shared::cta.b64
// mbarrier.try_wait.parity{.sem}{.scope}.shared::cta.b64 waitComplete, [addr], phaseParity, suspendTimeHint; // 8b. PTX ISA 80, SM_90
// .sem = { .acquire }
// .scope = { .cta, .cluster }
template <cuda::ptx::dot_scope Scope>
__device__ static inline bool mbarrier_try_wait_parity(
cuda::ptx::sem_acquire_t,
cuda::ptx::scope_t<Scope> scope,
uint64_t* addr,
const uint32_t& phaseParity,
const uint32_t& suspendTimeHint);
mbarrier.try_wait.parity.acquire.cluster.shared::cta.b64
// mbarrier.try_wait.parity{.sem}{.scope}.shared::cta.b64 waitComplete, [addr], phaseParity, suspendTimeHint; // 8b. PTX ISA 80, SM_90
// .sem = { .acquire }
// .scope = { .cta, .cluster }
template <cuda::ptx::dot_scope Scope>
__device__ static inline bool mbarrier_try_wait_parity(
cuda::ptx::sem_acquire_t,
cuda::ptx::scope_t<Scope> scope,
uint64_t* addr,
const uint32_t& phaseParity,
const uint32_t& suspendTimeHint);