fence

fence

fence.sc.cta

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .sc }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.sc.cta` form: `Scope` selects `.cta` out of
// the `.scope` set above. The declaration itself is generic over that set.
//   sem_sc_t (unnamed) - tag argument corresponding to `.sem = { .sc }`.
//   scope              - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_sc_t,
  cuda::ptx::scope_t<Scope> scope);

fence.sc.gpu

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .sc }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.sc.gpu` form: `Scope` selects `.gpu` out of
// the `.scope` set above. The declaration itself is generic over that set.
//   sem_sc_t (unnamed) - tag argument corresponding to `.sem = { .sc }`.
//   scope              - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_sc_t,
  cuda::ptx::scope_t<Scope> scope);

fence.sc.sys

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .sc }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.sc.sys` form: `Scope` selects `.sys` out of
// the `.scope` set above. The declaration itself is generic over that set.
//   sem_sc_t (unnamed) - tag argument corresponding to `.sem = { .sc }`.
//   scope              - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_sc_t,
  cuda::ptx::scope_t<Scope> scope);

fence.sc.cluster

// fence.sem.scope; // 2. PTX ISA 78, SM_90
// .sem       = { .sc }
// .scope     = { .cluster }
//
// Documented here for the `fence.sc.cluster` form. Both arguments are unnamed
// tags (`sem_sc_t` for `.sem = { .sc }`, `scope_cluster_t` for
// `.scope = { .cluster }`), so no qualifier is chosen through a template
// argument; the defaulted `typename = void` parameter does not appear in the
// signature.
template <typename = void>
__device__ static inline void fence(
  cuda::ptx::sem_sc_t,
  cuda::ptx::scope_cluster_t);

fence.acq_rel.cta

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .acq_rel }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.acq_rel.cta` form: `Scope` selects `.cta` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acq_rel_t (unnamed) - tag argument corresponding to `.sem = { .acq_rel }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acq_rel_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acq_rel.gpu

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .acq_rel }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.acq_rel.gpu` form: `Scope` selects `.gpu` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acq_rel_t (unnamed) - tag argument corresponding to `.sem = { .acq_rel }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acq_rel_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acq_rel.sys

// fence.sem.scope; // 1. PTX ISA 60, SM_70
// .sem       = { .acq_rel }
// .scope     = { .cta, .gpu, .sys }
//
// Documented here for the `fence.acq_rel.sys` form: `Scope` selects `.sys` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acq_rel_t (unnamed) - tag argument corresponding to `.sem = { .acq_rel }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acq_rel_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acq_rel.cluster

// fence.sem.scope; // 2. PTX ISA 78, SM_90
// .sem       = { .acq_rel }
// .scope     = { .cluster }
//
// Documented here for the `fence.acq_rel.cluster` form. Both arguments are
// unnamed tags (`sem_acq_rel_t` for `.sem = { .acq_rel }`, `scope_cluster_t`
// for `.scope = { .cluster }`), so no qualifier is chosen through a template
// argument; the defaulted `typename = void` parameter does not appear in the
// signature.
template <typename = void>
__device__ static inline void fence(
  cuda::ptx::sem_acq_rel_t,
  cuda::ptx::scope_cluster_t);

fence.acquire.cta

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.acquire.cta` form: `Scope` selects `.cta` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acquire.cluster

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.acquire.cluster` form: `Scope` selects
// `.cluster` out of the `.scope` set above. The declaration itself is generic
// over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acquire.gpu

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.acquire.gpu` form: `Scope` selects `.gpu` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope);

fence.acquire.sys

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.acquire.sys` form: `Scope` selects `.sys` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope);

fence.release.cta

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.release.cta` form: `Scope` selects `.cta` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.release.cluster

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.release.cluster` form: `Scope` selects
// `.cluster` out of the `.scope` set above. The declaration itself is generic
// over that set.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.release.gpu

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.release.gpu` form: `Scope` selects `.gpu` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.release.sys

// fence.sem.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.release.sys` form: `Scope` selects `.sys` out
// of the `.scope` set above. The declaration itself is generic over that set.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.sync_restrict

fence.acquire.sync_restrict::shared::cluster.cluster

// fence.sem.sync_restrict::space.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .space     = { .shared::cluster }
// .scope     = { .cluster }
//
// Documented here for `fence.acquire.sync_restrict::shared::cluster.cluster`.
// All three arguments are unnamed tags, passed in the order sem, space, scope:
//   sem_acquire_t   - corresponds to `.sem   = { .acquire }`
//   space_cluster_t - corresponds to `.space = { .shared::cluster }`
//   scope_cluster_t - corresponds to `.scope = { .cluster }`
// The defaulted `typename = void` parameter does not appear in the signature.
template <typename = void>
__device__ static inline void fence_sync_restrict(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::space_cluster_t,
  cuda::ptx::scope_cluster_t);

fence.release.sync_restrict::shared::cta.cluster

// fence.sem.sync_restrict::space.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .space     = { .shared::cta }
// .scope     = { .cluster }
//
// Documented here for `fence.release.sync_restrict::shared::cta.cluster`.
// All three arguments are unnamed tags, passed in the order sem, space, scope:
//   sem_release_t   - corresponds to `.sem   = { .release }`
//   space_shared_t  - corresponds to `.space = { .shared::cta }`
//   scope_cluster_t - corresponds to `.scope = { .cluster }`
// The defaulted `typename = void` parameter does not appear in the signature.
template <typename = void>
__device__ static inline void fence_sync_restrict(
  cuda::ptx::sem_release_t,
  cuda::ptx::space_shared_t,
  cuda::ptx::scope_cluster_t);

fence.mbarrier_init

fence.mbarrier_init.release.cluster

// fence.mbarrier_init.sem.scope; // 3. PTX ISA 80, SM_90
// .sem       = { .release }
// .scope     = { .cluster }
//
// Documented here for `fence.mbarrier_init.release.cluster`, the only
// combination the legend above allows. Both arguments are unnamed tags:
//   sem_release_t   - corresponds to `.sem   = { .release }`
//   scope_cluster_t - corresponds to `.scope = { .cluster }`
// The defaulted `typename = void` parameter does not appear in the signature.
template <typename = void>
__device__ static inline void fence_mbarrier_init(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_cluster_t);

fence.proxy.alias

fence.proxy.alias

// fence.proxy.alias; // 4. PTX ISA 75, SM_70
//
// Takes no arguments: the instruction pattern above has no qualifiers to
// select. The defaulted `typename = void` parameter does not appear in the
// signature.
template <typename = void>
__device__ static inline void fence_proxy_alias();

fence.proxy.async

fence.proxy.async

// fence.proxy.async; // 5. PTX ISA 80, SM_90
//
// Argument-free overload for the pattern above, which carries no `.space`
// qualifier. The defaulted `typename = void` parameter does not appear in the
// signature.
template <typename = void>
__device__ static inline void fence_proxy_async();

fence.proxy.async.global

// fence.proxy.async.space; // 6. PTX ISA 80, SM_90
// .space     = { .global, .shared::cluster, .shared::cta }
//
// Documented here for the `fence.proxy.async.global` form: `Space` selects
// `.global` out of the `.space` set above. The declaration itself is generic
// over that set.
//   space - `space_t<Space>` value; its `Space` names the `.space` qualifier.
template <cuda::ptx::dot_space Space>
__device__ static inline void fence_proxy_async(
  cuda::ptx::space_t<Space> space);

fence.proxy.async.shared::cluster

// fence.proxy.async.space; // 6. PTX ISA 80, SM_90
// .space     = { .global, .shared::cluster, .shared::cta }
//
// Documented here for the `fence.proxy.async.shared::cluster` form: `Space`
// selects `.shared::cluster` out of the `.space` set above. The declaration
// itself is generic over that set.
//   space - `space_t<Space>` value; its `Space` names the `.space` qualifier.
template <cuda::ptx::dot_space Space>
__device__ static inline void fence_proxy_async(
  cuda::ptx::space_t<Space> space);

fence.proxy.async.shared::cta

// fence.proxy.async.space; // 6. PTX ISA 80, SM_90
// .space     = { .global, .shared::cluster, .shared::cta }
//
// Documented here for the `fence.proxy.async.shared::cta` form: `Space`
// selects `.shared::cta` out of the `.space` set above. The declaration itself
// is generic over that set.
//   space - `space_t<Space>` value; its `Space` names the `.space` qualifier.
template <cuda::ptx::dot_space Space>
__device__ static inline void fence_proxy_async(
  cuda::ptx::space_t<Space> space);

fence.proxy.async.sync_restrict

fence.proxy.async::generic.acquire.sync_restrict::shared::cluster.cluster

// fence.proxy.async::generic.sem.sync_restrict::space.scope; // PTX ISA 86, SM_90
// .sem       = { .acquire }
// .space     = { .shared::cluster }
// .scope     = { .cluster }
//
// Documented here for
// `fence.proxy.async::generic.acquire.sync_restrict::shared::cluster.cluster`.
// All three arguments are unnamed tags, passed in the order sem, space, scope:
//   sem_acquire_t   - corresponds to `.sem   = { .acquire }`
//   space_cluster_t - corresponds to `.space = { .shared::cluster }`
//   scope_cluster_t - corresponds to `.scope = { .cluster }`
// The defaulted `typename = void` parameter does not appear in the signature.
template <typename = void>
__device__ static inline void fence_proxy_async_generic_sync_restrict(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::space_cluster_t,
  cuda::ptx::scope_cluster_t);

fence.proxy.async::generic.release.sync_restrict::shared::cta.cluster

// fence.proxy.async::generic.sem.sync_restrict::space.scope; // PTX ISA 86, SM_90
// .sem       = { .release }
// .space     = { .shared::cta }
// .scope     = { .cluster }
//
// Documented here for
// `fence.proxy.async::generic.release.sync_restrict::shared::cta.cluster`.
// All three arguments are unnamed tags, passed in the order sem, space, scope:
//   sem_release_t   - corresponds to `.sem   = { .release }`
//   space_shared_t  - corresponds to `.space = { .shared::cta }`
//   scope_cluster_t - corresponds to `.scope = { .cluster }`
// The defaulted `typename = void` parameter does not appear in the signature.
template <typename = void>
__device__ static inline void fence_proxy_async_generic_sync_restrict(
  cuda::ptx::sem_release_t,
  cuda::ptx::space_shared_t,
  cuda::ptx::scope_cluster_t);

fence.proxy.tensormap

fence.proxy.tensormap::generic.release.cta

// fence.proxy.tensormap::generic.sem.scope; // 7. PTX ISA 83, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.release.cta` form:
// `Scope` selects `.cta` out of the `.scope` set above. The declaration itself
// is generic over that set. This release overload takes no address or size
// argument.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.proxy.tensormap::generic.release.cluster

// fence.proxy.tensormap::generic.sem.scope; // 7. PTX ISA 83, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.release.cluster`
// form: `Scope` selects `.cluster` out of the `.scope` set above. The
// declaration itself is generic over that set. This release overload takes no
// address or size argument.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.proxy.tensormap::generic.release.gpu

// fence.proxy.tensormap::generic.sem.scope; // 7. PTX ISA 83, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.release.gpu` form:
// `Scope` selects `.gpu` out of the `.scope` set above. The declaration itself
// is generic over that set. This release overload takes no address or size
// argument.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.proxy.tensormap::generic.release.sys

// fence.proxy.tensormap::generic.sem.scope; // 7. PTX ISA 83, SM_90
// .sem       = { .release }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.release.sys` form:
// `Scope` selects `.sys` out of the `.scope` set above. The declaration itself
// is generic over that set. This release overload takes no address or size
// argument.
//   sem_release_t (unnamed) - tag argument corresponding to `.sem = { .release }`.
//   scope                   - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
template <cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_release_t,
  cuda::ptx::scope_t<Scope> scope);

fence.proxy.tensormap::generic.acquire.cta

// fence.proxy.tensormap::generic.sem.scope [addr], size; // 8. PTX ISA 83, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.acquire.cta` form:
// `Scope` selects `.cta` out of the `.scope` set above. The declaration itself
// is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
//   addr  - `const void*` matching the `[addr]` operand of the pattern above.
//   size  - `n32_t<N32>` matching the `size` operand; `N32` is an `int`
//           template parameter, so the value is fixed at compile time.
template <int N32, cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope,
  const void* addr,
  cuda::ptx::n32_t<N32> size);

fence.proxy.tensormap::generic.acquire.cluster

// fence.proxy.tensormap::generic.sem.scope [addr], size; // 8. PTX ISA 83, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.acquire.cluster`
// form: `Scope` selects `.cluster` out of the `.scope` set above. The
// declaration itself is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
//   addr  - `const void*` matching the `[addr]` operand of the pattern above.
//   size  - `n32_t<N32>` matching the `size` operand; `N32` is an `int`
//           template parameter, so the value is fixed at compile time.
template <int N32, cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope,
  const void* addr,
  cuda::ptx::n32_t<N32> size);

fence.proxy.tensormap::generic.acquire.gpu

// fence.proxy.tensormap::generic.sem.scope [addr], size; // 8. PTX ISA 83, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.acquire.gpu` form:
// `Scope` selects `.gpu` out of the `.scope` set above. The declaration itself
// is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
//   addr  - `const void*` matching the `[addr]` operand of the pattern above.
//   size  - `n32_t<N32>` matching the `size` operand; `N32` is an `int`
//           template parameter, so the value is fixed at compile time.
template <int N32, cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope,
  const void* addr,
  cuda::ptx::n32_t<N32> size);

fence.proxy.tensormap::generic.acquire.sys

// fence.proxy.tensormap::generic.sem.scope [addr], size; // 8. PTX ISA 83, SM_90
// .sem       = { .acquire }
// .scope     = { .cta, .cluster, .gpu, .sys }
//
// Documented here for the `fence.proxy.tensormap::generic.acquire.sys` form:
// `Scope` selects `.sys` out of the `.scope` set above. The declaration itself
// is generic over that set.
//   sem_acquire_t (unnamed) - tag argument corresponding to `.sem = { .acquire }`.
//   scope - `scope_t<Scope>` value; its `Scope` names the `.scope` qualifier.
//   addr  - `const void*` matching the `[addr]` operand of the pattern above.
//   size  - `n32_t<N32>` matching the `size` operand; `N32` is an `int`
//           template parameter, so the value is fixed at compile time.
template <int N32, cuda::ptx::dot_scope Scope>
__device__ static inline void fence_proxy_tensormap_generic(
  cuda::ptx::sem_acquire_t,
  cuda::ptx::scope_t<Scope> scope,
  const void* addr,
  cuda::ptx::n32_t<N32> size);