cuda::atomic::atomic_thread_fence
Defined in header <cuda/atomic>
:
__host__ __device__
void cuda::atomic_thread_fence(cuda::std::memory_order order,
cuda::thread_scope scope = cuda::thread_scope_system);
Establishes memory synchronization ordering of non-atomic and relaxed atomic accesses, as instructed by order
,
for all threads within scope
without an associated atomic operation. It has the same semantics as
cuda::std::atomic_thread_fence.
Example
The following code is an example of the MessagePassing pattern:
#include <cstdio>
#include <cuda/atomic>
#include <cooperative_groups.h>
namespace cg = cooperative_groups;
__global__ void example_kernel(int* data, cuda::std::atomic_flag* flag) {
assert(cg::grid_group::size() == 2);
assert(cg::thread_block::size() == 1);
if (blockIdx.x == 0) {
*data = 42;
cuda::atomic_thread_fence(cuda::memory_order_release,
cuda::thread_scope_device);
flag->test_and_set(cuda::std::memory_order_relaxed);
flag->notify_one();
}
else {
// an atomic operation is required to set up the synchronization
flag->wait(false, cuda::std::memory_order_relaxed);
cuda::atomic_thread_fence(cuda::memory_order_acquire,
cuda::thread_scope_device);
std::printf("%d\n", *data); // Prints 42
}
}