cuda::pipeline_consumer_wait_prior
Defined in header <cuda/pipeline>:
template <cuda::std::uint8_t Prior>
__host__ __device__
void cuda::pipeline_consumer_wait_prior(cuda::pipeline<thread_scope_thread>& pipe);
Let Stage be the pipeline stage Prior stages before the current one (counting the current one).
Blocks the current thread until all operations committed to pipeline stages up to Stage complete.
All stages up to Stage (exclusive) are implicitly released.
Template Parameters
Prior | The index of the pipeline stage Stage (see above) counting up from the current one. The index of the current stage is 0. |
Parameters
pipe | The thread-scoped cuda::pipeline object to wait on. |
Note
If the pipeline is in a quitted state, the behavior is undefined.
Example
#include <cuda/pipeline>
// Example: submit one asynchronous copy per pipeline stage, then drain the
// pipeline in two steps with cuda::pipeline_consumer_wait_prior.
//
// Launch requirement: `shared` is dynamically sized, so the launch must supply
// at least element_count * sizeof(uint64_t) bytes of dynamic shared memory.
// Nothing below is indexed by thread, so every thread issues the same copies.
__global__ void example_kernel(uint64_t* global, cuda::std::size_t element_count) {
  extern __shared__ uint64_t shared[];

  cuda::pipeline<cuda::thread_scope_thread> copy_pipe = cuda::make_pipeline();

  // One stage per element: acquire the stage, submit a single-element copy
  // into shared memory, then commit the stage.
  for (cuda::std::size_t idx = 0; idx < element_count; ++idx) {
    copy_pipe.producer_acquire();
    cuda::memcpy_async(&shared[idx], &global[idx], sizeof(uint64_t), copy_pipe);
    copy_pipe.producer_commit();
  }

  // Prior == 1: wait for operations committed in all stages but the last one.
  cuda::pipeline_consumer_wait_prior<1>(copy_pipe);
  copy_pipe.consumer_release();

  // Prior == 0: wait for operations committed in all stages.
  cuda::pipeline_consumer_wait_prior<0>(copy_pipe);
  copy_pipe.consumer_release();
}