cuda::experimental::stf::exec_place_green_ctx#
-
class exec_place_green_ctx : public cuda::experimental::stf::exec_place#
Designates execution that is to run on a green context.
Initialize with the device ordinal and green_context
Public Functions
-
inline exec_place_green_ctx(green_ctx_view gc_view)#
- ::std::shared_ptr<green_ctx_view> gc_view_ptr
-
inline bool operator==(const exec_place &rhs) const#
-
inline bool operator!=(const exec_place &rhs) const#
-
inline bool operator<(const exec_place &rhs) const#
-
inline iterator begin()#
-
inline iterator end()#
-
inline ::std::string to_string() const#
Returns a string representation of the execution place object.
- Returns:
std::string
-
inline const data_place affine_data_place() const#
Returns the data_place naturally associated with this execution place.
-
inline void set_affine_data_place(data_place place)#
- inline stream_pool &get_stream_pool(
- async_resources_handle &async_resources,
- bool for_computation
- inline decorated_stream getStream(
- async_resources_handle &async_resources,
- bool for_computation
Get a decorated stream from the stream pool associated to this execution place.
This method can be used to obtain CUDA streams from execution places without requiring a CUDASTF context. This is useful when you want to use CUDASTF’s place abstractions (devices, green contexts) for stream management without the full task-based model.
Note
If you are using a CUDASTF context, use ctx.async_resources() to ensure the same stream pools are shared between your code and the context’s internal operations.
- Parameters:
async_resources – Handle managing the stream pools. Create a standalone async_resources_handle for context-free usage, or use ctx.async_resources() when working alongside a CUDASTF context.
for_computation – Hint for selecting which pool to use. When true, returns a stream from the computation pool; when false, returns a stream from the data transfer pool. Using separate pools for computation and transfers can improve overlapping. This is a performance hint and does not affect correctness.
- Returns:
A decorated_stream containing the CUDA stream and metadata (device ID, pool index)
- inline cudaStream_t pick_stream(
- async_resources_handle &async_resources,
- bool for_computation = true
Get a CUDA stream from the stream pool associated to this execution place.
This method can be used to obtain CUDA streams from execution places without requiring a CUDASTF context. This is useful when you want to use CUDASTF’s place abstractions (devices, green contexts) for stream management without the full task-based model.
Example usage without a context:
async_resources_handle resources; exec_place place = exec_place::device(0); cudaStream_t stream = place.pick_stream(resources); myKernel<<<grid, block, 0, stream>>>(...);
Example usage with a context (sharing resources):
stream_ctx ctx; exec_place place = exec_place::device(0); cudaStream_t stream = place.pick_stream(ctx.async_resources()); // Stream comes from the same pool used by ctx internally
Note
If you are using a CUDASTF context, use ctx.async_resources() to ensure the same stream pools are shared between your code and the context’s internal operations.
- Parameters:
async_resources – Handle managing the stream pools. Create a standalone async_resources_handle for context-free usage, or use ctx.async_resources() when working alongside a CUDASTF context.
for_computation – Hint for selecting which pool to use. When true, returns a stream from the computation pool; when false, returns a stream from the data transfer pool. Using separate pools for computation and transfers can improve overlapping. This is a performance hint and does not affect correctness. Defaults to true.
- Returns:
A CUDA stream associated with this execution place
- inline size_t stream_pool_size(
- async_resources_handle &async_resources,
- bool for_computation = true
Get the number of streams available in the pool for this execution place.
- Parameters:
async_resources – Handle managing the stream pools
for_computation – Hint for selecting which pool to query (computation or transfer pool)
- Returns:
The number of stream slots in the pool
- inline ::std::vector<cudaStream_t> pick_all_streams(
- async_resources_handle &async_resources,
- bool for_computation = true
Get all streams from the pool associated to this execution place.
This method returns a vector containing all CUDA streams in the pool. Streams are created lazily, so calling this method will create any streams that haven’t been created yet.
- Parameters:
async_resources – Handle managing the stream pools
for_computation – Hint for selecting which pool to use (computation or transfer pool)
- Returns:
A vector of CUDA streams from the pool
-
inline exec_place activate() const#
Set computation to run on this place.
- Returns:
exec_place – The previous execution place. See deactivate below.
-
inline void deactivate(const exec_place &p) const#
Undoes the effect of activate.
Call with the previous exec_place object returned by activate.
Warning
Undefined behavior if you don’t pass the result of activate.
-
inline bool is_host() const#
-
inline bool is_device() const#
-
inline bool is_grid() const#
-
inline size_t size() const#
-
inline exec_place_grid as_grid() const#
-
inline size_t grid_dim(int axid_is) const#
-
template<typename Fun>
inline auto operator->*(Fun &&fun) const#
Execute lambda on this place.
This method accepts a functor, saves the current CUDA device, changes it to the current execution place, invokes the lambda, and finally sets the current device back to the previous one. The last step is taken even if the lambda throws an exception.
- Template Parameters:
Fun – A callable entity type
- Parameters:
fun – Input functor that will be forwarded and executed
- Returns:
auto the result of the executed functor.
Public Static Functions
-
static inline exec_place_host host()#
-
static inline exec_place device_auto()#
-
static inline exec_place device(int devid)#
-
static inline exec_place green_ctx(const green_ctx_view &gc_view)#
- const ::std::shared_ptr<green_ctx_view> &gc_view_ptr
-
static inline exec_place_cuda_stream cuda_stream(cudaStream_t stream)#
- static inline exec_place_cuda_stream cuda_stream(
- const decorated_stream &dstream
-
static inline exec_place current_device()#
Returns the currently active device.
- Returns:
An exec_place representing the currently active device.
-
static inline exec_place_grid all_devices()#
-
static inline exec_place_grid n_devices(size_t n, dim4 dims)#
-
static inline exec_place_grid n_devices(size_t n)#
-
static inline exec_place_grid repeat(const exec_place &e, size_t cnt)#
Creates a grid by replicating an execution place multiple times.
-
class impl : public cuda::experimental::stf::exec_place::impl#
Public Functions
-
inline impl(green_ctx_view gc_view)#
-
inline impl(CUcontext saved_context)#
-
inline virtual exec_place activate() const override#
-
inline virtual void deactivate(const exec_place &prev) const override#
-
inline ::std::string to_string() const override#
- inline virtual stream_pool &get_stream_pool(
- async_resources_handle&,
- bool
-
inline virtual const data_place affine_data_place() const#
-
inline virtual bool is_host() const#
-
inline virtual bool is_device() const#
-
inline virtual bool is_grid() const#
-
inline virtual size_t size() const#
-
inline virtual void set_affine_data_place(data_place place)#
- inline decorated_stream getStream(
- async_resources_handle &async_resources,
- bool for_computation
-
inline impl(green_ctx_view gc_view)#
-
inline exec_place_green_ctx(green_ctx_view gc_view)#