cuda::experimental::places::exec_place_device#
-
class exec_place_device : public cuda::experimental::places::exec_place#
Designates execution that is to run on a specific CUDA device.
Device is modeled as a 1-element grid containing that device.
Public Functions
-
inline bool operator==(const exec_place &rhs) const#
-
inline bool operator!=(const exec_place &rhs) const#
-
inline bool operator<(const exec_place &rhs) const#
-
inline bool operator>(const exec_place &rhs) const#
-
inline bool operator<=(const exec_place &rhs) const#
-
inline bool operator>=(const exec_place &rhs) const#
-
inline size_t hash() const#
-
inline dim4 get_dims() const#
Get the dimensions of this grid.
For scalar places (host, single device), returns dim4(1, 1, 1, 1).
-
inline size_t size() const#
Get the total number of places in this grid.
-
inline exec_place get_place(size_t idx) const#
Get the sub-place at the given linear index.
For scalar places, idx must be 0 and returns the place itself.
-
inline exec_place get_place(pos4 p) const#
Get the sub-place at the given multi-dimensional position.
-
inline exec_place_scope activate(size_t idx = 0) const#
Activate the sub-place at the given index.
Returns an exec_place_scope RAII guard that automatically deactivates when destroyed. For scalar places, idx should be 0 (the default).
- Parameters:
idx – The index of the sub-place to activate (default 0 for scalar places)
- Returns:
An exec_place_scope guard that manages the activation lifetime
-
inline ::std::string to_string() const#
-
inline data_place affine_data_place() const#
-
inline void set_affine_data_place(data_place place)#
- inline stream_pool &get_stream_pool(
- bool for_computation,
- exec_place_resources &res
Get the stream pool associated with this place from the supplied registry.
Pooled places (device, host) lazily create their entry in
res; self-contained places (cuda_stream, green-context) ignore res and return their embedded pool.
- inline stream_pool &get_stream_pool(
- bool for_computation,
- ::cuda::experimental::stf::async_resources_handle &h
Convenience overload taking an
async_resources_handle. Defined inline in
__stf/internal/async_resources_handle.cuh.
- inline augmented_stream getStream(
- exec_place_resources &res,
- bool for_computation = true
- inline augmented_stream getStream(
- ::cuda::experimental::stf::async_resources_handle &h,
- bool for_computation = true
Convenience overload taking an
async_resources_handle. Defined inline in
__stf/internal/async_resources_handle.cuh.
- inline cudaStream_t pick_stream(
- exec_place_resources &res,
- bool for_computation = true
- inline cudaStream_t pick_stream(
- ::cuda::experimental::stf::async_resources_handle &h,
- bool for_computation = true
Convenience overload taking an
async_resources_handle. Defined inline in
__stf/internal/async_resources_handle.cuh.
-
inline size_t stream_pool_size(exec_place_resources &res) const#
Number of stream slots in this place’s pool (counts slots, not only the streams that have already been initialized).
- inline size_t stream_pool_size(
- ::cuda::experimental::stf::async_resources_handle &h
Convenience overload taking an
async_resources_handle. Defined inline in
__stf/internal/async_resources_handle.cuh.
- inline ::std::vector<cudaStream_t> pick_all_streams(
- exec_place_resources &res
Materialize all streams in the pool as a vector.
Triggers lazy creation of every empty slot.
- inline ::std::vector<cudaStream_t> pick_all_streams(
- ::cuda::experimental::stf::async_resources_handle &h
Convenience overload taking an
async_resources_handle. Defined inline in
__stf/internal/async_resources_handle.cuh.
-
inline bool is_host() const#
-
inline bool is_device() const#
-
inline size_t grid_dim(int axis_id) const#
Get the dimension along a specific axis.
- Deprecated:
Use get_dims().get(axis_id) instead
-
inline dim4 grid_dims() const#
Get all dimensions.
- Deprecated:
Use get_dims() instead
-
inline const exec_place &as_grid() const#
Returns *this for compatibility.
- Deprecated:
All places are grids now; use exec_place methods directly
-
template<typename Fun>
auto operator->*(Fun &&fun) const#
Execute lambda on this place.
This method accepts a functor, saves the current CUDA device, changes it to the current execution place, invokes the lambda, and finally sets the current device back to the previous one. The last step is taken even if the lambda throws an exception.
- Template Parameters:
Fun – A callable entity type
- Parameters:
fun – Input functor that will be forwarded and executed
- Returns:
auto the result of the executed functor.
Public Static Functions
-
static inline exec_place host()#
-
static inline exec_place device_auto()#
-
static inline exec_place device(int devid)#
- static inline exec_place green_ctx(
- const green_ctx_view &gc_view,
- bool use_green_ctx_data_place = false
Create a green context execution place.
- Parameters:
gc_view – The green context view
use_green_ctx_data_place – If true, use a green context data place as the affine data place. If false (default), use a regular device data place instead.
-
static inline exec_place cuda_stream(cudaStream_t stream)#
-
static inline exec_place cuda_stream(const augmented_stream &dstream)#
-
static inline exec_place current_device()#
Returns the currently active device.
- Returns:
An exec_place designating the currently active device.
-
static inline exec_place all_devices()#
Returns all available devices, or a single device if there is only one GPU.
-
static inline exec_place n_devices(size_t n, dim4 dims)#
Returns single device if n == 1 (no grid wrapper needed)
-
static inline exec_place n_devices(size_t n)#
Returns single device if n == 1 (no grid wrapper needed)
-
static inline exec_place repeat(const exec_place &e, size_t cnt)#
Creates a grid by replicating an execution place multiple times. Returns the original place if cnt == 1 (no grid wrapper needed).
-
class impl : public cuda::experimental::places::exec_place::impl#
Public Functions
-
inline explicit impl(int devid)#
- inline ::std::shared_ptr<exec_place::impl> get_place(
- size_t idx
-
inline virtual exec_place activate(size_t idx) const override#
Activate the sub-place at the given index.
For scalar places, idx must be 0. Returns the previous execution state needed for deactivate().
- inline virtual void deactivate(
- const exec_place &prev,
- size_t idx = 0
Deactivate the sub-place at the given index, restoring previous state.
-
inline virtual bool is_device() const override#
-
inline int get_devid() const#
-
inline ::std::string to_string() const override#
-
inline virtual dim4 get_dims() const#
Get the dimensions of this grid.
For scalar places, returns dim4(1, 1, 1, 1).
-
inline virtual size_t size() const#
Get the total number of places in this grid.
-
inline virtual bool is_host() const#
-
inline virtual data_place affine_data_place() const#
-
inline virtual void set_affine_data_place(data_place place)#
-
inline virtual int cmp(const impl &rhs) const#
Three-way comparison.
- Returns:
-1 if *this < rhs, 0 if *this == rhs, 1 if *this > rhs
-
inline virtual size_t hash() const#
- inline virtual stream_pool &get_stream_pool(
- bool for_computation,
- exec_place_resources &res,
- const exec_place &self
Return the stream pool to draw streams from for this place.
Pooled implementations (device, host) use the default body, which looks up / lazily creates a per-place pool inside the supplied registry, keyed by this (a stable singleton pointer for those impls).
Self-contained implementations (exec_place_cuda_stream_impl, exec_place_green_ctx_impl) override this method and ignore the registry, returning their embedded pool instead.
The grid implementation forwards res to its first sub-place.
- Parameters:
for_computation – If true, return the computation pool slot; otherwise return the data-transfer slot.
res – Registry of per-place stream pools (typically owned by an async_resources_handle).
self – The exec_place wrapping *this (kept for derived overrides that need access to the public-facing place).
Public Static Attributes
-
static constexpr size_t pool_size = exec_place_default_pool_size#
-
static constexpr size_t data_pool_size = exec_place_default_data_pool_size#
-
inline explicit impl(int devid)#
-
inline bool operator==(const exec_place &rhs) const#