cuda::experimental::stf::exec_place#
-
class exec_place#
Indicates where a computation takes place (CPU, dev0, dev1, …).
All execution places are modeled as grids. Scalar places (host, single device) are simply 1-element grids. This unified model eliminates special-casing and allows uniform iteration over any exec_place.
Subclassed by cuda::experimental::stf::exec_place_device
Public Functions
-
exec_place() = default#
-
inline bool operator==(const exec_place &rhs) const#
-
inline bool operator!=(const exec_place &rhs) const#
-
inline bool operator<(const exec_place &rhs) const#
-
inline bool operator>(const exec_place &rhs) const#
-
inline bool operator<=(const exec_place &rhs) const#
-
inline bool operator>=(const exec_place &rhs) const#
-
inline size_t hash() const#
-
inline dim4 get_dims() const#
Get the dimensions of this grid.
For scalar places (host, single device), returns dim4(1, 1, 1, 1).
-
inline size_t size() const#
Get the total number of places in this grid.
-
inline exec_place get_place(size_t idx) const#
Get the sub-place at the given linear index.
For scalar places, idx must be 0 and returns the place itself.
-
inline exec_place get_place(pos4 p) const#
Get the sub-place at the given multi-dimensional position.
-
inline exec_place_scope activate(size_t idx = 0) const#
Activate the sub-place at the given index.
Returns an exec_place_scope RAII guard that automatically deactivates when destroyed. For scalar places, idx should be 0 (the default).
- Parameters:
idx – The index of the sub-place to activate (default 0 for scalar places)
- Returns:
An exec_place_scope guard that manages the activation lifetime
-
inline ::std::string to_string() const#
-
inline data_place affine_data_place() const#
-
inline void set_affine_data_place(data_place place)#
-
inline stream_pool &get_stream_pool(bool for_computation) const#
-
inline decorated_stream getStream(bool for_computation) const#
-
inline cudaStream_t pick_stream(bool for_computation = true) const#
-
inline bool is_host() const#
-
inline bool is_device() const#
-
inline size_t grid_dim(int axis_id) const#
Get the dimension along a specific axis.
- Deprecated:
Use get_dims().get(axis_id) instead
-
inline dim4 grid_dims() const#
Get all dimensions.
- Deprecated:
Use get_dims() instead
-
inline const exec_place &as_grid() const#
Returns *this for compatibility.
- Deprecated:
All places are grids now; use exec_place methods directly
-
template<typename Fun>
auto operator->*(Fun &&fun) const#
Execute lambda on this place.
This method accepts a functor, saves the current CUDA device, changes it to the current execution place, invokes the lambda, and finally sets the current device back to the previous one. The last step is taken even if the lambda throws an exception.
- Template Parameters:
Fun – A callable entity type
- Parameters:
fun – Input functor that will be forwarded and executed
- Returns:
The result of the executed functor.
Public Static Functions
-
static inline exec_place host()#
-
static inline exec_place device_auto()#
-
static inline exec_place device(int devid)#
-
static inline exec_place green_ctx(const green_ctx_view &gc_view, bool use_green_ctx_data_place = false)#
Create a green context execution place.
- Parameters:
gc_view – The green context view
use_green_ctx_data_place – If true, use a green context data place as the affine data place. If false (default), use a regular device data place instead.
-
static inline exec_place cuda_stream(cudaStream_t stream)#
-
static inline exec_place cuda_stream(const decorated_stream &dstream)#
-
static inline exec_place current_device()#
Returns the currently active device.
- Returns:
An exec_place for the currently active device
-
static inline exec_place all_devices()#
Returns all available devices, or a single device if there is only one GPU.
-
static inline exec_place n_devices(size_t n, dim4 dims)#
Returns a single device if n == 1 (no grid wrapper needed).
-
static inline exec_place n_devices(size_t n)#
Returns a single device if n == 1 (no grid wrapper needed).
-
static inline exec_place repeat(const exec_place &e, size_t cnt)#
Creates a grid by replicating an execution place multiple times. Returns the original place if cnt == 1 (no grid wrapper needed).
-
class impl : public std::enable_shared_from_this<impl>#
Subclassed by cuda::experimental::stf::exec_place_cuda_stream_impl, cuda::experimental::stf::exec_place_device::impl, cuda::experimental::stf::exec_place_device_auto_impl, cuda::experimental::stf::exec_place_green_ctx_impl, cuda::experimental::stf::exec_place_grid_impl, cuda::experimental::stf::exec_place_host_impl
Public Functions
-
impl() = default#
-
virtual ~impl() = default#
-
inline explicit impl(data_place place)#
-
inline virtual dim4 get_dims() const#
Get the dimensions of this grid.
For scalar places, returns dim4(1, 1, 1, 1).
-
inline virtual size_t size() const#
Get the total number of places in this grid.
-
inline ::std::shared_ptr<impl> get_place(size_t idx)#
Get the impl of the sub-place at the given linear index.
For scalar places, idx must be 0 and returns shared_from_this(). For grids, returns the impl of the stored sub-place.
-
virtual exec_place activate(size_t idx) const = 0#
Activate the sub-place at the given index.
For scalar places, idx must be 0. Returns the previous execution state needed for deactivate().
- virtual void deactivate(
- const exec_place &prev,
- size_t idx = 0
Deactivate the sub-place at the given index, restoring previous state.
-
inline virtual bool is_host() const#
-
inline virtual bool is_device() const#
-
inline virtual data_place affine_data_place() const#
-
inline ::std::string to_string() const#
-
inline virtual void set_affine_data_place(data_place place)#
-
inline virtual int cmp(const impl &rhs) const#
Three-way comparison.
- Returns:
-1 if *this < rhs, 0 if *this == rhs, 1 if *this > rhs
-
inline virtual size_t hash() const#
- inline virtual stream_pool &get_stream_pool(
- bool for_computation
-
impl() = default#
-
exec_place() = default#