cuda::experimental::stf::exec_place#

class exec_place#

Indicates where a computation takes place (CPU, dev0, dev1, …)

All execution places are modeled as grids. Scalar places (host, single device) are simply 1-element grids. This unified model eliminates special-casing and allows uniform iteration over any exec_place.

Subclassed by cuda::experimental::stf::exec_place_device

Public Functions

template<typename T> inline ::std::shared_ptr<impl> make_static_instance( )#

exec_place() = default#

inline bool operator==(const exec_place &rhs) const#

inline bool operator!=(const exec_place &rhs) const#

inline bool operator<(const exec_place &rhs) const#

inline bool operator>(const exec_place &rhs) const#

inline bool operator<=(const exec_place &rhs) const#

inline bool operator>=(const exec_place &rhs) const#

inline size_t hash() const#

inline dim4 get_dims() const#

Get the dimensions of this grid.

For scalar places (host, single device), returns dim4(1, 1, 1, 1).

inline size_t size() const#: Get the total number of places in this grid.

inline exec_place get_place(size_t idx) const#

Get the sub-place at the given linear index.

For scalar places, idx must be 0 and returns the place itself.

inline exec_place get_place(pos4 p) const#: Get the sub-place at the given multi-dimensional position.

inline exec_place_scope activate(size_t idx = 0) const#

Activate the sub-place at the given index.

Returns an exec_place_scope RAII guard that automatically deactivates when destroyed. For scalar places, idx should be 0 (the default).

Parameters: idx – The index of the sub-place to activate (default 0 for scalar places)
Returns: An exec_place_scope guard that manages the activation lifetime

inline ::std::string to_string() const#

inline data_place affine_data_place() const#

inline void set_affine_data_place(data_place place)#

inline stream_pool &get_stream_pool(bool for_computation) const#

inline decorated_stream getStream(bool for_computation) const#

inline cudaStream_t pick_stream(bool for_computation = true) const#

inline const ::std::shared_ptr<impl> &get_impl() const#

inline bool is_host() const#

inline bool is_device() const#

inline size_t grid_dim(int axis_id) const#

Get the dimension along a specific axis.

Deprecated: Use get_dims().get(axis_id) instead

inline dim4 grid_dims() const#

Get all dimensions.

Deprecated: Use get_dims() instead

inline const exec_place &as_grid() const#

Returns *this for compatibility.

Deprecated: All places are grids now; use exec_place methods directly

template<typename ...Args> auto partition_by_scope(Args&&... args)#

template<typename Fun> auto operator->*(Fun &&fun) const#

Execute lambda on this place.

This method accepts a functor, saves the current CUDA device, changes it to the current execution place, invokes the lambda, and finally sets the current device back to the previous one. The last step is taken even if the lambda throws an exception.

Template Parameters: Fun – A callable entity type
Parameters: fun – Input functor that will be forwarded and executed
Returns: auto – the result of the executed functor.

inline exec_place(::std::shared_ptr<impl> pimpl)#

Public Static Functions

static inline exec_place host()#

static inline exec_place device_auto()#

static inline exec_place device(int devid)#

static inline exec_place green_ctx( const green_ctx_view &gc_view, bool use_green_ctx_data_place = false )#

Create a green context execution place.

Parameters:

gc_view – The green context view
use_green_ctx_data_place – If true, use a green context data place as the affine data place. If false (default), use a regular device data place instead.

static inline exec_place cuda_stream(cudaStream_t stream)#

static inline exec_place cuda_stream(const decorated_stream &dstream)#

static inline exec_place current_device()#

Returns the currently active device.

Returns: exec_place

static inline exec_place all_devices()#: Returns all available devices, or a single device if there is only one GPU.

static inline exec_place n_devices(size_t n, dim4 dims)#: Returns a single device if n == 1 (no grid wrapper needed).

static inline exec_place n_devices(size_t n)#: Returns a single device if n == 1 (no grid wrapper needed).

static inline exec_place repeat(const exec_place &e, size_t cnt)#: Creates a grid by replicating an execution place multiple times. Returns the original place if cnt == 1 (no grid wrapper needed).

class impl : public std::enable_shared_from_this<impl>#

Subclassed by cuda::experimental::stf::exec_place_cuda_stream_impl, cuda::experimental::stf::exec_place_device::impl, cuda::experimental::stf::exec_place_device_auto_impl, cuda::experimental::stf::exec_place_green_ctx_impl, cuda::experimental::stf::exec_place_grid_impl, cuda::experimental::stf::exec_place_host_impl

Public Functions

impl() = default#

impl(const impl&) = delete#

impl &operator=(const impl&) = delete#

virtual ~impl() = default#

inline explicit impl(data_place place)#

inline virtual dim4 get_dims() const#

Get the dimensions of this grid.

For scalar places, returns dim4(1, 1, 1, 1).

inline virtual size_t size() const#: Get the total number of places in this grid.

inline ::std::shared_ptr<impl> get_place(size_t idx)#

Get the impl of the sub-place at the given linear index.

For scalar places, idx must be 0 and returns shared_from_this(). For grids, returns the impl of the stored sub-place.

virtual exec_place activate(size_t idx) const = 0#

Activate the sub-place at the given index.

For scalar places, idx must be 0. Returns the previous execution state needed for deactivate().

virtual void deactivate( const exec_place &prev, size_t idx = 0 ) const = 0#: Deactivate the sub-place at the given index, restoring previous state.

inline virtual bool is_host() const#

inline virtual bool is_device() const#

inline virtual data_place affine_data_place() const#

inline ::std::string to_string() const#

inline virtual void set_affine_data_place(data_place place)#

inline virtual int cmp(const impl &rhs) const#

Three-way comparison.

Returns: -1 if *this < rhs, 0 if *this == rhs, 1 if *this > rhs

inline virtual size_t hash() const#

inline virtual stream_pool &get_stream_pool( bool for_computation ) const#

Public Static Attributes

static constexpr size_t pool_size = 4#

static constexpr size_t data_pool_size = 4#