cuda::experimental::stf::data_place#

class data_place#

Designates where data will be stored: in CPU memory, or on device 0 (first GPU), device 1 (second GPU), …

This class uses a polymorphic design where all place types (host, managed, device, composite, future extensions) implement a common data_place_interface. The data_place class holds a shared_ptr to this interface and delegates operations to it.

Public Functions

inline explicit data_place(
::std::shared_ptr<data_place_interface> impl
)#
inline data_place()#

Default constructor.

The object is initialized as invalid.

data_place(const data_place&) = default#
data_place(data_place&&) = default#
data_place &operator=(const data_place&) = default#
data_place &operator=(data_place&&) = default#
inline bool operator==(const data_place &rhs) const#
inline bool operator!=(const data_place &rhs) const#
inline bool operator<(const data_place &rhs) const#
inline bool operator>(const data_place &rhs) const#
inline bool operator<=(const data_place &rhs) const#
inline bool operator>=(const data_place &rhs) const#
inline bool is_composite() const#
inline bool is_invalid() const#
inline bool is_host() const#
inline bool is_managed() const#
inline bool is_affine() const#
inline bool is_device() const#
inline bool is_device_auto() const#
inline bool is_resolved() const#
inline ::std::string to_string() const#
inline const partition_fn_t &get_partitioner() const#
inline exec_place affine_exec_place() const#
inline size_t hash() const#

Compute a hash value for this data place.

Used by std::hash specialization for unordered containers.

inline decorated_stream getDataStream() const#
inline const ::std::shared_ptr<data_place_interface> &get_impl(
) const#

Get the underlying interface pointer.

This is primarily for internal use and backward compatibility.

inline CUresult mem_create(
CUmemGenericAllocationHandle *handle,
size_t size
) const#

Create a physical memory allocation for this place (VMM API)

inline void *allocate(
::std::ptrdiff_t size,
cudaStream_t stream = nullptr
) const#

Allocate memory at this data place (raw allocation)

inline void deallocate(
void *ptr,
size_t size,
cudaStream_t stream = nullptr
) const#

Deallocate memory at this data place (raw deallocation)

inline bool allocation_is_stream_ordered() const#

Returns true if allocation/deallocation is stream-ordered.

Public Static Functions

static inline data_place invalid()#

Represents an invalid data_place object.

static inline data_place host()#

Represents the host CPU as the data_place (pinned host memory, or memory which should be pinned by CUDASTF).

static inline data_place managed()#

Represents a managed memory location as the data_place.

static inline data_place affine()#

This actually does not define a data_place, but means that we should use the data place affine to the execution place.

static inline data_place device_auto()#

Constant representing a placeholder that lets the library automatically select a GPU device as the data_place.

static inline data_place device(int dev_id = 0)#

Data is placed on device with index dev_id.

static inline data_place current_device()#

Select the embedded memory of the current device as data_place.

template<typename partitioner_t>
static data_place composite(
partitioner_t p,
const exec_place &g
)#
static inline data_place composite(
partition_fn_t f,
const exec_place &grid
)#
static inline data_place green_ctx(const green_ctx_view &gc_view)#

Friends

inline friend size_t to_index(const data_place &p)#

Returns an index guaranteed to be >= 0 (0 for managed CPU, 1 for pinned CPU, 2 for device 0, 3 for device 1, …).

Requires that p is initialized and different from data_place::invalid().

inline friend data_place from_index(size_t n)#

Inverse of to_index: converts an index back to a data_place.

Index 0 -> managed, 1 -> host, 2 -> device(0), 3 -> device(1), …

inline friend int device_ordinal(const data_place &p)#

Returns the device ordinal (0 = first GPU, 1 = second GPU, …; by convention the CPU is -1). Requires that p is initialized.