cuda::experimental::stf::data_place#

class data_place#

Designates where data will be stored: in CPU memory, or on device 0 (first GPU), device 1 (second GPU), …

This class uses a polymorphic design where all place types (host, managed, device, composite, future extensions) implement a common data_place_interface. The data_place class holds a shared_ptr to this interface and delegates operations to it.

Public Functions

inline explicit data_place(
::std::shared_ptr<data_place_interface> impl
)#
inline data_place()#

Default constructor.

The object is initialized as invalid.

data_place(const data_place&) = default#
data_place(data_place&&) = default#
data_place &operator=(const data_place&) = default#
data_place &operator=(data_place&&) = default#
inline bool operator==(const data_place &rhs) const#
inline bool operator!=(const data_place &rhs) const#
inline bool operator<(const data_place &rhs) const#
inline bool operator>(const data_place &rhs) const#
inline bool operator<=(const data_place &rhs) const#
inline bool operator>=(const data_place &rhs) const#
inline bool is_composite() const#
inline bool is_invalid() const#
inline bool is_host() const#
inline bool is_managed() const#
inline bool is_affine() const#
inline bool is_device() const#
inline bool is_device_auto() const#
inline bool is_resolved() const#
inline ::std::string to_string() const#
inline const partition_fn_t &get_partitioner() const#
inline exec_place affine_exec_place() const#
inline size_t hash() const#

Compute a hash value for this data place.

Used by std::hash specialization for unordered containers.

inline decorated_stream getDataStream() const#
inline const ::std::shared_ptr<data_place_interface> &get_impl(
) const#

Get the underlying interface pointer.

This is primarily for internal use and backward compatibility.

inline CUresult mem_create(
CUmemGenericAllocationHandle *handle,
size_t size
) const#

Create a physical memory allocation for this place (VMM API)

inline void *allocate(
::std::ptrdiff_t size,
cudaStream_t stream = nullptr
) const#

Allocate memory at this data place (raw allocation)

inline void deallocate(
void *ptr,
size_t size,
cudaStream_t stream = nullptr
) const#

Deallocate memory at this data place (raw deallocation)

inline bool allocation_is_stream_ordered() const#

Returns true if allocation/deallocation is stream-ordered.

Public Static Functions

static inline data_place invalid()#

Represents an invalid data_place object.

static inline data_place host()#

Represents the host CPU as the data_place (pinned host memory, or memory which should be pinned by CUDASTF).

static inline data_place managed()#

Represents a managed memory location as the data_place.

static inline data_place affine()#

This actually does not define a data_place, but means that we should use the data place affine to the execution place.

static inline data_place device_auto()#

Constant representing a placeholder that lets the library automatically select a GPU device as the data_place.

static inline data_place device(int dev_id = 0)#

Data is placed on device with index dev_id.

static inline data_place current_device()#

Select the embedded memory of the current device as data_place.

template<typename partitioner_t>
static data_place composite(
partitioner_t p,
const exec_place &g
)#
static inline data_place composite(
partition_fn_t f,
const exec_place &grid
)#
static inline data_place green_ctx(const green_ctx_view &gc_view)#

Friends

inline friend size_t to_index(const data_place &p)#

Returns an index guaranteed to be >= 0 (0 for managed CPU, 1 for pinned CPU, 2 for device 0, 3 for device 1, …).

Requires that p is initialized and different from data_place::invalid().

inline friend data_place from_index(size_t n)#

Inverse of to_index: converts an index back to a data_place.

Index 0 -> managed, 1 -> host, 2 -> device(0), 3 -> device(1), …

inline friend int device_ordinal(const data_place &p)#

Returns the device ordinal (0 = first GPU, 1 = second GPU, …; by convention the CPU is -1). Requires that p is initialized.