cuda::experimental::stf::data_place#
-
class data_place#
Designates where data will be stored: in host (CPU) memory, or in the memory
of a given device (device 0 is the first GPU, device 1 the second GPU, …).
This class uses a polymorphic design where all place types (host, managed, device, composite, future extensions) implement a common data_place_interface. The data_place class holds a shared_ptr to this interface and delegates operations to it.
Public Functions
- ::std::shared_ptr<data_place_interface> impl
-
inline data_place()#
Default constructor.
The object is initialized as invalid.
-
data_place(const data_place&) = default#
-
data_place(data_place&&) = default#
-
data_place &operator=(const data_place&) = default#
-
data_place &operator=(data_place&&) = default#
-
inline bool operator==(const data_place &rhs) const#
-
inline bool operator!=(const data_place &rhs) const#
-
inline bool operator<(const data_place &rhs) const#
-
inline bool operator>(const data_place &rhs) const#
-
inline bool operator<=(const data_place &rhs) const#
-
inline bool operator>=(const data_place &rhs) const#
-
inline bool is_composite() const#
-
inline bool is_invalid() const#
-
inline bool is_host() const#
-
inline bool is_managed() const#
-
inline bool is_affine() const#
-
inline bool is_device() const#
-
inline bool is_device_auto() const#
-
inline bool is_resolved() const#
-
inline ::std::string to_string() const#
-
inline const partition_fn_t &get_partitioner() const#
-
inline exec_place affine_exec_place() const#
-
inline size_t hash() const#
Compute a hash value for this data place.
Used by std::hash specialization for unordered containers.
-
inline decorated_stream getDataStream() const#
- inline const ::std::shared_ptr<data_place_interface> &get_impl()#
Get the underlying interface pointer.
This is primarily for internal use and backward compatibility.
- inline CUresult mem_create(CUmemGenericAllocationHandle *handle, size_t size)#
Create a physical memory allocation for this place (VMM API).
- inline void *allocate(::std::ptrdiff_t size, cudaStream_t stream = nullptr)#
Allocate memory at this data place (raw allocation).
- inline void deallocate(void *ptr, size_t size, cudaStream_t stream = nullptr)#
Deallocate memory at this data place (raw deallocation).
-
inline bool allocation_is_stream_ordered() const#
Returns true if allocation/deallocation is stream-ordered.
Public Static Functions
-
static inline data_place invalid()#
Represents an invalid data_place object.
-
static inline data_place host()#
Represents the host CPU as the data_place (pinned host memory, or memory which should be pinned by CUDASTF).
-
static inline data_place managed()#
Represents a managed memory location as the
data_place.
-
static inline data_place affine()#
This actually does not define a data_place, but means that we should use the data place affine to the execution place.
-
static inline data_place device_auto()#
Constant representing a placeholder that lets the library automatically select a GPU device as the
data_place.
-
static inline data_place device(int dev_id = 0)#
Data is placed on device with index dev_id.
-
static inline data_place current_device()#
Selects the embedded memory of the current device as the data_place.
-
template<typename partitioner_t>
static data_place composite(partitioner_t p, const exec_place &g)#
-
static inline data_place composite(partition_fn_t f, const exec_place &grid)#
-
static inline data_place green_ctx(const green_ctx_view &gc_view)#
Friends
-
inline friend size_t to_index(const data_place &p)#
Returns an index guaranteed to be >= 0 (0 for managed CPU, 1 for pinned CPU, 2 for device 0, 3 for device 1, …).
Requires that p is initialized and different from data_place::invalid().
-
inline friend data_place from_index(size_t n)#
Inverse of to_index: converts an index back to a data_place.
Index 0 -> managed, 1 -> host, 2 -> device(0), 3 -> device(1), …
-
inline friend int device_ordinal(const data_place &p)#
Returns the device ordinal (0 = first GPU, 1 = second GPU, …; by convention the CPU is -1).
Requires that p is initialized.