cuda::experimental::stf::data_place#
-
class data_place#
Designates where data will be stored (CPU memory vs.
on device 0 (first GPU), device 1 (second GPU), …)
This typed
enum is aligned with CUDA device ordinals but does not implicitly convert to int. See device_ordinal below.
Public Functions
-
data_place() = default#
Default constructor.
The object is initialized as invalid.
-
inline bool operator==(const data_place &rhs) const#
-
inline bool operator!=(const data_place &rhs) const#
-
inline bool is_composite() const#
Checks if this data place is a composite data place.
-
inline bool is_extension() const#
Checks if this data place has an extension (green context, etc.).
-
inline bool is_invalid() const#
-
inline bool is_host() const#
-
inline bool is_managed() const#
-
inline bool is_affine() const#
-
inline bool is_device() const#
Checks if this data place corresponds to a specific device.
-
inline bool is_device_auto() const#
-
inline ::std::string to_string() const#
-
inline const exec_place_grid &get_grid() const#
-
inline const get_executor_func_t &get_partitioner() const#
-
inline exec_place get_affine_exec_place() const#
- inline decorated_stream getDataStream(
- async_resources_handle &async_resources
-
inline bool has_extension() const#
Check if this data place has a custom extension.
- inline const ::std::shared_ptr<data_place_extension> &get_extension(
Get the extension (may be nullptr for standard place types)
- inline CUresult mem_create(
- CUmemGenericAllocationHandle *handle,
- size_t size
Create a physical memory allocation for this place (VMM API)
This method is used by localized arrays (composite_slice) to create physical memory segments that are then mapped into a contiguous virtual address space. It delegates to the extension’s mem_create if present (enabling custom place types to override memory allocation), otherwise creates a standard pinned allocation on this place’s device or host.
Managed memory is not supported by the VMM API.
See also
allocate() for regular memory allocation
Note
For regular memory allocation (not VMM-based), use the allocate() method instead, which provides stream-ordered allocation via cudaMallocAsync.
- Parameters:
handle – Output parameter for the allocation handle
size – Size of the allocation in bytes
- Returns:
CUresult indicating success or failure
- inline void *allocate(
- ::std::ptrdiff_t size,
- cudaStream_t stream = nullptr
Allocate memory at this data place (raw allocation)
This is the low-level allocation interface that handles all place types:
For extensions: delegates to extension->allocate()
For host: uses cudaMallocHost (immediate, stream ignored)
For managed: uses cudaMallocManaged (immediate, stream ignored)
For device: uses cudaMallocAsync (stream-ordered)
- Parameters:
size – Size of the allocation in bytes
stream – CUDA stream for stream-ordered allocations (ignored for immediate allocations, defaults to nullptr)
- Returns:
Pointer to allocated memory
- inline void deallocate(
- void *ptr,
- size_t size,
- cudaStream_t stream = nullptr
Deallocate memory at this data place (raw deallocation)
For immediate deallocations (host, managed), the stream is ignored. Note that cudaFree (used for managed memory) may introduce implicit synchronization.
- Parameters:
ptr – Pointer to memory to deallocate
size – Size of the allocation
stream – CUDA stream for stream-ordered deallocations (ignored for immediate deallocations, defaults to nullptr)
-
inline bool allocation_is_stream_ordered() const#
Returns true if allocation/deallocation is stream-ordered.
When this returns true, the allocation uses stream-ordered APIs like cudaMallocAsync, and allocators should use stream_async_op to synchronize prerequisites before allocation.
When this returns false, the allocation is immediate (like cudaMallocHost) and the stream parameter is ignored. Note that immediate deallocations (e.g., cudaFree) may introduce implicit synchronization.
Public Static Functions
-
static inline data_place invalid()#
Represents an invalid
data_place object.
-
static inline data_place host()#
Represents the host CPU as the
data_place (pinned host memory, or memory which should be pinned by CUDASTF).
-
static inline data_place managed()#
Represents a managed memory location as the
data_place.
-
static inline data_place affine()#
This actually does not define a data_place, but means that we should use the data place affine to the execution place.
-
static inline data_place device_auto()#
Constant representing a placeholder that lets the library automatically select a GPU device as the
data_place.
-
static inline data_place device(int dev_id = 0)#
Data is placed on device with index dev_id.
Two relaxations are allowed: -1 can be passed to create a placeholder for the host, and -2 can be used to create a placeholder for managed memory.
-
static inline data_place current_device()#
Select the embedded memory of the current device as
data_place.
-
template<typename partitioner_t>
static data_place composite( - partitioner_t p,
- const exec_place_grid &g
- static inline data_place composite(
- get_executor_func_t f,
- const exec_place_grid &grid
-
static inline data_place green_ctx(const green_ctx_view &gc_view)#
- ::std::shared_ptr<green_ctx_view> gc_view_ptr
- ::std::shared_ptr<data_place_extension> ext
Create a data_place from an extension.
This factory method allows custom place types to be created from data_place_extension implementations.
Friends
-
inline friend size_t to_index(const data_place &p)#
Returns an index guaranteed to be >= 0 (0 for managed CPU, 1 for pinned CPU, 2 for device 0, 3 for device 1, …).
Requires that
p is initialized and different from data_place::invalid().
-
inline friend int device_ordinal(const data_place &p)#
Returns the device ordinal (0 = first GPU, 1 = second GPU, … and by convention the CPU is -1). Requires that
p is initialized.
-
class composite_state#
Public Functions
-
composite_state() = default#
- inline composite_state(
- exec_place_grid grid,
- get_executor_func_t partitioner_func
-
inline const exec_place_grid &get_grid() const#
-
inline const get_executor_func_t &get_partitioner() const#
-
composite_state() = default#
-
data_place() = default#