cuda::experimental::stf::data_place_device_auto

class data_place_device_auto : public cuda::experimental::stf::data_place_interface

Implementation of the device_auto data place (the device is selected automatically).

Public Types

enum ord

Special device ordinal values for non-device places.

Returned by get_device_ordinal() for places that don’t correspond to a specific CUDA device.

Values:

enumerator invalid = ::std::numeric_limits<int>::min()
enumerator composite = -5
enumerator device_auto = -4
enumerator affine = -3
enumerator managed = -2
enumerator host = -1

Public Functions

inline virtual bool is_resolved() const override

Whether this place is fully resolved and ready for allocation.

Returns true for places that represent a concrete memory target: host, managed, device(N), composite, green_ctx, etc. Returns false for abstract/deferred places that need further resolution: invalid, affine, device_auto.

inline virtual int get_device_ordinal() const override

Get the device ordinal for this place.

Returns:

  • >= 0 for specific CUDA devices

  • data_place_ordinals::host (-1) for host

  • data_place_ordinals::managed (-2) for managed

  • data_place_ordinals::affine (-3) for affine

  • data_place_ordinals::device_auto (-4) for device_auto

  • data_place_ordinals::composite (-5) for composite

  • data_place_ordinals::invalid for invalid

inline virtual ::std::string to_string() const override

Get a string representation of this place.

inline virtual size_t hash() const override

Compute a hash value for this place.

inline virtual int cmp(const data_place_interface &other) const override

Three-way comparison with another place.

Returns:

-1 if *this < other, 0 if *this == other, 1 if *this > other

inline virtual void *allocate(::std::ptrdiff_t, cudaStream_t) const override

Allocate memory at this place.

Parameters:
  • size – Size of the allocation in bytes

  • stream – CUDA stream for stream-ordered allocations

Throws:

std::runtime_error – if allocation is not supported for this place type

Returns:

Pointer to allocated memory

inline virtual void deallocate(void*, size_t, cudaStream_t) const override

Deallocate memory at this place.

Parameters:
  • ptr – Pointer to memory to deallocate

  • size – Size of the allocation

  • stream – CUDA stream for stream-ordered deallocations

inline virtual bool allocation_is_stream_ordered() const override

Returns true if allocation/deallocation is stream-ordered.

inline virtual CUresult mem_create(CUmemGenericAllocationHandle*, size_t) const

Create a physical memory allocation for this place (VMM API)

Default implementation returns CUDA_ERROR_NOT_SUPPORTED. Subclasses that support VMM should override this.

Parameters:
  • handle – Output parameter for the allocation handle

  • size – Size of the allocation in bytes

Returns:

CUresult indicating success or failure

inline ::std::shared_ptr<void> get_affine_exec_impl() const

Get the implementation for the affine exec_place (for custom place types)

Custom data_place implementations (e.g. green contexts) override this to provide their own affine exec_place. Returns nullptr by default, which causes data_place::affine_exec_place() to fall through to the error path. The returned shared_ptr should be castable to shared_ptr<exec_place::impl>.

inline virtual const partition_fn_t &get_partitioner() const

Get the partitioner function for composite places.

Throws:

std::logic_error – if not a composite place