Coverage for cuda/core/_memory/_managed_buffer.py: 90.65%
107 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2# SPDX-License-Identifier: Apache-2.0
4from __future__ import annotations
6from collections.abc import Iterable, Iterator, MutableSet
7from typing import TYPE_CHECKING, Any
9from cuda.core._device import Device
10from cuda.core._host import Host
11from cuda.core._memory._buffer import Buffer
12from cuda.core._memory._managed_location import _coerce_location
13from cuda.core._memory._managed_memory_ops import (
14 _advise_one,
15 _do_single_discard_prefetch_py,
16 _do_single_discard_py,
17 _do_single_prefetch_py,
18 _read_preferred_location_v2,
19)
20from cuda.core._utils.cuda_utils import driver, handle_return
21from cuda.core._utils.version import binding_version, driver_version
23if TYPE_CHECKING:
24 from cuda.core._memory._buffer import MemoryResource
25 from cuda.core._stream import Stream
26 from cuda.core.graph import GraphBuilder
# Byte size of one int32 slot in a cuMemRangeGetAttribute result buffer.
_INT_SIZE = 4

# cuMemAdvise advice values. Each property write references one of these,
# so the enum members are resolved once here at import time.
_ADV = driver.CUmem_advise
_SET_READ_MOSTLY, _UNSET_READ_MOSTLY = (
    _ADV.CU_MEM_ADVISE_SET_READ_MOSTLY,
    _ADV.CU_MEM_ADVISE_UNSET_READ_MOSTLY,
)
_SET_PREFERRED, _UNSET_PREFERRED = (
    _ADV.CU_MEM_ADVISE_SET_PREFERRED_LOCATION,
    _ADV.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
)
_SET_ACCESSED_BY, _UNSET_ACCESSED_BY = (
    _ADV.CU_MEM_ADVISE_SET_ACCESSED_BY,
    _ADV.CU_MEM_ADVISE_UNSET_ACCESSED_BY,
)

# cuMemRangeGetAttribute selectors used by the read side of the properties.
_RANGE = driver.CUmem_range_attribute
_ATTR_READ_MOSTLY = _RANGE.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY
_ATTR_PREFERRED = _RANGE.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION
_ATTR_ACCESSED_BY = _RANGE.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY
def _get_int_attr(buf: Buffer, attribute: Any) -> int:
    """Read one int-sized range attribute of ``buf`` and return it as ``int``.

    Issues ``cuMemRangeGetAttribute`` with a data size of ``_INT_SIZE``
    bytes, passing ``buf.handle`` and ``buf.size`` as the range, and routes
    the driver result through ``handle_return`` before converting.
    """
    driver_result = driver.cuMemRangeGetAttribute(_INT_SIZE, attribute, buf.handle, buf.size)
    raw_value = handle_return(driver_result)
    return int(raw_value)
def _query_accessed_by(buf: Buffer) -> list[Device | Host]:
    """Fetch the current ``CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY`` entries for ``buf``.

    The driver writes an int32 array in which each slot is a device id,
    ``-1`` for the host, or ``-2`` for an unused slot. The array is sized
    to ``cuDeviceGetCount() + 1`` so it can hold every visible device plus
    the host. Unused slots are dropped from the returned list.
    """
    # One slot per visible device, plus one for the host.
    slot_count = handle_return(driver.cuDeviceGetCount()) + 1
    entries = handle_return(
        driver.cuMemRangeGetAttribute(slot_count * _INT_SIZE, _ATTR_ACCESSED_BY, buf.handle, buf.size)
    )
    locations: list[Device | Host] = []
    for entry in entries:
        if entry == -2:
            # Empty slot: nothing recorded here.
            continue
        locations.append(Host() if entry == -1 else Device(entry))
    return locations
class AccessedBySetProxy(MutableSet[Device | Host]):
    """Set-like, driver-backed view of a managed buffer's ``set_accessed_by`` advice.

    Every read (``__contains__``, ``__iter__``, ``len(...)``) queries
    ``cuMemRangeGetAttribute``, and every write (``add``, ``discard``)
    issues ``cuMemAdvise``. Nothing is mirrored in Python, so the view
    always reports what the driver currently holds.

    Note
    ----
    The driver reports integer device ordinals (``-1`` for host). Host
    NUMA distinctions made via ``Host(numa_id=...)`` therefore collapse to
    a generic ``Host()`` when this set is iterated.
    """

    __slots__ = ("_buf",)

    def __init__(self, buf: ManagedBuffer):
        self._buf = buf

    # Set operators (&, |, ^, ...) build their result through this hook;
    # returning a plain set keeps the result detached from the driver.
    @classmethod
    def _from_iterable(cls, it: Iterable[Any]) -> set[Device | Host]:  # type: ignore[override]
        return set(it)

    # --- abstract methods required by MutableSet ---

    def __contains__(self, location: object) -> bool:
        # Anything that is not a Device/Host is never a member; the driver
        # is only queried for valid location types.
        return isinstance(location, (Device, Host)) and location in _query_accessed_by(self._buf)

    def __iter__(self) -> Iterator[Device | Host]:
        snapshot = _query_accessed_by(self._buf)
        return iter(snapshot)

    def __len__(self) -> int:
        snapshot = _query_accessed_by(self._buf)
        return len(snapshot)

    def add(self, location: Device | Host) -> None:
        """Issue ``set_accessed_by`` advice for ``location``.

        Raises ``TypeError`` if ``location`` is neither a ``Device`` nor a
        ``Host``.
        """
        if isinstance(location, (Device, Host)):
            _advise_one(self._buf, _SET_ACCESSED_BY, location)
            return
        raise TypeError(f"expected Device or Host, got {type(location).__name__}")

    def discard(self, location: Device | Host) -> None:
        """Issue ``unset_accessed_by`` advice for ``location``.

        ``MutableSet.discard`` must be a no-op for elements that are not in
        the set. ``set_accessed_by`` accepts only ``Device`` and the generic
        ``Host()``, so NUMA-aware hosts (``Host(numa_id=...)``,
        ``Host.numa_current()``) can never be members; discarding one is
        silently ignored instead of being forwarded to the driver. Objects
        of any other type are ignored as well.
        """
        if isinstance(location, Host):
            if location.numa_id is not None or location.is_numa_current:
                return
        elif not isinstance(location, Device):
            return
        _advise_one(self._buf, _UNSET_ACCESSED_BY, location)

    def __repr__(self) -> str:
        members = set(_query_accessed_by(self._buf))
        return f"AccessedBySetProxy({members!r})"
class ManagedBuffer(Buffer):
    """Buffer over managed (unified) memory that exposes advice as properties.

    Instances come from :meth:`ManagedMemoryResource.allocate`, or from
    :meth:`ManagedBuffer.from_handle` when wrapping an existing
    managed-memory pointer.

    Examples
    --------
    >>> buf = mr.allocate(size)
    >>> buf.read_mostly = True
    >>> buf.preferred_location = Device(0)
    >>> buf.accessed_by.add(Device(1))
    >>> buf.prefetch(Device(0), stream=stream)

    Note
    ----
    On CUDA 13 builds, ``preferred_location`` round-trips full NUMA
    information. On CUDA 12 builds, ``Host(numa_id=...)`` and
    ``Host.numa_current()`` are rejected with ``TypeError`` at the call
    boundary; only ``Device(...)`` and the generic ``Host()`` are
    accepted. Use ``Host()`` to target the host on CUDA 12.
    """

    @classmethod
    def from_handle(
        cls,
        ptr,
        size: int,
        mr: MemoryResource | None = None,
        owner: object | None = None,
    ) -> Buffer:
        """Build a :class:`ManagedBuffer` around an existing managed-memory pointer.

        Intended for externally-allocated managed pointers that should
        gain the property-style advice API (:attr:`read_mostly`,
        :attr:`preferred_location`, :attr:`accessed_by`).

        Parameters
        ----------
        ptr : :obj:`~_memory.DevicePointerT`
            Pointer to a managed allocation.
        size : int
            Allocation size in bytes.
        mr : :obj:`~_memory.MemoryResource`, optional
            Memory resource that owns ``ptr``. When provided, its
            ``deallocate`` is called when the buffer is closed.
        owner : object, optional
            An object that keeps the underlying allocation alive.
            ``owner`` and ``mr`` cannot both be specified.
        """
        wrapped = cls._init(ptr, size, mr=mr, owner=owner)
        return wrapped

    @property
    def read_mostly(self) -> bool:
        """``True`` while ``set_read_mostly`` advice is in effect for this range."""
        flag = _get_int_attr(self, _ATTR_READ_MOSTLY)
        return flag != 0

    @read_mostly.setter
    def read_mostly(self, value: bool) -> None:
        # A truthy value applies the advice; a falsy value removes it.
        if value:
            advice = _SET_READ_MOSTLY
        else:
            advice = _UNSET_READ_MOSTLY
        _advise_one(self, advice, None)

    @property
    def preferred_location(self) -> Device | Host | None:
        """The active ``set_preferred_location`` target, or ``None`` if unset.

        CUDA 13 builds round-trip ``Host(numa_id=N)`` in full. On CUDA 12
        the legacy attribute holds only a device ordinal (or ``-1`` for
        host), so a ``Host(numa_id=N)`` given to the setter reads back as
        ``Host()``.
        """
        # The v2 path uses CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_{TYPE,ID},
        # both added in CUDA 13. Both the bindings and the runtime driver
        # must be 13.0+; otherwise use the legacy device-ordinal path.
        # See PR #2054 / #2064 for prior bindings-only-check regressions.
        minimum = (13, 0, 0)
        if binding_version() >= minimum and driver_version() >= minimum:
            return _read_preferred_location_v2(self)
        # CUDA 12 legacy path: no NUMA information is available. This is
        # also taken when the bindings are 13.x but the driver is 12.x.
        ordinal = _get_int_attr(self, _ATTR_PREFERRED)
        if ordinal == -2:
            return None
        return Host() if ordinal == -1 else Device(ordinal)

    @preferred_location.setter
    def preferred_location(self, value: Device | Host | None) -> None:
        # ``None`` clears the advice; any other value becomes the target.
        if value is None:
            advice, target = _UNSET_PREFERRED, None
        else:
            advice, target = _SET_PREFERRED, value
        _advise_one(self, advice, target)

    @property
    def accessed_by(self) -> AccessedBySetProxy:
        """Live, set-like view of the ``set_accessed_by`` locations."""
        return AccessedBySetProxy(self)

    @accessed_by.setter
    def accessed_by(self, locations: Iterable[Device | Host]) -> None:
        # All targets are validated before the first cuMemAdvise call, so
        # a bad element cannot leave accessed_by partially mutated.
        requested: set[Device | Host] = set()
        for entry in locations:
            if not isinstance(entry, (Device, Host)):
                raise TypeError(f"accessed_by entries must be Device or Host, got {type(entry).__name__}")
            requested.add(entry)
        for entry in requested:
            spec = _coerce_location(entry)
            assert spec is not None
            if spec.kind not in ("device", "host"):
                raise ValueError(f"advise {_SET_ACCESSED_BY.name} does not support location_type='{spec.kind}'")
        # Reconcile against the driver's current state: remove what is no
        # longer wanted first, then add what is missing.
        live = set(_query_accessed_by(self))
        stale = live - requested
        for entry in stale:
            _advise_one(self, _UNSET_ACCESSED_BY, entry)
        missing = requested - live
        for entry in missing:
            _advise_one(self, _SET_ACCESSED_BY, entry)

    def prefetch(self, location: Device | Host, *, stream: Stream | GraphBuilder) -> None:
        """Prefetch this buffer's range to ``location`` on ``stream``."""
        _do_single_prefetch_py(self, location, stream)

    def discard(self, *, stream: Stream | GraphBuilder) -> None:
        """Discard the resident pages of this range on ``stream`` (CUDA 13+)."""
        _do_single_discard_py(self, stream)

    def discard_prefetch(self, location: Device | Host, *, stream: Stream | GraphBuilder) -> None:
        """Discard this range, then prefetch it to ``location`` on ``stream`` (CUDA 13+)."""
        _do_single_discard_prefetch_py(self, location, stream)