Coverage for cuda/core/_memory/_managed_buffer.py: 90.65%

107 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-03 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# SPDX-License-Identifier: Apache-2.0 

3 

4from __future__ import annotations 

5 

6from collections.abc import Iterable, Iterator, MutableSet 

7from typing import TYPE_CHECKING, Any 

8 

9from cuda.core._device import Device 

10from cuda.core._host import Host 

11from cuda.core._memory._buffer import Buffer 

12from cuda.core._memory._managed_location import _coerce_location 

13from cuda.core._memory._managed_memory_ops import ( 

14 _advise_one, 

15 _do_single_discard_prefetch_py, 

16 _do_single_discard_py, 

17 _do_single_prefetch_py, 

18 _read_preferred_location_v2, 

19) 

20from cuda.core._utils.cuda_utils import driver, handle_return 

21from cuda.core._utils.version import binding_version, driver_version 

22 

23if TYPE_CHECKING: 

24 from cuda.core._memory._buffer import MemoryResource 

25 from cuda.core._stream import Stream 

26 from cuda.core.graph import GraphBuilder 

27 

28 

# Byte width of one int32 slot; used as the ``dataSize`` unit for every
# ``cuMemRangeGetAttribute`` call in this module.
_INT_SIZE = 4

# ``cuMemAdvise`` advice values, bound once at import time so property
# writes do not repeat the enum lookup. Grouped as (set, unset) pairs.
_ADV = driver.CUmem_advise
_SET_READ_MOSTLY, _UNSET_READ_MOSTLY = (
    _ADV.CU_MEM_ADVISE_SET_READ_MOSTLY,
    _ADV.CU_MEM_ADVISE_UNSET_READ_MOSTLY,
)
_SET_PREFERRED, _UNSET_PREFERRED = (
    _ADV.CU_MEM_ADVISE_SET_PREFERRED_LOCATION,
    _ADV.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION,
)
_SET_ACCESSED_BY, _UNSET_ACCESSED_BY = (
    _ADV.CU_MEM_ADVISE_SET_ACCESSED_BY,
    _ADV.CU_MEM_ADVISE_UNSET_ACCESSED_BY,
)

# ``cuMemRangeGetAttribute`` selectors that read the advice state back.
_RANGE = driver.CUmem_range_attribute
_ATTR_READ_MOSTLY, _ATTR_PREFERRED, _ATTR_ACCESSED_BY = (
    _RANGE.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
    _RANGE.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
    _RANGE.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,
)

44 

45 

def _get_int_attr(buf: Buffer, attribute: Any) -> int:
    """Read one int-sized range attribute of ``buf`` from the driver.

    Parameters
    ----------
    buf : Buffer
        Buffer whose ``handle`` and ``size`` delimit the queried range.
    attribute : Any
        Attribute selector forwarded to ``cuMemRangeGetAttribute``.

    Returns
    -------
    int
        The attribute value, converted with ``int()``.
    """
    # A single int32 result, hence a data size of exactly ``_INT_SIZE`` bytes.
    query_result = driver.cuMemRangeGetAttribute(_INT_SIZE, attribute, buf.handle, buf.size)
    value = handle_return(query_result)
    return int(value)

48 

49 

def _query_accessed_by(buf: Buffer) -> list[Device | Host]:
    """Return the locations currently listed in ``buf``'s accessed-by advice.

    Queries ``CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY`` on every call. The
    driver answers with an int32 array in which each slot is a device id,
    ``-1`` for the host, or ``-2`` for an unused slot. The array is sized
    to ``cuDeviceGetCount() + 1`` so it can hold every visible device plus
    the host.

    Parameters
    ----------
    buf : Buffer
        Buffer whose ``handle`` and ``size`` delimit the queried range.

    Returns
    -------
    list[Device | Host]
        One entry per non-empty slot, in the order the driver reported
        them: ``Host()`` for ``-1``, ``Device(id)`` otherwise.
    """
    device_count = handle_return(driver.cuDeviceGetCount())
    # One slot per visible device, plus one for the host.
    byte_size = (device_count + 1) * _INT_SIZE
    slots = handle_return(driver.cuMemRangeGetAttribute(byte_size, _ATTR_ACCESSED_BY, buf.handle, buf.size))

    locations = []
    for slot in slots:
        if slot != -2:  # -2 marks an unused slot
            locations.append(Host() if slot == -1 else Device(slot))
    return locations

60 

61 

class AccessedBySetProxy(MutableSet[Device | Host]):
    """Set-like window onto a managed buffer's ``set_accessed_by`` advice.

    Nothing is cached on the Python side. Every read (``__contains__``,
    ``__iter__``, ``len(...)``) issues a ``cuMemRangeGetAttribute`` query
    and every write (``add``, ``discard``) issues a ``cuMemAdvise``, so
    the view always shows what the driver currently holds.

    Note
    ----
    The driver reports plain integer device ordinals (``-1`` meaning
    host). Host NUMA distinctions expressed as ``Host(numa_id=...)`` are
    therefore not visible here: iteration yields a generic ``Host()``.
    """

    __slots__ = ("_buf",)

    def __init__(self, buf: ManagedBuffer):
        # The buffer whose advice state this proxy reads and writes.
        self._buf = buf

    # Set operators (&, |, ^, -) build their result through this hook;
    # return a detached plain ``set`` instead of another driver-backed proxy.
    @classmethod
    def _from_iterable(cls, it: Iterable[Any]) -> set[Device | Host]:  # type: ignore[override]
        return set(it)

    # --- abstract methods required by MutableSet ---

    def __contains__(self, location: object) -> bool:
        """Return whether ``location`` is in the live accessed-by list.

        Anything that is not a ``Device`` or ``Host`` is reported as absent
        without querying the driver.
        """
        return isinstance(location, (Device, Host)) and location in _query_accessed_by(self._buf)

    def __iter__(self) -> Iterator[Device | Host]:
        """Iterate over a snapshot of the accessed-by list taken now."""
        snapshot = _query_accessed_by(self._buf)
        return iter(snapshot)

    def __len__(self) -> int:
        """Return the number of locations the driver currently reports."""
        snapshot = _query_accessed_by(self._buf)
        return len(snapshot)

    def add(self, location: Device | Host) -> None:
        """Apply ``set_accessed_by`` advice for ``location``.

        Raises
        ------
        TypeError
            If ``location`` is neither a ``Device`` nor a ``Host``.
        """
        if isinstance(location, (Device, Host)):
            _advise_one(self._buf, _SET_ACCESSED_BY, location)
            return
        raise TypeError(f"expected Device or Host, got {type(location).__name__}")

    def discard(self, location: Device | Host) -> None:
        """Apply ``unset_accessed_by`` advice for ``location``.

        ``MutableSet.discard`` must do nothing for values that are not
        members. Two kinds of value can never be members and are dropped
        here without contacting the driver:

        * objects that are neither ``Device`` nor ``Host``;
        * NUMA-aware hosts (``Host(numa_id=...)``, ``Host.numa_current()``),
          because ``set_accessed_by`` only accepts ``Device`` and the
          generic ``Host()``.
        """
        if isinstance(location, Host):
            numa_specific = location.numa_id is not None or location.is_numa_current
            if numa_specific:
                return
        elif not isinstance(location, Device):
            return
        _advise_one(self._buf, _UNSET_ACCESSED_BY, location)

    def __repr__(self) -> str:
        """Show the live membership, queried at call time."""
        members = set(_query_accessed_by(self._buf))
        return f"AccessedBySetProxy({members!r})"

123 

124 

class ManagedBuffer(Buffer):
    """Buffer over managed (unified) memory that exposes advice as properties.

    Instances come from :meth:`ManagedMemoryResource.allocate`; an
    already-allocated managed pointer can be adopted through
    :meth:`ManagedBuffer.from_handle`.

    Examples
    --------
    >>> buf = mr.allocate(size)
    >>> buf.read_mostly = True
    >>> buf.preferred_location = Device(0)
    >>> buf.accessed_by.add(Device(1))
    >>> buf.prefetch(Device(0), stream=stream)

    Note
    ----
    ``preferred_location`` round-trips full NUMA information on CUDA 13
    builds. CUDA 12 builds accept only ``Device(...)`` and the generic
    ``Host()``: ``Host(numa_id=...)`` and ``Host.numa_current()`` are
    rejected with ``TypeError`` at the call boundary, so use ``Host()``
    to target the host there.
    """

    @classmethod
    def from_handle(
        cls,
        ptr,
        size: int,
        mr: MemoryResource | None = None,
        owner: object | None = None,
    ) -> Buffer:
        """Adopt an existing managed-memory pointer as a :class:`ManagedBuffer`.

        Intended for managed pointers allocated elsewhere that should gain
        the property-style advice API (:attr:`read_mostly`,
        :attr:`preferred_location`, :attr:`accessed_by`).

        Parameters
        ----------
        ptr : :obj:`~_memory.DevicePointerT`
            Pointer to a managed allocation.
        size : int
            Size of the allocation, in bytes.
        mr : :obj:`~_memory.MemoryResource`, optional
            Memory resource owning ``ptr``. If given, its ``deallocate``
            is invoked when the buffer is closed.
        owner : object, optional
            Object that keeps the underlying allocation alive. Mutually
            exclusive with ``mr``.
        """
        wrapped = cls._init(ptr, size, mr=mr, owner=owner)
        return wrapped

    @property
    def read_mostly(self) -> bool:
        """Whether ``set_read_mostly`` advice is currently applied."""
        flag = _get_int_attr(self, _ATTR_READ_MOSTLY)
        return flag != 0

    @read_mostly.setter
    def read_mostly(self, value: bool) -> None:
        # Any truthy value sets the advice; any falsy value clears it.
        advice = _SET_READ_MOSTLY if value else _UNSET_READ_MOSTLY
        _advise_one(self, advice, None)

    @property
    def preferred_location(self) -> Device | Host | None:
        """Currently applied ``set_preferred_location`` target, or ``None``.

        On CUDA 13 builds the value fully round-trips, including
        ``Host(numa_id=N)``. On the CUDA 12 legacy path the attribute is
        only a device ordinal (``-1`` for host), so no NUMA information is
        available and a host target is always reported as ``Host()``.
        """
        # The v2 reader relies on CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_{TYPE,ID},
        # both introduced in CUDA 13, so the bindings AND the runtime driver
        # must be at least 13.0. Checking the bindings alone has regressed
        # before; see PR #2054 / #2064.
        minimum = (13, 0, 0)
        if binding_version() >= minimum and driver_version() >= minimum:
            return _read_preferred_location_v2(self)

        # Legacy CUDA 12 path; also reached with 13.x bindings on a 12.x
        # runtime driver. The attribute is a bare ordinal: -2 means no
        # preferred location, -1 means host, anything else is a device id.
        ordinal = _get_int_attr(self, _ATTR_PREFERRED)
        if ordinal == -2:
            return None
        return Host() if ordinal == -1 else Device(ordinal)

    @preferred_location.setter
    def preferred_location(self, value: Device | Host | None) -> None:
        # Assigning ``None`` clears the advice; anything else is forwarded
        # to ``_advise_one`` as the new preferred location.
        if value is not None:
            _advise_one(self, _SET_PREFERRED, value)
            return
        _advise_one(self, _UNSET_PREFERRED, None)

    @property
    def accessed_by(self) -> AccessedBySetProxy:
        """Live set-like view of ``set_accessed_by`` locations."""
        return AccessedBySetProxy(self)

    @accessed_by.setter
    def accessed_by(self, locations: Iterable[Device | Host]) -> None:
        # Phase 1: validate everything up front. No cuMemAdvise is issued
        # until every entry has passed, so a bad element cannot leave the
        # advice partially mutated.
        requested: set[Device | Host] = set()
        for entry in locations:
            if not isinstance(entry, (Device, Host)):
                raise TypeError(f"accessed_by entries must be Device or Host, got {type(entry).__name__}")
            requested.add(entry)
        for entry in requested:
            spec = _coerce_location(entry)
            assert spec is not None
            if spec.kind not in ("device", "host"):
                raise ValueError(f"advise {_SET_ACCESSED_BY.name} does not support location_type='{spec.kind}'")

        # Phase 2: reconcile the driver state with the requested set,
        # touching only the entries that differ. Removals go first.
        existing = set(_query_accessed_by(self))
        stale = existing - requested
        missing = requested - existing
        for entry in stale:
            _advise_one(self, _UNSET_ACCESSED_BY, entry)
        for entry in missing:
            _advise_one(self, _SET_ACCESSED_BY, entry)

    def prefetch(self, location: Device | Host, *, stream: Stream | GraphBuilder) -> None:
        """Prefetch this buffer's range to ``location`` on ``stream``."""
        _do_single_prefetch_py(self, location, stream)

    def discard(self, *, stream: Stream | GraphBuilder) -> None:
        """Discard the resident pages of this range on ``stream`` (CUDA 13+)."""
        _do_single_discard_py(self, stream)

    def discard_prefetch(self, location: Device | Host, *, stream: Stream | GraphBuilder) -> None:
        """Discard this range, then prefetch it to ``location`` on ``stream`` (CUDA 13+)."""
        _do_single_discard_prefetch_py(self, location, stream)