Coverage for cuda/core/_memory/_managed_memory_resource.pyx: 88.24%

119 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-13 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from cuda.bindings cimport cydriver 

8  

9from cuda.core._memory._memory_pool cimport _MemPool, _MP_allocate 

10from cuda.core._memory._memory_pool cimport MP_init_create_pool, MP_init_current_pool # no-cython-lint 

11from cuda.core._stream cimport Stream, Stream_accept 

12from cuda.core._utils.cuda_utils cimport HANDLE_RETURN 

13from cuda.core._utils.cuda_utils cimport check_or_create_options # no-cython-lint 

14from cuda.core._utils.cuda_utils import CUDAError # no-cython-lint 

15  

16from dataclasses import dataclass 

17import threading 

18from typing import TYPE_CHECKING 

19import warnings 

20  

21from cuda.core._memory._managed_buffer import ManagedBuffer 

22from cuda.core.typing import ManagedMemoryLocationType 

23  

24if TYPE_CHECKING: 

25 from cuda.core.graph import GraphBuilder 

26  

27__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions'] 

28  

29  

@dataclass
cdef class ManagedMemoryResourceOptions:
    """Creation options for a :obj:`~_memory.ManagedMemoryResource`.

    Attributes
    ----------
    preferred_location : int | None, optional
        Identifier of the preferred location: either a device ordinal or a
        host NUMA node ID. Which of the two it is depends on
        ``preferred_location_type``.
        (Default to ``None``)

    preferred_location_type : ManagedMemoryLocationType | str | None, optional
        Selects how ``preferred_location`` is to be read.

        ``None`` (the default) selects the legacy interpretation, where
        ``preferred_location`` is a device ordinal, ``-1`` for host, or
        ``None`` for no preference.

        An explicit value fixes both the kind of preferred location and
        the values accepted for ``preferred_location``:

        - ``"device"``: a specific GPU is preferred.
          ``preferred_location`` must be a device ordinal (``>= 0``).
        - ``"host"``: host memory is preferred (NUMA placement is left to
          the OS). ``preferred_location`` must be ``None``.
        - ``"host_numa"``: a specific host NUMA node is preferred.
          ``preferred_location`` must be a NUMA node ID (``>= 0``), or
          ``None`` to take the node from the ``host_numa_id`` attribute of
          the current CUDA device (an active CUDA context is required).

        (Default to ``None``)
    """
    # Field order is part of the generated ``__init__`` signature; keep it.
    preferred_location: int | None = None
    preferred_location_type: ManagedMemoryLocationType | str | None = None

66  

67  

cdef class ManagedMemoryResource(_MemPool):
    """
    A managed memory resource managing a stream-ordered memory pool.

    Managed memory is accessible from both the host and device, with automatic
    migration between them as needed.

    Parameters
    ----------
    options : ManagedMemoryResourceOptions | dict | None, optional
        Memory resource creation options.

        If set to `None`, the memory resource uses the driver's current
        stream-ordered memory pool. If no memory pool is set as current,
        the driver's default memory pool is used.

        If not set to `None`, a new memory pool is created, which is owned by
        the memory resource.

        When using an existing (current or default) memory pool, the returned
        managed memory resource does not own the pool (`is_handle_owned` is
        `False`), and closing the resource has no effect.

    Notes
    -----
    IPC (Inter-Process Communication) is not currently supported for managed
    memory pools.
    """

    def __init__(self, options: ManagedMemoryResourceOptions | dict[str, object] | None = None) -> None:
        # All initialization (option validation, pool creation or lookup, and
        # the one-time platform check) is delegated to the module-level helper.
        _MMR_init(self, options)

    def allocate(self, size_t size, *, stream: Stream | GraphBuilder) -> ManagedBuffer:
        """Allocate a managed-memory buffer of the requested size.

        Parameters
        ----------
        size : int
            The size of the buffer to allocate, in bytes.
        stream : :obj:`~_stream.Stream`
            Keyword-only. The stream on which to perform the allocation
            asynchronously. Must be passed explicitly; pass
            ``device.default_stream`` to use the default stream.
            Only :obj:`~_stream.Stream` instances are accepted here; any
            other object (including a graph builder) is rejected.

        Returns
        -------
        ManagedBuffer
            A :class:`ManagedBuffer` (a :class:`Buffer` subclass) that
            exposes the property-style advice API
            (``read_mostly``, ``preferred_location``, ``accessed_by``)
            and instance methods (``prefetch``, ``discard``,
            ``discard_prefetch``).

        Raises
        ------
        TypeError
            If ``stream`` is not a :obj:`~_stream.Stream`, or if this
            resource is a mapped IPC-enabled memory resource.
        """
        # Validate with an explicit raise rather than ``assert``: assertions
        # are skipped when Python runs with -O (and can be compiled out of
        # Cython modules), which would let unsupported stream types through.
        if not isinstance(stream, Stream):
            raise TypeError("Only Stream is supported for managed memory allocations")
        if self.is_mapped:
            raise TypeError("Cannot allocate from a mapped IPC-enabled memory resource")
        cdef Stream s = Stream_accept(stream)
        return _MP_allocate(self, size, s, ManagedBuffer)

    @property
    def device_id(self) -> int:
        """The preferred device ordinal, or -1 if the preferred location is not a device."""
        if self._pref_loc_type == "device":
            return self._pref_loc_id
        return -1

    @property
    def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None:
        """The preferred location for managed memory allocations.

        Returns ``None`` if no preferred location is set (driver decides),
        or a tuple ``(type, id)`` where *type* is one of ``"device"``,
        ``"host"``, or ``"host_numa"``, and *id* is the device ordinal,
        ``None`` (for ``"host"``), or the NUMA node ID, respectively.
        """
        if self._pref_loc_type is None:
            return None
        if self._pref_loc_type == "host":
            # "host" has no meaningful ID, so report None instead of the
            # stored ID value.
            return (ManagedMemoryLocationType.HOST, None)
        return (ManagedMemoryLocationType(self._pref_loc_type), self._pref_loc_id)

    @property
    def is_device_accessible(self) -> bool:
        """Return True. This memory resource provides device-accessible buffers."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Return True. This memory resource provides host-accessible buffers."""
        return True

    @property
    def is_managed(self) -> bool:
        """Return True. This memory resource provides managed (unified) memory buffers."""
        return True

163  

164  

IF CUDA_CORE_BUILD_MAJOR >= 13:
    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")


    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
        """Translate the preferred-location options into concrete values.

        ``opts`` may be ``None``, in which case both option fields are
        treated as ``None``.

        Returns a 4-tuple:
            (CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)

        The first two items are the values handed to the driver; the last
        two are the values recorded on the memory resource.

        Raises ``ValueError`` for an unknown location type or a location
        value that is invalid for the selected type, and ``RuntimeError``
        when a ``"host_numa"`` node has to be derived from the current
        device but the device reports a negative ``host_numa_id``.
        """
        cdef object pref_loc = None
        cdef object pref_type = None
        if opts is not None:
            pref_loc = opts.preferred_location
            pref_type = opts.preferred_location_type

        if pref_type is None:
            # Legacy interpretation: the location value alone decides the kind.
            if pref_loc is None:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
                    -1, None, -1,
                )
            if pref_loc == -1:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                    -1, "host", -1,
                )
            if pref_loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0), -1 for "
                    f"host, or None for no preference, got {pref_loc}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                pref_loc, "device", pref_loc,
            )

        # From here on an explicit location type was supplied.
        if pref_type not in _VALID_LOCATION_TYPES:
            raise ValueError(
                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
                f"or None, got {pref_type!r}"
            )

        if pref_type == "device":
            if pref_loc is None or pref_loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0) when "
                    f"preferred_location_type is 'device', got {pref_loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                pref_loc, "device", pref_loc,
            )
        elif pref_type == "host":
            if pref_loc is not None:
                raise ValueError(
                    f"preferred_location must be None when "
                    f"preferred_location_type is 'host', got {pref_loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                -1, "host", -1,
            )

        # Only "host_numa" remains. Either use the caller's node ID or fall
        # back to the one reported by the current device.
        numa_node = pref_loc
        if numa_node is None:
            from .._device import Device
            numa_node = Device().properties.host_numa_id
            if numa_node < 0:
                raise RuntimeError(
                    "Cannot determine host NUMA ID for the current CUDA device. "
                    "The system may not support NUMA, or no CUDA context is "
                    "active. Set preferred_location to an explicit NUMA node ID "
                    "or call Device.set_current() first."
                )
        elif numa_node < 0:
            raise ValueError(
                f"preferred_location must be a NUMA node ID (>= 0) or None "
                f"when preferred_location_type is 'host_numa', got {pref_loc}"
            )
        return (
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
            numa_node, "host_numa", numa_node,
        )

253  

254  

cdef inline _MMR_init(ManagedMemoryResource self, options):
    # Initialize ``self`` from ``options``: normalize the options, resolve the
    # preferred location, bind to (or create) the memory pool, then run the
    # concurrent-managed-access check. Builds for CUDA < 13 always raise.
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        cdef cydriver.CUmemLocationType loc_type
        cdef int loc_id
        # keep_none=True: a ``None`` argument stays ``None`` so that the
        # "use the existing pool" path below can be selected.
        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
            ManagedMemoryResourceOptions,
            options,
            "ManagedMemoryResource options",
            keep_none=True,
        )

        resolved = _resolve_preferred_location(opts)
        loc_type, loc_id, self._pref_loc_type, self._pref_loc_id = resolved

        if opts is not None:
            # Explicit options: create a new pool for this resource.
            # NOTE(review): the meaning of the trailing ``False, 0`` arguments
            # is defined by MP_init_create_pool, which is not visible here.
            MP_init_create_pool(
                self,
                loc_type,
                loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )
        else:
            # No options: attach to the existing (current or default) pool.
            try:
                MP_init_current_pool(
                    self,
                    loc_type,
                    loc_id,
                    cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                )
            except CUDAError as e:
                # Only a "not supported" failure on a platform without
                # concurrent managed access is translated into a friendlier
                # error; everything else propagates untouched.
                if "CUDA_ERROR_NOT_SUPPORTED" not in str(e):
                    raise
                from .._device import Device
                if Device().properties.concurrent_managed_access:
                    raise
                raise RuntimeError(
                    "The default memory pool on this device does not support "
                    "managed allocations (concurrent managed access is not "
                    "available). Use "
                    "ManagedMemoryResource(options=ManagedMemoryResourceOptions(...)) "
                    "to create a dedicated managed pool."
                ) from e

        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")

301  

302  

# Module-level state for the one-time platform check below: the flag records
# that the check has completed, the lock serializes the threads that race to
# perform it.
cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()


cdef inline _check_concurrent_managed_access():
    """Run the concurrent-managed-access check once, warning if unsupported.

    The first call that completes queries the driver and emits a
    ``UserWarning`` when concurrent managed access is unavailable; it then
    sets ``_concurrent_access_warned`` so later calls return immediately.
    """
    global _concurrent_access_warned
    cdef int c_concurrent = 0

    # Fast path: no lock needed once the check has already completed.
    if _concurrent_access_warned:
        return

    with _concurrent_access_lock:
        # Re-test under the lock; another thread may have finished the check
        # while this one was waiting.
        if _concurrent_access_warned:
            return

        # concurrent_managed_access is a system-level attribute for sm_60 and
        # later, so any device will do.
        with nogil:
            HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                &c_concurrent,
                cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                0))

        if c_concurrent == 0:
            message = (
                "This platform does not support concurrent managed memory access "
                "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                "allocation is forbidden while any GPU kernel is in flight, even "
                "if the kernel does not touch that allocation. Failing to "
                "synchronize before host access will cause a segfault. "
                "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                "index.html#gpu-exclusive-access-to-managed-memory"
            )
            warnings.warn(message, UserWarning, stacklevel=3)

        # Reached only when the query (and the warning, if any) completed
        # without raising.
        _concurrent_access_warned = True

339  

340  

def reset_concurrent_access_warning() -> None:
    """Clear the "already checked" flag so the platform check runs again.

    Intended for tests: after this call, the next execution of the
    concurrent-managed-access check queries the driver again and may emit
    its warning again.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False