Coverage for cuda / core / _memory / _managed_memory_resource.pyx: 88.79%

116 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-22 01:37 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from cuda.bindings cimport cydriver 

8  

9from cuda.core._memory._memory_pool cimport _MemPool, _MP_allocate 

10from cuda.core._memory._memory_pool cimport MP_init_create_pool, MP_init_current_pool # no-cython-lint 

11from cuda.core._stream cimport Stream, Stream_accept 

12from cuda.core._utils.cuda_utils cimport HANDLE_RETURN 

13from cuda.core._utils.cuda_utils cimport check_or_create_options # no-cython-lint 

14from cuda.core._utils.cuda_utils import CUDAError # no-cython-lint 

15  

16from dataclasses import dataclass 

17import threading 

18import warnings 

19  

20from cuda.core._memory._managed_buffer import ManagedBuffer 

21from cuda.core.typing import ManagedMemoryLocationType 

22  

23__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions'] 

24  

25  

@dataclass
cdef class ManagedMemoryResourceOptions:
    """Creation options for a :obj:`~_memory.ManagedMemoryResource`.

    Attributes
    ----------
    preferred_location : int | None, optional
        Identifier of the preferred location. Depending on
        ``preferred_location_type`` it is read as a device ordinal or as a
        host NUMA node ID.
        (Defaults to ``None``.)

    preferred_location_type : ManagedMemoryLocationType | str | None, optional
        Selects how ``preferred_location`` is interpreted.

        ``None`` (the default) keeps the legacy convention, in which
        ``preferred_location`` is a device ordinal, ``-1`` stands for the
        host, and ``None`` expresses no preference.

        An explicit value fixes both the kind of preferred location and the
        values accepted for ``preferred_location``:

        - ``"device"``: a specific GPU is preferred;
          ``preferred_location`` must be a device ordinal (``>= 0``).
        - ``"host"``: host memory is preferred (NUMA placement is left to
          the OS); ``preferred_location`` must be ``None``.
        - ``"host_numa"``: a specific host NUMA node is preferred;
          ``preferred_location`` must be a NUMA node ID (``>= 0``), or
          ``None`` to take the node from the ``host_numa_id`` attribute of
          the current CUDA device (which requires an active CUDA context).

        (Defaults to ``None``.)
    """
    preferred_location: int | None = None
    preferred_location_type: ManagedMemoryLocationType | str | None = None

62  

63  

cdef class ManagedMemoryResource(_MemPool):
    """
    Memory resource backed by a stream-ordered pool of managed memory.

    Buffers obtained from this resource can be accessed from the host as
    well as from the device; migration between the two happens automatically
    as needed.

    Parameters
    ----------
    options : ManagedMemoryResourceOptions
        Options used to create the memory resource.

        Passing `None` makes the resource use the driver's current
        stream-ordered memory pool, or the driver's default memory pool when
        no pool has been set as current. In that case the resource does not
        own the pool (`is_handle_owned` is `False`) and closing the resource
        has no effect.

        Passing anything other than `None` creates a new memory pool that is
        owned by the resource.

    Notes
    -----
    Managed memory pools do not currently support IPC (Inter-Process
    Communication).
    """

    def __init__(self, options=None):
        # All construction logic lives in the module-level helper.
        _MMR_init(self, options)

    def allocate(self, size_t size, *, stream: Stream):
        """Allocate ``size`` bytes of managed memory on ``stream``.

        Parameters
        ----------
        size : int
            Number of bytes to allocate.
        stream : :obj:`~_stream.Stream`
            Keyword-only. Stream on which the allocation is performed
            asynchronously. It has to be given explicitly; use
            ``device.default_stream`` for the default stream.

        Returns
        -------
        ManagedBuffer
            A :class:`ManagedBuffer` (a :class:`Buffer` subclass) offering
            the property-style advice API (``read_mostly``,
            ``preferred_location``, ``accessed_by``) together with the
            instance methods ``prefetch``, ``discard`` and
            ``discard_prefetch``.

        Raises
        ------
        TypeError
            If this resource reports ``is_mapped`` as true.
        """
        cdef Stream alloc_stream

        # Reject mapped resources before the stream argument is even examined.
        if self.is_mapped:
            raise TypeError("Cannot allocate from a mapped IPC-enabled memory resource")

        alloc_stream = Stream_accept(stream)
        return _MP_allocate(self, size, alloc_stream, ManagedBuffer)

    @property
    def device_id(self) -> int:
        """Ordinal of the preferred device; -1 when the preferred location is not a device."""
        return self._pref_loc_id if self._pref_loc_type == "device" else -1

    @property
    def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None:
        """Preferred location applied to allocations from this resource.

        ``None`` means that no preferred location was configured (the driver
        decides). Otherwise the value is a ``(type, id)`` tuple in which
        *type* is ``"device"``, ``"host"`` or ``"host_numa"`` and *id* is the
        device ordinal, ``None`` (for ``"host"``) or the NUMA node ID,
        respectively.
        """
        kind = self._pref_loc_type
        if kind is None:
            return None
        if kind == "host":
            # A plain host preference carries no identifier.
            return (ManagedMemoryLocationType.HOST, None)
        return (ManagedMemoryLocationType(kind), self._pref_loc_id)

    @property
    def is_device_accessible(self) -> bool:
        """Always True: buffers from this resource can be accessed from the device."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always True: buffers from this resource can be accessed from the host."""
        return True

    @property
    def is_managed(self) -> bool:
        """Always True: buffers from this resource are managed (unified) memory."""
        return True

158  

159  

IF CUDA_CORE_BUILD_MAJOR >= 13:
    # String spellings accepted for ``preferred_location_type``.
    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")


    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
        """Validate the preferred-location options and translate them.

        ``opts`` may be ``None``, which is handled like an options object
        whose two fields are both ``None``.

        Returns a 4-tuple:
            (CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)

        The first two items are the values handed to the driver-facing pool
        initialisers; the last two are the values stored on the resource.

        Raises ``ValueError`` for an unknown location type or for a location
        value that is not allowed with the selected type, and
        ``RuntimeError`` when the NUMA node has to be derived from the
        current device but the device reports a negative ``host_numa_id``.
        """
        cdef object loc = None
        cdef object kind = None
        if opts is not None:
            loc = opts.preferred_location
            kind = opts.preferred_location_type

        if not (kind is None or kind in _VALID_LOCATION_TYPES):
            raise ValueError(
                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
                f"or None, got {kind!r}"
            )

        if kind is None:
            # Legacy convention: None -> no preference, -1 -> host,
            # any other non-negative value -> device ordinal.
            if loc is None:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
                    -1, None, -1,
                )
            elif loc == -1:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                    -1, "host", -1,
                )
            elif loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0), -1 for "
                    f"host, or None for no preference, got {loc}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                loc, "device", loc,
            )

        elif kind == "device":
            if loc is None or loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0) when "
                    f"preferred_location_type is 'device', got {loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                loc, "device", loc,
            )

        elif kind == "host":
            if loc is not None:
                raise ValueError(
                    f"preferred_location must be None when "
                    f"preferred_location_type is 'host', got {loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                -1, "host", -1,
            )

        # Only "host_numa" can reach this point.
        if loc is None:
            # No explicit node: ask the current device for its host NUMA node.
            from .._device import Device
            numa_node = Device().properties.host_numa_id
            if numa_node < 0:
                raise RuntimeError(
                    "Cannot determine host NUMA ID for the current CUDA device. "
                    "The system may not support NUMA, or no CUDA context is "
                    "active. Set preferred_location to an explicit NUMA node ID "
                    "or call Device.set_current() first."
                )
        elif loc < 0:
            raise ValueError(
                f"preferred_location must be a NUMA node ID (>= 0) or None "
                f"when preferred_location_type is 'host_numa', got {loc}"
            )
        else:
            numa_node = loc

        return (
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
            numa_node, "host_numa", numa_node,
        )

248  

249  

cdef inline _MMR_init(ManagedMemoryResource self, options):
    """Initialise ``self`` from ``options`` (implementation of ``__init__``).

    On builds with ``CUDA_CORE_BUILD_MAJOR >= 13`` the options are
    normalised, the preferred location is resolved and stored on ``self``,
    and the pool is either created (options given) or taken from the
    current pool (options ``None``). On older builds a ``RuntimeError`` is
    raised unconditionally.
    """
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        cdef ManagedMemoryResourceOptions resolved_opts = check_or_create_options(
            ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
            keep_none=True
        )
        cdef cydriver.CUmemLocationType driver_loc_type
        cdef int driver_loc_id

        # The first two values feed the pool initialiser; the last two are
        # stored on the resource for the device_id / preferred_location
        # properties.
        driver_loc_type, driver_loc_id, self._pref_loc_type, self._pref_loc_id = (
            _resolve_preferred_location(resolved_opts)
        )

        if resolved_opts is not None:
            # Explicit options: set up a pool through MP_init_create_pool.
            # NOTE(review): the trailing ``False`` and ``0`` are forwarded
            # unchanged; their meaning is defined by MP_init_create_pool.
            MP_init_create_pool(
                self,
                driver_loc_type,
                driver_loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )
        else:
            # No options: initialise from the existing pool.
            try:
                MP_init_current_pool(
                    self,
                    driver_loc_type,
                    driver_loc_id,
                    cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                )
            except CUDAError as err:
                # Only a "not supported" failure on a platform without
                # concurrent managed access is turned into a friendlier
                # error; everything else propagates untouched.
                if "CUDA_ERROR_NOT_SUPPORTED" not in str(err):
                    raise
                from .._device import Device
                if Device().properties.concurrent_managed_access:
                    raise
                raise RuntimeError(
                    "The default memory pool on this device does not support "
                    "managed allocations (concurrent managed access is not "
                    "available). Use "
                    "ManagedMemoryResource(options=ManagedMemoryResourceOptions(...)) "
                    "to create a dedicated managed pool."
                ) from err

        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")

296  

297  

# Module-wide flag recording that the concurrent-access check has already
# run, and the lock under which the check is performed and the flag is set.
cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()


cdef inline _check_concurrent_managed_access():
    """Emit a one-time warning when concurrent managed access is unavailable.

    The device attribute is queried at most once per successful check: after
    the first completed call the module flag is set and later calls return
    immediately. If the driver query raises, the flag stays unset.
    """
    global _concurrent_access_warned
    cdef int has_concurrent_access = 0

    # Cheap check without the lock for the common, already-checked case.
    if _concurrent_access_warned:
        return

    with _concurrent_access_lock:
        # Re-test under the lock; only proceed if the flag is still unset.
        if not _concurrent_access_warned:
            # concurrent_managed_access is a system-level attribute for
            # sm_60 and later, so querying device 0 is sufficient.
            with nogil:
                HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                    &has_concurrent_access,
                    cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                    0))

            if has_concurrent_access == 0:
                warnings.warn(
                    "This platform does not support concurrent managed memory access "
                    "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                    "allocation is forbidden while any GPU kernel is in flight, even "
                    "if the kernel does not touch that allocation. Failing to "
                    "synchronize before host access will cause a segfault. "
                    "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                    "index.html#gpu-exclusive-access-to-managed-memory",
                    UserWarning,
                    stacklevel=3
                )

            _concurrent_access_warned = True

334  

335  

def reset_concurrent_access_warning():
    """Clear the module flag so the concurrent-access check runs again.

    Intended for tests only.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False