Coverage for cuda/core/_memory/_managed_memory_resource.pyx

3# SPDX-License-Identifier: Apache-2.0

5from __future__ import annotations

7from cuda.bindings cimport cydriver

9from cuda.core._memory._memory_pool cimport _MemPool, _MP_allocate

10from cuda.core._memory._memory_pool cimport MP_init_create_pool, MP_init_current_pool # no-cython-lint

11from cuda.core._stream cimport Stream, Stream_accept

12from cuda.core._utils.cuda_utils cimport HANDLE_RETURN

13from cuda.core._utils.cuda_utils cimport check_or_create_options # no-cython-lint

14from cuda.core._utils.cuda_utils import CUDAError # no-cython-lint (empty)

16from dataclasses import dataclass (empty)

17import threading (empty)

18import warnings (empty)

20from cuda.core._memory._managed_buffer import ManagedBuffer (empty)

21from cuda.core.typing import ManagedMemoryLocationType (empty)

# Public names exported by this module.
__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions']

@dataclass
cdef class ManagedMemoryResourceOptions:
    """Customizable :obj:`~_memory.ManagedMemoryResource` options.

    The two attributes are validated together when a
    :obj:`~_memory.ManagedMemoryResource` is constructed; an invalid
    combination raises :class:`ValueError` at that point.

    Attributes
    ----------
    preferred_location : int | None, optional
        A location identifier (device ordinal or NUMA node ID) whose
        meaning depends on ``preferred_location_type``.
        (Defaults to ``None``)

    preferred_location_type : ManagedMemoryLocationType | str | None, optional
        Controls how ``preferred_location`` is interpreted.

        When set to ``None`` (the default), legacy behavior is used:
        ``preferred_location`` is interpreted as a device ordinal,
        ``-1`` for host, or ``None`` for no preference.

        When set explicitly, the type determines both the kind of
        preferred location and the valid values for
        ``preferred_location``:

        - ``"device"``: prefer a specific GPU. ``preferred_location``
          must be a device ordinal (``>= 0``).
        - ``"host"``: prefer host memory (OS-managed NUMA placement).
          ``preferred_location`` must be ``None``.
        - ``"host_numa"``: prefer a specific host NUMA node.
          ``preferred_location`` must be a NUMA node ID (``>= 0``),
          or ``None`` to derive the NUMA node from the current CUDA
          device's ``host_numa_id`` attribute (requires an active
          CUDA context). If that attribute is negative, a
          :class:`RuntimeError` is raised when the resource is
          constructed.

        (Defaults to ``None``)
    """
    preferred_location: int | None = None
    preferred_location_type: ManagedMemoryLocationType | str | None = None

cdef class ManagedMemoryResource(_MemPool):
    """
    A memory resource that hands out managed (unified) memory from a
    stream-ordered memory pool.

    Managed memory can be accessed from both the host and the device; it is
    migrated between them automatically as needed.

    Parameters
    ----------
    options : ManagedMemoryResourceOptions
        Memory resource creation options.

        When `None`, the resource uses the driver's current stream-ordered
        memory pool, falling back to the driver's default memory pool if no
        pool has been set as current.

        When not `None`, a new memory pool is created and owned by this
        memory resource.

        A resource backed by an existing (current or default) pool does not
        own that pool (`is_handle_owned` is `False`), so closing the
        resource has no effect.

    Notes
    -----
    IPC (Inter-Process Communication) is not currently supported for managed
    memory pools.
    """

    def __init__(self, options=None):
        _MMR_init(self, options)

    def allocate(self, size_t size, *, stream: Stream):
        """Allocate a managed-memory buffer of ``size`` bytes.

        Parameters
        ----------
        size : int
            Number of bytes to allocate.
        stream : :obj:`~_stream.Stream`
            Keyword-only. The stream on which the allocation is performed
            asynchronously. It has no default and must be passed
            explicitly; pass ``device.default_stream`` to use the default
            stream.

        Returns
        -------
        ManagedBuffer
            A :class:`ManagedBuffer` (a :class:`Buffer` subclass) exposing
            the property-style advice API (``read_mostly``,
            ``preferred_location``, ``accessed_by``) and the instance
            methods ``prefetch``, ``discard`` and ``discard_prefetch``.

        Raises
        ------
        TypeError
            If this resource is mapped (``is_mapped`` is true).
        """
        cdef Stream alloc_stream

        # Mapped resources cannot serve new allocations.
        if self.is_mapped:
            raise TypeError("Cannot allocate from a mapped IPC-enabled memory resource")

        alloc_stream = Stream_accept(stream)
        return _MP_allocate(self, size, alloc_stream, ManagedBuffer)

    @property
    def device_id(self) -> int:
        """Preferred device ordinal; ``-1`` when the preferred location is not a device."""
        return self._pref_loc_id if self._pref_loc_type == "device" else -1

    @property
    def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None:
        """Preferred location applied to managed memory allocations.

        ``None`` means no preferred location is set (the driver decides).
        Otherwise the value is a ``(type, id)`` tuple in which *type* is one
        of ``"device"``, ``"host"`` or ``"host_numa"`` and *id* is,
        respectively, the device ordinal, ``None``, or the NUMA node ID.
        """
        loc_kind = self._pref_loc_type
        if loc_kind is None:
            return None
        if loc_kind == "host":
            # Plain host placement carries no identifier.
            return (ManagedMemoryLocationType.HOST, None)
        return (ManagedMemoryLocationType(loc_kind), self._pref_loc_id)

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True``: buffers from this resource are device-accessible."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True``: buffers from this resource are host-accessible."""
        return True

    @property
    def is_managed(self) -> bool:
        """Always ``True``: buffers from this resource are managed (unified) memory."""
        return True

158

159

IF CUDA_CORE_BUILD_MAJOR >= 13:
    # Accepted explicit values for ``preferred_location_type``.
    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")


    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
        """Turn the preferred-location options into driver and stored values.

        Parameters
        ----------
        opts : ManagedMemoryResourceOptions or None
            The user options. ``None`` is treated as "no preference".

        Returns
        -------
        tuple
            A 4-tuple
            ``(CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)``.
            The first two entries are the driver-level location; the last
            two are the values the resource stores for later reporting.

        Raises
        ------
        ValueError
            If ``preferred_location_type`` is not a recognized value, or
            ``preferred_location`` is not valid for the selected type.
        RuntimeError
            If the type is ``"host_numa"``, no location was given, and the
            current device reports a negative ``host_numa_id``.
        """
        cdef object pref_loc = None
        cdef object pref_type = None
        if opts is not None:
            pref_loc = opts.preferred_location
            pref_type = opts.preferred_location_type

        if pref_type is None:
            # Legacy behavior: the location alone selects the kind
            # (None -> no preference, -1 -> host, >= 0 -> device ordinal).
            if pref_loc is None:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
                    -1,
                    None,
                    -1,
                )
            elif pref_loc == -1:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                    -1,
                    "host",
                    -1,
                )
            elif pref_loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0), -1 for "
                    f"host, or None for no preference, got {pref_loc}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                pref_loc,
                "device",
                pref_loc,
            )

        # An explicit type was given; it must be one of the known kinds.
        if pref_type not in _VALID_LOCATION_TYPES:
            raise ValueError(
                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
                f"or None, got {pref_type!r}"
            )

        if pref_type == "device":
            if pref_loc is None or pref_loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0) when "
                    f"preferred_location_type is 'device', got {pref_loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                pref_loc,
                "device",
                pref_loc,
            )
        elif pref_type == "host":
            if pref_loc is not None:
                raise ValueError(
                    f"preferred_location must be None when "
                    f"preferred_location_type is 'host', got {pref_loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                -1,
                "host",
                -1,
            )

        # Only "host_numa" can reach this point.
        if pref_loc is not None:
            if pref_loc < 0:
                raise ValueError(
                    f"preferred_location must be a NUMA node ID (>= 0) or None "
                    f"when preferred_location_type is 'host_numa', got {pref_loc}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
                pref_loc,
                "host_numa",
                pref_loc,
            )

        # No explicit node: take the NUMA node reported by the current device.
        from .._device import Device
        node = Device().properties.host_numa_id
        if node < 0:
            raise RuntimeError(
                "Cannot determine host NUMA ID for the current CUDA device. "
                "The system may not support NUMA, or no CUDA context is "
                "active. Set preferred_location to an explicit NUMA node ID "
                "or call Device.set_current() first."
            )
        return (
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
            node,
            "host_numa",
            node,
        )

248

249

cdef inline _MMR_init(ManagedMemoryResource self, options):
    """Initialize ``self`` as a managed-memory pool resource.

    With ``options`` left as ``None`` the resource is initialized through
    ``MP_init_current_pool``; otherwise through ``MP_init_create_pool``.
    In both cases the resolved preferred location is stored on ``self``
    and the one-time concurrent-managed-access check runs afterwards.

    Raises
    ------
    RuntimeError
        When built against CUDA older than 13.0, or when initializing from
        the current pool fails with ``CUDA_ERROR_NOT_SUPPORTED`` on a
        device without concurrent managed access.
    """
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        # keep_none=True keeps a None ``options`` as None, which is what the
        # ``opts is not None`` test below relies on.
        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
            ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
            keep_none=True
        )
        cdef cydriver.CUmemLocationType loc_type
        cdef int loc_id

        loc_type, loc_id, self._pref_loc_type, self._pref_loc_id = (
            _resolve_preferred_location(opts)
        )

        if opts is not None:
            # NOTE(review): the meaning of the trailing ``False`` and ``0``
            # arguments is defined by MP_init_create_pool and is not visible
            # here -- confirm against its signature.
            MP_init_create_pool(
                self,
                loc_type,
                loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )
        else:
            try:
                MP_init_current_pool(
                    self,
                    loc_type,
                    loc_id,
                    cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                )
            except CUDAError as e:
                # Only "not supported" failures get a friendlier message.
                if "CUDA_ERROR_NOT_SUPPORTED" not in str(e):
                    raise
                from .._device import Device
                if Device().properties.concurrent_managed_access:
                    raise
                raise RuntimeError(
                    "The default memory pool on this device does not support "
                    "managed allocations (concurrent managed access is not "
                    "available). Use "
                    "ManagedMemoryResource(options=ManagedMemoryResourceOptions(...)) "
                    "to create a dedicated managed pool."
                ) from e

        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")

296

297

# Set to True once _check_concurrent_managed_access has completed its check;
# cleared again by reset_concurrent_access_warning.
cdef bint _concurrent_access_warned = False
# Held by _check_concurrent_managed_access while it performs the check.
cdef object _concurrent_access_lock = threading.Lock()

300

301

cdef inline _check_concurrent_managed_access():
    """Warn once if the platform lacks concurrent managed memory access.

    Queries ``CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`` for device 0
    and emits a ``UserWarning`` when it is zero. The module-level flag
    ``_concurrent_access_warned`` is set afterwards whether or not a
    warning was issued, so later calls return immediately.
    """
    global _concurrent_access_warned
    # Fast path: the check already ran, no need to take the lock.
    if _concurrent_access_warned:
        return

    cdef int c_concurrent = 0
    with _concurrent_access_lock:
        # Re-test under the lock: the flag may have been set by another
        # caller between the first test and acquiring the lock.
        if _concurrent_access_warned:
            return

        # concurrent_managed_access is a system-level attribute for sm_60 and
        # later, so any device will do.
        with nogil:
            HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                &c_concurrent,
                cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                0))
        if not c_concurrent:
            # NOTE(review): stacklevel=3 presumably aims the warning at the
            # user's call site -- confirm how Cython frames are counted.
            warnings.warn(
                "This platform does not support concurrent managed memory access "
                "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                "allocation is forbidden while any GPU kernel is in flight, even "
                "if the kernel does not touch that allocation. Failing to "
                "synchronize before host access will cause a segfault. "
                "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                "index.html#gpu-exclusive-access-to-managed-memory",
                UserWarning,
                stacklevel=3
            )

        # Reached only if the query and warnings.warn did not raise; if
        # either raises, the flag stays False and the check runs again on
        # the next call.
        _concurrent_access_warned = True

334

335

def reset_concurrent_access_warning():
    """Clear the "already checked" flag for the concurrent access warning.

    Intended for tests: after this call, the next
    ``_check_concurrent_managed_access`` invocation performs the check
    again instead of returning early.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False

Coverage for cuda/core/_memory/_managed_memory_resource.pyx: 88.79%

116 statements