Coverage for cuda/core/_memory/_managed_memory_resource.pyx: 88.24%
119 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7from cuda.bindings cimport cydriver
9from cuda.core._memory._memory_pool cimport _MemPool, _MP_allocate
10from cuda.core._memory._memory_pool cimport MP_init_create_pool, MP_init_current_pool # no-cython-lint
11from cuda.core._stream cimport Stream, Stream_accept
12from cuda.core._utils.cuda_utils cimport HANDLE_RETURN
13from cuda.core._utils.cuda_utils cimport check_or_create_options # no-cython-lint
14from cuda.core._utils.cuda_utils import CUDAError # no-cython-lint
16from dataclasses import dataclass
17import threading
18from typing import TYPE_CHECKING
19import warnings
21from cuda.core._memory._managed_buffer import ManagedBuffer
22from cuda.core.typing import ManagedMemoryLocationType
24if TYPE_CHECKING:
25 from cuda.core.graph import GraphBuilder
27__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions']
@dataclass
cdef class ManagedMemoryResourceOptions:
    """Options controlling how a :obj:`~_memory.ManagedMemoryResource` is created.

    Attributes
    ----------
    preferred_location : int | None, optional
        Identifier of the preferred location. It is either a device
        ordinal or a NUMA node ID; which one is decided by
        ``preferred_location_type``.
        (Default to ``None``)

    preferred_location_type : ManagedMemoryLocationType | str | None, optional
        Selects the interpretation of ``preferred_location``.

        ``None`` (the default) keeps the legacy interpretation, in which
        ``preferred_location`` is a device ordinal, ``-1`` for host, or
        ``None`` for no preference.

        An explicit value fixes both the kind of preferred location and
        the values accepted for ``preferred_location``:

        - ``"device"``: prefer one specific GPU. ``preferred_location``
          must be a device ordinal (``>= 0``).
        - ``"host"``: prefer host memory (OS-managed NUMA placement).
          ``preferred_location`` must be ``None``.
        - ``"host_numa"``: prefer one specific host NUMA node.
          ``preferred_location`` must be a NUMA node ID (``>= 0``);
          ``None`` derives the node from the ``host_numa_id`` attribute
          of the current CUDA device (requires an active CUDA context).

        (Default to ``None``)
    """
    preferred_location: int | None = None
    preferred_location_type: ManagedMemoryLocationType | str | None = None
cdef class ManagedMemoryResource(_MemPool):
    """
    Memory resource that serves managed memory from a stream-ordered memory pool.

    Managed memory can be accessed from both the host and the device, and is
    migrated between them automatically as needed.

    Parameters
    ----------
    options : ManagedMemoryResourceOptions
        Memory resource creation options.

        With `None`, the resource uses the driver's current stream-ordered
        memory pool; if no pool has been set as current, the driver's default
        memory pool is used.

        With any other value, a new memory pool is created and owned by the
        memory resource.

        A resource backed by an existing (current or default) pool does not
        own that pool (`is_handle_owned` is `False`), and closing the resource
        has no effect.

    Notes
    -----
    IPC (Inter-Process Communication) is not currently supported for managed
    memory pools.
    """

    def __init__(self, options: ManagedMemoryResourceOptions | dict[str, object] | None = None) -> None:
        _MMR_init(self, options)

    def allocate(self, size_t size, *, stream: Stream | GraphBuilder) -> ManagedBuffer:
        """Allocate a managed-memory buffer of ``size`` bytes.

        Parameters
        ----------
        size : int
            Number of bytes to allocate.
        stream : :obj:`~_stream.Stream`
            Keyword-only. The stream on which the allocation is performed
            asynchronously. It must be given explicitly; pass
            ``device.default_stream`` to use the default stream.

        Returns
        -------
        ManagedBuffer
            A :class:`ManagedBuffer` (a :class:`Buffer` subclass) exposing
            the property-style advice API (``read_mostly``,
            ``preferred_location``, ``accessed_by``) and the instance
            methods ``prefetch``, ``discard`` and ``discard_prefetch``.

        Raises
        ------
        AssertionError
            If ``stream`` is not a :obj:`~_stream.Stream` instance.
        TypeError
            If this memory resource is mapped (``is_mapped`` is true).
        """
        # NOTE(review): this type guard is an ``assert``; confirm it is
        # acceptable for it to be skipped when assertions are disabled.
        assert isinstance(stream, Stream), "Only Stream is supported for managed memory allocations"
        if self.is_mapped:
            raise TypeError("Cannot allocate from a mapped IPC-enabled memory resource")
        return _MP_allocate(self, size, Stream_accept(stream), ManagedBuffer)

    @property
    def device_id(self) -> int:
        """The preferred device ordinal, or -1 when the preferred location is not a device."""
        return self._pref_loc_id if self._pref_loc_type == "device" else -1

    @property
    def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None:
        """The preferred location used for managed memory allocations.

        ``None`` means no preferred location is set (the driver decides).
        Otherwise the value is a tuple ``(type, id)``: *type* is one of
        ``"device"``, ``"host"`` or ``"host_numa"``, and *id* is the device
        ordinal, ``None`` (for ``"host"``) or the NUMA node ID, respectively.
        """
        kind = self._pref_loc_type
        if kind is None:
            return None
        if kind == "host":
            # Host placement carries no identifier.
            location_type, location_id = ManagedMemoryLocationType.HOST, None
        else:
            location_type = ManagedMemoryLocationType(kind)
            location_id = self._pref_loc_id
        return (location_type, location_id)

    @property
    def is_device_accessible(self) -> bool:
        """Always True: buffers from this memory resource are device-accessible."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always True: buffers from this memory resource are host-accessible."""
        return True

    @property
    def is_managed(self) -> bool:
        """Always True: buffers from this memory resource are managed (unified) memory."""
        return True
IF CUDA_CORE_BUILD_MAJOR >= 13:
    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")

    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
        """Translate the preferred-location options into driver and stored values.

        Parameters
        ----------
        opts : ManagedMemoryResourceOptions or None
            The creation options. ``None`` is treated like options whose
            ``preferred_location`` and ``preferred_location_type`` are both
            ``None``.

        Returns
        -------
        tuple
            A 4-tuple
            ``(CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)``.
            The first two entries are the driver-level location; the last two
            are the values the caller stores on the memory resource.

        Raises
        ------
        ValueError
            If ``preferred_location_type`` is not a recognised type, or
            ``preferred_location`` is not valid for the selected type.
        RuntimeError
            If the type is ``"host_numa"``, no location is given, and the
            current device reports a negative ``host_numa_id``.
        """
        cdef object location = None
        cdef object kind = None
        if opts is not None:
            location = opts.preferred_location
            kind = opts.preferred_location_type

        if kind is None:
            # Legacy behavior: the location value alone selects the kind.
            if location is None:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
                    -1, None, -1,
                )
            if location == -1:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                    -1, "host", -1,
                )
            if location < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0), -1 for "
                    f"host, or None for no preference, got {location}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                location, "device", location,
            )

        # An explicit type was given; it must be one of the known names.
        if kind not in _VALID_LOCATION_TYPES:
            raise ValueError(
                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
                f"or None, got {kind!r}"
            )

        if kind == "device":
            if location is None or location < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0) when "
                    f"preferred_location_type is 'device', got {location!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                location, "device", location,
            )

        if kind == "host":
            if location is not None:
                raise ValueError(
                    f"preferred_location must be None when "
                    f"preferred_location_type is 'host', got {location!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                -1, "host", -1,
            )

        # Only "host_numa" is left at this point.
        if location is not None:
            if location < 0:
                raise ValueError(
                    f"preferred_location must be a NUMA node ID (>= 0) or None "
                    f"when preferred_location_type is 'host_numa', got {location}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
                location, "host_numa", location,
            )

        # No explicit node: take it from the current device's properties.
        from .._device import Device
        numa_id = Device().properties.host_numa_id
        if numa_id < 0:
            raise RuntimeError(
                "Cannot determine host NUMA ID for the current CUDA device. "
                "The system may not support NUMA, or no CUDA context is "
                "active. Set preferred_location to an explicit NUMA node ID "
                "or call Device.set_current() first."
            )
        return (
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
            numa_id, "host_numa", numa_id,
        )
cdef inline _MMR_init(ManagedMemoryResource self, options):
    """Initialise ``self`` from ``options``.

    On builds where ``CUDA_CORE_BUILD_MAJOR >= 13`` the options are
    normalised, the preferred location is resolved and stored on ``self``,
    and the pool is initialised: from the current pool when the options are
    ``None``, otherwise by creating a new pool. On other builds a
    ``RuntimeError`` is raised unconditionally.

    Raises
    ------
    RuntimeError
        If the build targets a CUDA major version below 13, or if the
        current pool reports ``CUDA_ERROR_NOT_SUPPORTED`` and the device
        lacks concurrent managed access.
    """
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        cdef cydriver.CUmemLocationType loc_type
        cdef int loc_id
        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
            ManagedMemoryResourceOptions,
            options,
            "ManagedMemoryResource options",
            keep_none=True,
        )

        resolved = _resolve_preferred_location(opts)
        loc_type, loc_id, self._pref_loc_type, self._pref_loc_id = resolved

        if opts is not None:
            # Explicit options: create a dedicated managed pool.
            # NOTE(review): the trailing ``False, 0`` arguments are passed
            # through unchanged; their meaning is defined by
            # MP_init_create_pool, which is not visible here.
            MP_init_create_pool(
                self,
                loc_type,
                loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )
        else:
            try:
                MP_init_current_pool(
                    self,
                    loc_type,
                    loc_id,
                    cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                )
            except CUDAError as err:
                # Give a more actionable error when the failure is explained
                # by missing concurrent managed access; otherwise re-raise.
                if "CUDA_ERROR_NOT_SUPPORTED" in str(err):
                    from .._device import Device
                    if not Device().properties.concurrent_managed_access:
                        raise RuntimeError(
                            "The default memory pool on this device does not support "
                            "managed allocations (concurrent managed access is not "
                            "available). Use "
                            "ManagedMemoryResource(options=ManagedMemoryResourceOptions(...)) "
                            "to create a dedicated managed pool."
                        ) from err
                raise

        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
# Set once the concurrent-access check has run; guarded by the lock below.
cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()


cdef inline _check_concurrent_managed_access():
    """Emit a one-time warning when concurrent managed memory access is unavailable.

    The check runs at most once per process (until the flag is reset): the
    flag is tested without the lock first, then tested again while holding
    the lock before the device attribute is queried.
    """
    global _concurrent_access_warned
    cdef int has_concurrent_access = 0

    if not _concurrent_access_warned:
        with _concurrent_access_lock:
            # Another thread may have completed the check while we waited.
            if not _concurrent_access_warned:
                # concurrent_managed_access is a system-level attribute for
                # sm_60 and later, so querying any device (here device 0)
                # is sufficient.
                with nogil:
                    HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                        &has_concurrent_access,
                        cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                        0))
                if not has_concurrent_access:
                    warnings.warn(
                        "This platform does not support concurrent managed memory access "
                        "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                        "allocation is forbidden while any GPU kernel is in flight, even "
                        "if the kernel does not touch that allocation. Failing to "
                        "synchronize before host access will cause a segfault. "
                        "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                        "index.html#gpu-exclusive-access-to-managed-memory",
                        UserWarning,
                        stacklevel=3
                    )

                _concurrent_access_warned = True
def reset_concurrent_access_warning() -> None:
    """Clear the module-level "already warned" flag (intended for tests).

    After this call the concurrent managed access check runs again the next
    time it is requested.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False