Coverage for cuda/core/_memory/_virtual_memory_resource.py: 91.32%
242 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7from dataclasses import dataclass, field
8from typing import TYPE_CHECKING, Iterable
10if TYPE_CHECKING:
11 from cuda.core._stream import Stream
12 from cuda.core.graph import GraphBuilder
14from cuda.core._device import Device
15from cuda.core._memory._buffer import Buffer, MemoryResource
16from cuda.core._utils.cuda_utils import (
17 Transaction,
18 check_or_create_options,
19 driver,
20)
21from cuda.core._utils.cuda_utils import (
22 _check_driver_error as raise_if_driver_error,
23)
24from cuda.core._utils.version import binding_version
25from cuda.core.typing import (
26 DevicePointerType,
27 VirtualMemoryAccessType,
28 VirtualMemoryAllocationType,
29 VirtualMemoryGranularityType,
30 VirtualMemoryHandleType,
31 VirtualMemoryLocationType,
32)
34__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"]
@dataclass
class VirtualMemoryResourceOptions:
    """Settings that tell a VirtualMemoryResource how to drive the CUDA VMM APIs.

    Besides the user-facing fields, the class carries the lookup tables (and
    the static helpers that consult them) used to turn each setting into the
    matching CUDA driver enum value.

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationType` | str
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleType` | str
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str
        Controls granularity query and size rounding.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    addr_hint: int
        An optional virtual address hint to try to reserve at. Setting it to 0
        lets the CUDA driver decide.
    addr_align: int
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for the owning device.
    peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for peers.
    """

    # ---- User-facing settings (order matters for positional construction) ----
    allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED
    location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE
    handle_type: VirtualMemoryHandleType = VirtualMemoryHandleType.POSIX_FD
    granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    self_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE
    peer_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE

    # ---- Setting -> driver enum tables (un-annotated, so not dataclass fields) ----
    # Access protection; ``None`` maps to 0, i.e. "no access flags".
    _a = driver.CUmemAccess_flags
    _access_flags = {  # noqa: RUF012
        VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
        VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ,
        None: 0,
    }
    # Exportable handle kind; ``None`` selects the driver's "no handle" value.
    _h = driver.CUmemAllocationHandleType
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }
    # Which granularity figure to query from the driver.
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }
    # Where the physical memory lives.
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE,
        VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST,
        VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type[VirtualMemoryAllocationType.MANAGED] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _translate(table: dict, spec: object, complaint: str) -> int:
        """Look ``spec`` up in ``table``; raise ``ValueError`` if it has no entry.

        ``complaint`` is the message prefix placed before ``": <repr(spec)>"``.
        """
        found = table.get(spec)
        if found is None:
            raise ValueError(f"{complaint}: {spec!r}")
        return found  # type: ignore[no-any-return]

    @staticmethod
    def _access_to_flags(spec: VirtualMemoryAccessType | None) -> int:
        """Return the driver access flags for ``spec`` (0 when ``spec`` is None)."""
        options = VirtualMemoryResourceOptions
        return options._translate(options._access_flags, spec, "Unknown access spec")

    @staticmethod
    def _allocation_type_to_driver(spec: VirtualMemoryAllocationType) -> int:
        """Return the driver allocation type for ``spec``."""
        options = VirtualMemoryResourceOptions
        return options._translate(options._allocation_type, spec, "Unsupported allocation_type")

    @staticmethod
    def _location_type_to_driver(spec: VirtualMemoryLocationType) -> int:
        """Return the driver location type for ``spec``."""
        options = VirtualMemoryResourceOptions
        return options._translate(options._location_type, spec, "Unsupported location_type")

    @staticmethod
    def _handle_type_to_driver(spec: VirtualMemoryHandleType | None) -> int:
        """Return the driver handle type for ``spec``.

        The literal ``"win32"`` is rejected with ``NotImplementedError`` before
        the table lookup happens.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        options = VirtualMemoryResourceOptions
        return options._translate(options._handle_types, spec, "Unsupported handle_type")

    @staticmethod
    def _granularity_to_driver(spec: VirtualMemoryGranularityType) -> int:
        """Return the driver granularity flag for ``spec``."""
        options = VirtualMemoryResourceOptions
        return options._translate(options._granularity, spec, "Unsupported granularity")
class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions, optional
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None) -> None:
        """Bind the resource to a device and validate the configuration.

        Raises
        ------
        RuntimeError
            If the device lacks VMM support, or GPU Direct RDMA was requested
            but the device does not support it.
        """
        self.device: Device | None = Device(device_id)
        self.config: VirtualMemoryResourceOptions = check_or_create_options(  # type: ignore[assignment]
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current"): host-located
        # resources carry no device.
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        Uses bit masking, so the result is only a true multiple of ``gran``
        when ``gran`` is a power of two.
        """
        return (size + gran - 1) & ~(gran - 1)

    def modify_allocation(
        self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None
    ) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        The method first tries to reserve the virtual address range directly
        after the existing mapping so the base pointer can be preserved (fast
        path). If that is not possible it reserves a new, larger range and
        remaps the existing physical memory into it (slow path).

        Both paths use transactional allocation: if a step fails, the steps
        already taken are rolled back.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            When given, it replaces ``self.config`` for this and all later calls.

        Returns
        -------
        Buffer
            ``buf`` itself when ``new_size`` does not exceed ``buf.size`` (only
            the access policy is re-applied) or when the range was extended in
            place. When the slow path is taken, ``buf`` is cleared and a new
            Buffer for the new address range is returned.
        """
        if config is not None:
            self.config = config

        # Build allocation properties for new chunks
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
        # Caller must not invoke modify_allocation on a host-located resource;
        # we rely on the dataclass invariant that self.device is non-None for
        # device-located resources (it's only None when location is host).
        assert self.device is not None, "modify_allocation requires a device-located resource"
        prop.location.id = self.device.device_id
        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
        prop.win32HandleMetaData = 0

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same (or smaller) size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        # NOTE(review): this equals the currently mapped size only when buf.size
        # is itself a multiple of the granularity - confirm for buffers whose
        # size was previously set to an unaligned value.
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Address right after the existing mapping, where the extension must
        # land for the base pointer to be preserved.
        wanted_ptr = int(buf.handle) + aligned_prev_size

        # Try to extend the existing VA range first
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            wanted_ptr,  # fixedAddr hint - aligned end of current range
            0,
        )

        if res != driver.CUresult.CUDA_SUCCESS or new_ptr != wanted_ptr:
            # Check for specific errors that are not recoverable with the slow path
            if res in (
                driver.CUresult.CUDA_ERROR_INVALID_VALUE,
                driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
                driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
                driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
            ):
                raise_if_driver_error(res)
            # Only a successful reservation (one that landed at a different
            # address) needs to be given back; after a failed reserve there is
            # nothing to free, and freeing would raise instead of letting the
            # slow path run.
            if res == driver.CUresult.CUDA_SUCCESS:
                (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
                raise_if_driver_error(res2)
            # Fallback: couldn't extend contiguously, need full remapping
            return self._grow_allocation_slow_path(
                buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
            )
        else:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged). On failure the transaction also frees the VA
        reservation at ``new_ptr`` that the caller made.

        Args:
            buf (Buffer):
                The buffer to grow.

            new_size (int):
                The new total size in bytes.

            prop (driver.CUmemAllocationProp):
                Allocation properties for the new memory.

            aligned_additional_size (int):
                The size of the new region to allocate, aligned to granularity.

            new_ptr (int):
                The address of the newly reserved contiguous VA region (should
                be at the end of the current buffer).

        Returns:
            Buffer: The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The VA range was already reserved by the caller; register its
            # release first so any later failure gives it back.
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )
            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        # TODO: #2049 This is a real bug, accessing _size which doesn't exist.
        # Fix bug and remove the "type: ignore[attr-defined]" comment.
        buf._size = new_size  # type: ignore[attr-defined]
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, frees the old VA range,
        clears ``buf`` and returns a new Buffer for the new range.

        Args:
            buf (Buffer): The buffer to grow.
            new_size (int): The new total size in bytes.
            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
            total_aligned_size (int): The total new size to reserve, aligned to granularity.
            addr_align (int): The required address alignment for the new VA range.

        Returns:
            Buffer: A new buffer object for the new pointer and size; ``buf`` is cleared.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old() -> None:
                # Try to remap the old physical memory back to the original VA range.
                # Deliberately best-effort: a failure here is swallowed rather
                # than raised from inside the rollback.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:  # noqa: S110
                    # TODO: consider logging this exception
                    pass

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Free the old VA range (aligned previous size)
        (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
        raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list[driver.CUmemAccessDesc]:
        """
        Build access descriptors for memory access permissions.

        One descriptor is produced for the owning location (copied from
        ``prop.location``) when ``self_access`` yields non-zero flags, plus one
        per entry of ``config.peers`` when ``peer_access`` yields non-zero flags.

        Parameters
        ----------
        prop : driver.CUmemAllocationProp
            Allocation properties whose location identifies the owner.

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access

        Raises
        ------
        ValueError
            If ``self_access`` or ``peer_access`` is not a known access spec.
        """
        descs = []

        # Owner access
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, *, stream: Stream | GraphBuilder | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory. Its size
            is ``size`` rounded up to the queried granularity.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            # The stream is only passed through Stream_accept; its result is not used.
            Stream_accept(stream)

        config = self.config
        # ---- Build allocation properties ----
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
        # Host-located resources have no device; -1 is used as the location id.
        prop.location.id = self.device.device_id if self.device is not None else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            raise_if_driver_error(res)
            # Register the unmap undo only once the mapping exists, so a failed
            # cuMemMap does not make the rollback unmap an unmapped range.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

    def deallocate(self, ptr: DevicePointerType, size: int, *, stream: Stream | GraphBuilder | None = None) -> None:
        """
        Deallocate memory on the device using CUDA VMM APIs.

        Retains the allocation handle behind ``ptr``, unmaps the range, frees
        the VA reservation and finally releases the handle.

        Parameters
        ----------
        ptr : DevicePointerType
            The pointer to the memory to deallocate. ``None`` is treated as 0.
        size : int
            The size in bytes of the memory to deallocate.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.
        """
        ptr = 0 if ptr is None else int(ptr)

        if stream is not None:
            from cuda.core._stream import Stream_accept

            # The stream is only passed through Stream_accept; its result is not used.
            Stream_accept(stream)
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        (result,) = driver.cuMemUnmap(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemAddressFree(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemRelease(handle)
        raise_if_driver_error(result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.

        True only when the configured location type is ``"device"``.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.

        True for every host location type ("host", "host_numa",
        "host_numa_current"), using the same test as ``__init__`` so that a
        NUMA-located resource is not reported as inaccessible from both host
        and device.
        """
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns:
            int: CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.device is not None else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns:
            str: A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"