Coverage for cuda / core / _memory / _virtual_memory_resource.py: 88.75%
240 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from __future__ import annotations
7from dataclasses import dataclass, field
8from typing import TYPE_CHECKING, Iterable
10if TYPE_CHECKING:
11 from cuda.core._stream import Stream
13from cuda.core._device import Device
14from cuda.core._memory._buffer import Buffer, MemoryResource
15from cuda.core._utils.cuda_utils import (
16 Transaction,
17 check_or_create_options,
18 driver,
19)
20from cuda.core._utils.cuda_utils import (
21 _check_driver_error as raise_if_driver_error,
22)
23from cuda.core._utils.version import binding_version
24from cuda.core.typing import (
25 VirtualMemoryAccessType,
26 VirtualMemoryAllocationType,
27 VirtualMemoryGranularityType,
28 VirtualMemoryHandleType,
29 VirtualMemoryLocationType,
30)
32__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"]
@dataclass
class VirtualMemoryResourceOptions:
    """Settings that tell a VirtualMemoryResource how to drive the CUDA VMM APIs.

    Besides the public fields below, the class carries private lookup tables
    (``_access_flags``, ``_handle_types``, ``_granularity``, ``_location_type``,
    ``_allocation_type``) that map each option value to the matching CUDA driver
    enum, plus one static translator per table.

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationType` | str
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleType` | str
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str
        Controls granularity query and size rounding.
    addr_hint: int
        An (optional) virtual address hint to try to reserve at. Setting it to 0
        lets the CUDA driver decide.
    addr_align: int
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for the owning device.
    peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for peers.
    """

    allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED
    location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE
    handle_type: VirtualMemoryHandleType = VirtualMemoryHandleType.POSIX_FD
    granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    self_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE
    peer_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE

    # The names below carry no annotation, so @dataclass leaves them as plain
    # class attributes rather than turning them into fields.

    # Access option -> CUmemAccess_flags. ``None`` maps to 0 (no access flags).
    _a = driver.CUmemAccess_flags
    _access_flags = {  # noqa: RUF012
        VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
        VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ,
        None: 0,
    }

    # Handle option -> CUmemAllocationHandleType. ``None`` requests no exportable handle.
    _h = driver.CUmemAllocationHandleType
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }

    # Granularity option -> CUmemAllocationGranularity_flags.
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }

    # Location option -> CUmemLocationType.
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE,
        VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST,
        VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }

    # Allocation option -> CUmemAllocationType.
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type[VirtualMemoryAllocationType.MANAGED] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _access_to_flags(spec: str):
        """Translate an access option into driver access flags.

        ``None`` is a known key and yields 0; any key missing from
        ``_access_flags`` raises ``ValueError``.
        """
        table = VirtualMemoryResourceOptions._access_flags
        if spec not in table:
            raise ValueError(f"Unknown access spec: {spec!r}")
        return table[spec]

    @staticmethod
    def _allocation_type_to_driver(spec: str):
        """Translate an allocation-type option into its driver enum, or raise ``ValueError``."""
        table = VirtualMemoryResourceOptions._allocation_type
        if spec not in table:
            raise ValueError(f"Unsupported allocation_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _location_type_to_driver(spec: str):
        """Translate a location-type option into its driver enum, or raise ``ValueError``."""
        table = VirtualMemoryResourceOptions._location_type
        if spec not in table:
            raise ValueError(f"Unsupported location_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _handle_type_to_driver(spec: str):
        """Translate a handle-type option into its driver enum.

        Raises ``NotImplementedError`` for ``"win32"`` and ``ValueError`` for any
        other value that is missing from ``_handle_types``.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        table = VirtualMemoryResourceOptions._handle_types
        if spec not in table:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _granularity_to_driver(spec: str):
        """Translate a granularity option into its driver flag, or raise ``ValueError``."""
        table = VirtualMemoryResourceOptions._granularity
        if spec not in table:
            raise ValueError(f"Unsupported granularity: {spec!r}")
        return table[spec]
class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None):
        """Bind the resource to a device and validate the requested configuration.

        For any host location type the resource keeps no device
        (``self.device`` is set to ``None``).

        Raises
        ------
        RuntimeError
            If a device allocation is requested without a device, if the device
            does not report VMM support, or if GPU Direct RDMA is requested on a
            device that does not report support for it.
        """
        self.device = Device(device_id)
        self.config = check_or_create_options(
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current")
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        The bit-mask arithmetic is only correct when ``gran`` is a power of two.
        """
        return (size + gran - 1) & ~(gran - 1)

    def modify_allocation(
        self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None
    ) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        The method first tries to reserve virtual address space directly after the
        existing range so the base pointer is preserved (fast path). If that is not
        possible it reserves a new, larger range and remaps the existing physical
        memory into it (slow path).

        Both paths run inside a transaction: if a step fails, the steps already
        taken are undone. On the slow path, restoring the original mapping is
        best-effort.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            When given, it replaces ``self.config`` for this and all later calls.

        Returns
        -------
        Buffer
            ``buf`` itself when ``new_size`` does not exceed ``buf.size`` (only the
            access policy is re-applied) or when the range was extended in place.
            On the slow path ``buf`` is cleared and a new Buffer at the new address
            is returned.
        """
        if config is not None:
            self.config = config

        # Build allocation properties for new chunks
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
        # Host-located resources have self.device set to None, so only read the
        # device id for device allocations (same rule as allocate()).
        prop.location.id = self.device.device_id if self.config.location_type == "device" else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
        prop.win32HandleMetaData = 0

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            int(buf.handle) + aligned_prev_size,  # fixedAddr hint - aligned end of current range
            0,
        )

        if res != driver.CUresult.CUDA_SUCCESS or new_ptr != (int(buf.handle) + aligned_prev_size):
            # Check for specific errors that are not recoverable with the slow path
            if res in (
                driver.CUresult.CUDA_ERROR_INVALID_VALUE,
                driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
                driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
                driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
            ):
                raise_if_driver_error(res)
            if res == driver.CUresult.CUDA_SUCCESS:
                # The reservation succeeded but landed at a different address.
                # Give it back. When the reservation itself failed there is
                # nothing to free, and new_ptr must not be passed to the driver.
                (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
                raise_if_driver_error(res2)
            # Fallback: couldn't extend contiguously, need full remapping
            return self._grow_allocation_slow_path(
                buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
            )
        else:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged). The caller has already reserved ``new_ptr``; if a
        later step fails, the undo actions also free that reservation.

        Args:
            buf (Buffer):
                The buffer to grow.

            new_size (int):
                The new total size in bytes.

            prop (driver.CUmemAllocationProp):
                Allocation properties for the new memory.

            aligned_additional_size (int):
                The size of the new region to allocate, aligned to granularity.

            new_ptr (int):
                The address of the newly reserved contiguous VA region (should
                be at the end of the current buffer).

        Returns:
            Buffer: The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The VA range was reserved by the caller; register its release first
            # so it runs last when the undo actions unwind.
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )
            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        buf._size = new_size
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, frees the old VA range,
        clears ``buf`` and returns a new Buffer for the new range.

        Args:
            buf (Buffer): The buffer to grow.
            new_size (int): The new total size in bytes.
            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
            total_aligned_size (int): The total new size to reserve, aligned to granularity.
            addr_align (int): The required address alignment for the new VA range.

        Returns:
            Buffer: A new buffer object for the new pointer and size; ``buf`` is cleared.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old():
                # Try to remap the old physical memory back to the original VA range.
                # Deliberately best-effort: a failure here is swallowed so that it
                # does not interrupt the remaining undo actions.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:  # noqa: S110
                    # TODO: consider logging this exception
                    pass

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Free the old VA range (aligned previous size)
        (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
        raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list:
        """
        Build access descriptors for memory access permissions.

        The owner descriptor copies its location from ``prop``; one extra
        descriptor is added per entry in ``self.config.peers``, always with a
        device location. A zero access flag suppresses the corresponding
        descriptor(s).

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access
        """
        descs = []

        # Owner access
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, *, stream: Stream | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory. Its size
            is ``size`` rounded up to the queried granularity.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)

        config = self.config
        # ---- Build allocation properties ----
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
        prop.location.id = self.device.device_id if config.location_type == "device" else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            raise_if_driver_error(res)
            # Register the unmap undo only after the map is known to have succeeded,
            # so a failed map never triggers an unmap of a range that was not mapped.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

    def deallocate(self, ptr: int, size: int, *, stream: Stream | None = None) -> None:
        """
        Deallocate memory on the device using CUDA VMM APIs.

        The allocation handle backing ``ptr`` is retained, the range is unmapped,
        the VA reservation is freed, and the retained handle is released, in that
        order. The first failing driver call raises and skips the remaining steps.

        Parameters
        ----------
        ptr : int
            The pointer to the memory to deallocate.
        size : int
            The size in bytes of the memory to deallocate.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        (result,) = driver.cuMemUnmap(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemAddressFree(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemRelease(handle)
        raise_if_driver_error(result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.

        True for every host location type ("host", "host_numa",
        "host_numa_current"), using the same test ``__init__`` applies when it
        decides that the resource has no device.
        """
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns:
            int: CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.config.location_type == "device" else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns:
            str: A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"