# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Iterable, Literal, Union

from cuda.core.experimental._device import Device
from cuda.core.experimental._memory._buffer import Buffer, MemoryResource
from cuda.core.experimental._utils.cuda_utils import (
    Transaction,
    check_or_create_options,
    driver,
    get_binding_version,
)
from cuda.core.experimental._utils.cuda_utils import (
    _check_driver_error as raise_if_driver_error,
)

if TYPE_CHECKING:
    from cuda.core.experimental._stream import Stream

__all__ = ["VirtualMemoryResourceOptions", "VirtualMemoryResource"]

VirtualMemoryHandleTypeT = Union[Literal["posix_fd", "generic", "win32", "win32_kmt", "fabric"], None]
VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"]
VirtualMemoryGranularityT = Literal["minimum", "recommended"]
VirtualMemoryAccessTypeT = Union[Literal["rw", "r"], None]
VirtualMemoryAllocationTypeT = Literal["pinned", "managed"]


@dataclass
class VirtualMemoryResourceOptions:

36 """A configuration object for the VirtualMemoryResource 

37 Stores configuration information which tells the resource how to use the CUDA VMM APIs 

38 

39 Attributes 

40 ---------- 

41 allocation_type: :obj:`~_memory.VirtualMemoryAllocationTypeT` 

42 Controls the type of allocation. 

43 location_type: :obj:`~_memory.VirtualMemoryLocationTypeT` 

44 Controls the location of the allocation. 

45 handle_type: :obj:`~_memory.VirtualMemoryHandleTypeT` 

46 Export handle type for the physical allocation. Use 

47 ``"posix_fd"`` on Linux if you plan to 

48 import/export the allocation (required for cuMemRetainAllocationHandle). 

49 Use `None` if you don't need an exportable handle. 

50 gpu_direct_rdma: bool 

51 Hint that the allocation should be GDR-capable (if supported). 

52 granularity: :obj:`~_memory.VirtualMemoryGranularityT` 

53 Controls granularity query and size rounding. 

54 addr_hint: int 

55 A (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide. 

56 addr_align: int 

57 Alignment for the VA reservation. If `None`, use the queried granularity. 

58 peers: Iterable[int] 

59 Extra device IDs that should be granted access in addition to ``device``. 

60 self_access: :obj:`~_memory.VirtualMemoryAccessTypeT` 

61 Access flags for the owning device. 

62 peer_access: :obj:`~_memory.VirtualMemoryAccessTypeT` 

63 Access flags for peers. 
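
    Examples
    --------
    A minimal sketch; the field values below are illustrative, not required:

    >>> options = VirtualMemoryResourceOptions(
    ...     location_type="device",
    ...     handle_type="posix_fd",
    ...     granularity="recommended",
    ...     peers=(1,),  # also grant access to device 1
    ...     peer_access="r",  # peers get read-only access
    ... )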

64 """ 

65 

66 # Human-friendly strings; normalized in __post_init__ 

67 allocation_type: VirtualMemoryAllocationTypeT = "pinned" 

68 location_type: VirtualMemoryLocationTypeT = "device" 

69 handle_type: VirtualMemoryHandleTypeT = "posix_fd" 

70 granularity: VirtualMemoryGranularityT = "recommended" 

71 gpu_direct_rdma: bool = False 

72 addr_hint: int | None = 0 

73 addr_align: int | None = None 

74 peers: Iterable[int] = field(default_factory=tuple) 

75 self_access: VirtualMemoryAccessTypeT = "rw" 

76 peer_access: VirtualMemoryAccessTypeT = "rw" 

77 

78 _a = driver.CUmemAccess_flags 

79 _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0} 

80 _h = driver.CUmemAllocationHandleType 

81 _handle_types = { 

82 None: _h.CU_MEM_HANDLE_TYPE_NONE, 

83 "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 

84 "win32": _h.CU_MEM_HANDLE_TYPE_WIN32, 

85 "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, 

86 "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC, 

87 } 

88 _g = driver.CUmemAllocationGranularity_flags 

89 _granularity = { 

90 "recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, 

91 "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM, 

92 } 

93 _l = driver.CUmemLocationType 

94 _location_type = { 

95 "device": _l.CU_MEM_LOCATION_TYPE_DEVICE, 

96 "host": _l.CU_MEM_LOCATION_TYPE_HOST, 

97 "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, 

98 "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, 

99 } 

100 # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not 

101 _a = driver.CUmemAllocationType 

102 _allocation_type = {"pinned": _a.CU_MEM_ALLOCATION_TYPE_PINNED} 

103 ver_major, ver_minor = get_binding_version() 

104 if ver_major >= 13: 

105 _allocation_type["managed"] = _a.CU_MEM_ALLOCATION_TYPE_MANAGED 

106 

107 @staticmethod 

108 def _access_to_flags(spec: str): 

109 flags = VirtualMemoryResourceOptions._access_flags.get(spec) 

110 if flags is None: 

111 raise ValueError(f"Unknown access spec: {spec!r}") 

112 return flags 

113 

114 @staticmethod 

115 def _allocation_type_to_driver(spec: str): 

116 alloc_type = VirtualMemoryResourceOptions._allocation_type.get(spec) 

117 if alloc_type is None: 

118 raise ValueError(f"Unsupported allocation_type: {spec!r}") 

119 return alloc_type 

120 

121 @staticmethod 

122 def _location_type_to_driver(spec: str): 

123 loc_type = VirtualMemoryResourceOptions._location_type.get(spec) 

124 if loc_type is None: 

125 raise ValueError(f"Unsupported location_type: {spec!r}") 

126 return loc_type 

127 

128 @staticmethod 

129 def _handle_type_to_driver(spec: str): 

130 if spec == "win32": 

131 raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team") 

132 handle_type = VirtualMemoryResourceOptions._handle_types.get(spec) 

133 if handle_type is None: 

134 raise ValueError(f"Unsupported handle_type: {spec!r}") 

135 return handle_type 

136 

137 @staticmethod 

138 def _granularity_to_driver(spec: str): 

139 granularity = VirtualMemoryResourceOptions._granularity.get(spec) 

140 if granularity is None: 

141 raise ValueError(f"Unsupported granularity: {spec!r}") 

142 return granularity 

143 

144 

145class VirtualMemoryResource(MemoryResource): 

146 """Create a device memory resource that uses the CUDA VMM APIs to allocate memory. 

147 

148 Parameters 

149 ---------- 

150 device_id : Device | int 

151 Device for which a memory resource is constructed. 

152 

153 config : VirtualMemoryResourceOptions 

154 A configuration object for the VirtualMemoryResource 

155 

156 

157 Warning 

158 ------- 

159 This is a low-level API that is provided only for convenience. Make sure you fully understand 

160 how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses 

161 in cuda.core should already meet the common needs. 
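
    Examples
    --------
    A minimal sketch (assumes device 0 supports the CUDA VMM APIs):

    >>> from cuda.core.experimental import Device
    >>> device = Device(0)
    >>> device.set_current()
    >>> mr = VirtualMemoryResource(device)
    >>> buf = mr.allocate(1 << 20)  # size is rounded up to the allocation granularity
    >>> buf.close()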

162 """ 

163 

164 def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions = None): 

165 self.device = Device(device_id) 

166 self.config = check_or_create_options( 

167 VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False 

168 ) 

169 # Matches ("host", "host_numa", "host_numa_current") 

170 if "host" in self.config.location_type: 

171 self.device = None 

172 

173 if not self.device and self.config.location_type == "device": 

174 raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations") 

175 

176 if self.device and not self.device.properties.virtual_memory_management_supported: 

177 raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support") 

178 

179 # Validate RDMA support if requested 

180 if ( 

181 self.config.gpu_direct_rdma 

182 and self.device is not None 

183 and not self.device.properties.gpu_direct_rdma_supported 

184 ): 

185 raise RuntimeError("GPU Direct RDMA is not supported on this device") 

186 

187 @staticmethod 

188 def _align_up(size: int, gran: int) -> int: 

189 """ 

190 Align a size up to the nearest multiple of a granularity. 
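
        Assumes ``gran`` is a nonzero power of two, as is the case in practice for
        granularities reported by ``cuMemGetAllocationGranularity``.

        >>> VirtualMemoryResource._align_up(100, 64)
        128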

191 """ 

192 return (size + gran - 1) & ~(gran - 1) 

193 

194 def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions = None) -> Buffer: 

195 """ 

196 Grow an existing allocation using CUDA VMM, with a configurable policy. 

197 

198 This implements true growing allocations that preserve the base pointer 

199 by extending the virtual address range and mapping additional physical memory. 

200 

201 This function uses transactional allocation: if any step fails, the original buffer is not modified and 

202 all steps the function took are rolled back so a new allocation is not created. 

203 

204 Parameters 

205 ---------- 

206 buf : Buffer 

207 The existing buffer to grow 

208 new_size : int 

209 The new total size for the allocation 

210 config : VirtualMemoryResourceOptions, optional 

211 Configuration for the new physical memory chunks. If None, uses current config. 

212 

213 Returns 

214 ------- 

215 Buffer 

216 The same buffer with updated size and properties, preserving the original pointer 
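
        Examples
        --------
        A minimal sketch, continuing from an existing resource ``mr`` and buffer ``buf``:

        >>> buf = mr.modify_allocation(buf, 2 * buf.size)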

217 """ 

218 if config is not None: 

219 self.config = config 

220 

221 # Build allocation properties for new chunks 

222 prop = driver.CUmemAllocationProp() 

223 prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type) 

224 prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type) 

225 prop.location.id = self.device.device_id 

226 prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0 

227 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type) 

228 prop.win32HandleMetaData = 0 

229 

230 # Query granularity 

231 gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity) 

232 res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag) 

233 raise_if_driver_error(res) 

234 

235 # Calculate sizes 

236 additional_size = new_size - buf.size 

237 if additional_size <= 0: 

238 # Same size: only update access policy if needed; avoid zero-sized driver calls 

239 descs = self._build_access_descriptors(prop) 

240 if descs: 

241 (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs)) 

242 raise_if_driver_error(res) 

243 return buf 

244 

245 aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran) 

246 total_aligned_size = VirtualMemoryResource._align_up(new_size, gran) 

247 aligned_prev_size = total_aligned_size - aligned_additional_size 

248 addr_align = self.config.addr_align or gran 

249 

250 # Try to extend the existing VA range first 

251 res, new_ptr = driver.cuMemAddressReserve( 

252 aligned_additional_size, 

253 addr_align, 

254 int(buf.handle) + aligned_prev_size, # fixedAddr hint - aligned end of current range 

255 0, 

256 ) 

257 

258 if res != driver.CUresult.CUDA_SUCCESS or new_ptr != (int(buf.handle) + aligned_prev_size): 

259 # Check for specific errors that are not recoverable with the slow path 

260 if res in ( 

261 driver.CUresult.CUDA_ERROR_INVALID_VALUE, 

262 driver.CUresult.CUDA_ERROR_NOT_PERMITTED, 

263 driver.CUresult.CUDA_ERROR_NOT_INITIALIZED, 

264 driver.CUresult.CUDA_ERROR_NOT_SUPPORTED, 

265 ): 

266 raise_if_driver_error(res) 

267 (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size) 

268 raise_if_driver_error(res2) 

269 # Fallback: couldn't extend contiguously, need full remapping 

270 return self._grow_allocation_slow_path( 

271 buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align 

272 ) 

273 else: 

274 # Success! We can extend the VA range contiguously 

275 return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr) 

276 

277 def _grow_allocation_fast_path( 

278 self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int 

279 ) -> Buffer: 

280 """ 

281 Fast path for growing a virtual memory allocation when the new region can be 

282 reserved contiguously after the existing buffer. 

283 

284 This function creates and maps new physical memory for the additional size, 

285 sets access permissions, and updates the buffer size in place (the pointer 

286 remains unchanged). 

287 

288 Args: 

289 buf (Buffer): 

290 The buffer to grow. 

291 

292 new_size (int): 

293 The new total size in bytes. 

294 

295 prop (driver.CUmemAllocationProp): 

296 Allocation properties for the new memory. 

297 

298 aligned_additional_size (int): 

299 The size of the new region to allocate, aligned to granularity. 

300 

301 new_ptr (int): 

302 The address of the newly reserved contiguous VA region (should 

303 be at the end of the current buffer). 

304 

305 Returns: 

306 Buffer: The same buffer object with its size updated to `new_size`. 

307 """ 

308 with Transaction() as trans: 

309 # Create new physical memory for the additional size 

310 trans.append( 

311 lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0]) 

312 ) 

313 res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0) 

314 raise_if_driver_error(res) 

315 # Register undo for creation 

316 trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0])) 

317 

318 # Map the new physical memory to the extended VA range 

319 (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0) 

320 raise_if_driver_error(res) 

321 # Register undo for mapping 

322 trans.append( 

323 lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]) 

324 ) 

325 

326 # Set access permissions for the new portion 

327 descs = self._build_access_descriptors(prop) 

328 if descs: 

329 (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs)) 

330 raise_if_driver_error(res) 

331 

332 # All succeeded, cancel undo actions 

333 trans.commit() 

334 

335 # Update the buffer size (pointer stays the same) 

336 buf._size = new_size 

337 return buf 

338 

339 def _grow_allocation_slow_path( 

340 self, 

341 buf: Buffer, 

342 new_size: int, 

343 prop: driver.CUmemAllocationProp, 

344 aligned_additional_size: int, 

345 total_aligned_size: int, 

346 addr_align: int, 

347 ) -> Buffer: 

348 """ 

349 Slow path for growing a virtual memory allocation when the new region cannot be 

350 reserved contiguously after the existing buffer. 

351 

352 This function reserves a new, larger virtual address (VA) range, remaps the old 

353 physical memory to the beginning of the new VA range, creates and maps new physical 

354 memory for the additional size, sets access permissions, and updates the buffer's 

355 pointer and size. 

356 

357 Args: 

358 buf (Buffer): The buffer to grow. 

359 new_size (int): The new total size in bytes. 

360 prop (driver.CUmemAllocationProp): Allocation properties for the new memory. 

361 aligned_additional_size (int): The size of the new region to allocate, aligned to granularity. 

362 total_aligned_size (int): The total new size to reserve, aligned to granularity. 

363 addr_align (int): The required address alignment for the new VA range. 

364 

365 Returns: 

366 Buffer: The buffer object updated with the new pointer and size. 

367 """ 

368 with Transaction() as trans: 

369 # Reserve a completely new, larger VA range 

370 res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0) 

371 raise_if_driver_error(res) 

372 # Register undo for VA reservation 

373 trans.append( 

374 lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0]) 

375 ) 

376 

377 # Get the old allocation handle for remapping 

378 result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle) 

379 raise_if_driver_error(result) 

380 # Register undo for old_handle 

381 trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0])) 

382 

383 # Unmap the old VA range (aligned previous size) 

384 aligned_prev_size = total_aligned_size - aligned_additional_size 

385 (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size) 

386 raise_if_driver_error(result) 

387 

388 def _remap_old(): 

389 # Try to remap the old physical memory back to the original VA range 

390 try: 

391 (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0) 

392 raise_if_driver_error(res) 

393 except Exception: # noqa: S110 

394 # TODO: consider logging this exception 

395 pass 

396 

397 trans.append(_remap_old) 

398 

399 # Remap the old physical memory to the new VA range (aligned previous size) 

400 (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0) 

401 raise_if_driver_error(res) 

402 

403 # Register undo for mapping 

404 trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])) 

405 

406 # Create new physical memory for the additional size 

407 res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0) 

408 raise_if_driver_error(res) 

409 

410 # Register undo for new physical memory 

411 trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0])) 

412 

413 # Map the new physical memory to the extended portion (aligned offset) 

414 (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0) 

415 raise_if_driver_error(res) 

416 

417 # Register undo for mapping 

418 trans.append( 

419 lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error( 

420 driver.cuMemUnmap(base + offs, s)[0] 

421 ) 

422 ) 

423 

424 # Set access permissions for the entire new range 

425 descs = self._build_access_descriptors(prop) 

426 if descs: 

427 (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs)) 

428 raise_if_driver_error(res) 

429 

430 # All succeeded, cancel undo actions 

431 trans.commit() 

432 

433 # Free the old VA range (aligned previous size) 

434 (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size) 

435 raise_if_driver_error(res2) 

436 

437 # Invalidate the old buffer so its destructor won't try to free again 

438 buf._clear() 

439 

440 # Return a new Buffer for the new mapping 

441 return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self) 

442 

443 def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list: 

444 """ 

445 Build access descriptors for memory access permissions. 

446 

447 Returns 

448 ------- 

449 list 

450 List of CUmemAccessDesc objects for setting memory access 

451 """ 

452 descs = [] 

453 

454 # Owner access 

455 owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access) 

456 if owner_flags: 

457 d = driver.CUmemAccessDesc() 

458 d.location.type = prop.location.type 

459 d.location.id = prop.location.id 

460 d.flags = owner_flags 

461 descs.append(d) 

462 

463 # Peer device access 

464 peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access) 

465 if peer_flags: 

466 for peer_dev in self.config.peers: 

467 d = driver.CUmemAccessDesc() 

468 d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE 

469 d.location.id = int(peer_dev) 

470 d.flags = peer_flags 

471 descs.append(d) 

472 

473 return descs 

474 

475 def allocate(self, size: int, stream: Stream | None = None) -> Buffer: 

476 """ 

477 Allocate a buffer of the given size using CUDA virtual memory. 

478 

479 Parameters 

480 ---------- 

481 size : int 

482 The size in bytes of the buffer to allocate. 

483 stream : Stream, optional 

484 CUDA stream to associate with the allocation (not currently supported). 

485 

486 Returns 

487 ------- 

488 Buffer 

489 A Buffer object representing the allocated virtual memory. 

490 

491 Raises 

492 ------ 

493 NotImplementedError 

494 If a stream is provided or if the location type is not device memory. 

495 CUDAError 

496 If any CUDA driver API call fails during allocation. 

497 

498 Notes 

499 ----- 

500 This method uses transactional allocation: if any step fails, all resources 

501 allocated so far are automatically cleaned up. The allocation is performed 

502 with the configured granularity, access permissions, and peer access as 

503 specified in the resource's configuration. 
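
        Examples
        --------
        A minimal sketch (assumes the device supports the CUDA VMM APIs and, with
        the default ``handle_type="posix_fd"``, a Linux platform):

        >>> mr = VirtualMemoryResource(0)
        >>> buf = mr.allocate(4096)  # rounded up to the allocation granularity
        >>> buf.size >= 4096
        True
        >>> buf.close()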

504 """ 

505 if stream is not None: 

506 raise NotImplementedError("Stream is not supported with VirtualMemoryResource") 

507 

508 config = self.config 

509 # ---- Build allocation properties ---- 

510 prop = driver.CUmemAllocationProp() 

511 prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type) 

512 prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type) 

513 prop.location.id = self.device.device_id if config.location_type == "device" else -1 

514 prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0 

515 prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type) 

516 prop.win32HandleMetaData = 0 

517 

518 # ---- Query and apply granularity ---- 

519 # Choose min vs recommended granularity per config 

520 gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity) 

521 res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag) 

522 raise_if_driver_error(res) 

523 

524 aligned_size = VirtualMemoryResource._align_up(size, gran) 

525 addr_align = config.addr_align or gran 

526 

527 # ---- Transactional allocation ---- 

528 with Transaction() as trans: 

529 # ---- Create physical memory ---- 

530 res, handle = driver.cuMemCreate(aligned_size, prop, 0) 

531 raise_if_driver_error(res) 

532 # Register undo for physical memory 

533 trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0])) 

534 

535 # ---- Reserve VA space ---- 

536 # Potentially, use a separate size for the VA reservation from the physical allocation size 

537 res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0) 

538 raise_if_driver_error(res) 

539 # Register undo for VA reservation 

540 trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0])) 

541 

542 # ---- Map physical memory into VA ---- 

543 (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0) 

544 trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0])) 

545 raise_if_driver_error(res) 

546 

547 # ---- Set access for owner + peers ---- 

548 descs = self._build_access_descriptors(prop) 

549 if descs: 

550 (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs)) 

551 raise_if_driver_error(res) 

552 

553 trans.commit() 

554 

555 # Done — return a Buffer that tracks this VA range 

556 buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self) 

557 return buf 

558 

559 def deallocate(self, ptr: int, size: int, stream: Stream | None = None) -> None: 

560 """ 

561 Deallocate memory on the device using CUDA VMM APIs. 

562 """ 

563 result, handle = driver.cuMemRetainAllocationHandle(ptr) 

564 raise_if_driver_error(result) 

565 (result,) = driver.cuMemUnmap(ptr, size) 

566 raise_if_driver_error(result) 

567 (result,) = driver.cuMemAddressFree(ptr, size) 

568 raise_if_driver_error(result) 

569 (result,) = driver.cuMemRelease(handle) 

570 raise_if_driver_error(result) 

571 

572 @property 

573 def is_device_accessible(self) -> bool: 

574 """ 

575 Indicates whether the allocated memory is accessible from the device. 

576 """ 

577 return self.config.location_type == "device" 

578 

579 @property 

580 def is_host_accessible(self) -> bool: 

581 """ 

582 Indicates whether the allocated memory is accessible from the host. 

583 """ 

584 return self.config.location_type == "host" 

585 

586 @property 

587 def device_id(self) -> int: 

588 """ 

589 Get the device ID associated with this memory resource. 

590 

591 Returns: 

592 int: CUDA device ID. -1 if the memory resource allocates host memory 

593 """ 

594 return self.device.device_id if self.config.location_type == "device" else -1 

595 

596 def __repr__(self) -> str: 

597 """ 

598 Return a string representation of the VirtualMemoryResource. 

599 

600 Returns: 

601 str: A string describing the object 

602 """ 

603 return f"<VirtualMemoryResource device={self.device}>"