Coverage for cuda / core / _memory / _virtual_memory_resource.py: 82.92%

240 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-29 01:27 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5from __future__ import annotations 

6 

7from dataclasses import dataclass, field 

8from typing import TYPE_CHECKING, Iterable, Literal 

9 

10if TYPE_CHECKING: 

11 from cuda.core._stream import Stream 

12 

13from cuda.core._device import Device 

14from cuda.core._memory._buffer import Buffer, MemoryResource 

15from cuda.core._utils.cuda_utils import ( 

16 Transaction, 

17 check_or_create_options, 

18 driver, 

19) 

20from cuda.core._utils.cuda_utils import ( 

21 _check_driver_error as raise_if_driver_error, 

22) 

23from cuda.core._utils.version import binding_version 

24 

25__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] 

26 

# Human-friendly string specs accepted by VirtualMemoryResourceOptions.
# Each alias mirrors a CUDA driver enum; the actual mappings live on the
# options class below.
# Export handle type for the physical allocation; None means no exportable handle.
# NOTE(review): "generic" is listed here but has no entry in
# VirtualMemoryResourceOptions._handle_types, so it is rejected at runtime — confirm intent.
VirtualMemoryHandleTypeT = Literal["posix_fd", "generic", "win32_kmt", "fabric"] | None
# Where the physical memory lives (maps to CUmemLocationType).
VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"]
# Which granularity to query from the driver (maps to CUmemAllocationGranularity_flags).
VirtualMemoryGranularityT = Literal["minimum", "recommended"]
# Access protection for mappings; None grants no access (flags value 0).
VirtualMemoryAccessTypeT = Literal["rw", "r"] | None
# Kind of physical allocation (maps to CUmemAllocationType); "managed" requires CUDA 13+ bindings.
VirtualMemoryAllocationTypeT = Literal["pinned", "managed"]

32 

33 

@dataclass
class VirtualMemoryResourceOptions:
    """A configuration object for the VirtualMemoryResource

    Stores configuration information which tells the resource how to use the CUDA VMM APIs.

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationTypeT`
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationTypeT`
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleTypeT`
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    granularity: :obj:`~_memory.VirtualMemoryGranularityT`
        Controls granularity query and size rounding.
    addr_hint: int
        A (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide.
    addr_align: int
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessTypeT`
        Access flags for the owning device.
    peer_access: :obj:`~_memory.VirtualMemoryAccessTypeT`
        Access flags for peers.
    """

    # Human-friendly string specs; translated to driver enum values on demand
    # by the _*_to_driver static helpers below.
    allocation_type: VirtualMemoryAllocationTypeT = "pinned"
    location_type: VirtualMemoryLocationTypeT = "device"
    handle_type: VirtualMemoryHandleTypeT = "posix_fd"
    granularity: VirtualMemoryGranularityT = "recommended"
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    self_access: VirtualMemoryAccessTypeT = "rw"
    peer_access: VirtualMemoryAccessTypeT = "rw"

    # Spec-string -> driver-enum lookup tables, built once at class-definition
    # time. These are plain class attributes (not dataclass fields) because
    # they have no annotations.
    _a = driver.CUmemAccess_flags
    # None maps to 0 (no access flags) so callers can skip the cuMemSetAccess step.
    _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}  # noqa: RUF012
    _h = driver.CUmemAllocationHandleType
    # NOTE(review): the "generic" spec from VirtualMemoryHandleTypeT has no
    # entry here, so it raises ValueError at runtime — confirm intent.
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        "recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        "device": _l.CU_MEM_LOCATION_TYPE_DEVICE,
        "host": _l.CU_MEM_LOCATION_TYPE_HOST,
        "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _access_to_flags(spec: str):
        """Translate an access spec ("rw"/"r"/None) to CUmemAccess_flags.

        Returns 0 for ``None`` (no access); raises ValueError for unknown specs.
        """
        flags = VirtualMemoryResourceOptions._access_flags.get(spec)
        if flags is None:
            raise ValueError(f"Unknown access spec: {spec!r}")
        return flags

    @staticmethod
    def _allocation_type_to_driver(spec: str):
        """Translate an allocation-type spec to CUmemAllocationType.

        Raises ValueError for unknown specs (including "managed" on pre-13 bindings).
        """
        alloc_type = VirtualMemoryResourceOptions._allocation_type.get(spec)
        if alloc_type is None:
            raise ValueError(f"Unsupported allocation_type: {spec!r}")
        return alloc_type

    @staticmethod
    def _location_type_to_driver(spec: str):
        """Translate a location-type spec to CUmemLocationType; raises ValueError if unknown."""
        loc_type = VirtualMemoryResourceOptions._location_type.get(spec)
        if loc_type is None:
            raise ValueError(f"Unsupported location_type: {spec!r}")
        return loc_type

    @staticmethod
    def _handle_type_to_driver(spec: str):
        """Translate a handle-type spec to CUmemAllocationHandleType.

        "win32" gets a dedicated NotImplementedError; other unknown specs
        raise ValueError.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
        if handle_type is None:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
        return handle_type

    @staticmethod
    def _granularity_to_driver(spec: str):
        """Translate a granularity spec to CUmemAllocationGranularity_flags; raises ValueError if unknown."""
        granularity = VirtualMemoryResourceOptions._granularity.get(spec)
        if granularity is None:
            raise ValueError(f"Unsupported granularity: {spec!r}")
        return granularity

141 

142 

class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions
        A configuration object for the VirtualMemoryResource

    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None):
        self.device = Device(device_id)
        self.config = check_or_create_options(
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Substring check matches all host variants ("host", "host_numa",
        # "host_numa_current"); host allocations are not bound to a device.
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        ``gran`` is expected to be a power of two (CUDA allocation
        granularities are), which makes the mask trick valid.
        """
        return (size + gran - 1) & ~(gran - 1)

    def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        This implements true growing allocations that preserve the base pointer
        by extending the virtual address range and mapping additional physical memory.

        This function uses transactional allocation: if any step fails, the original buffer is not modified and
        all steps the function took are rolled back so a new allocation is not created.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            Note: a non-None config replaces this resource's stored config.

        Returns
        -------
        Buffer
            The same buffer with updated size and properties, preserving the original pointer
        """
        if config is not None:
            # Deliberate side effect: the supplied config becomes the
            # resource's active config for this and future operations.
            self.config = config

        # Build allocation properties for new chunks
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
        # Host-side locations have no owning device (self.device is None then);
        # use -1 as in allocate() instead of dereferencing a None device.
        prop.location.id = self.device.device_id if self.config.location_type == "device" else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
        prop.win32HandleMetaData = 0

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same (or smaller) size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            int(buf.handle) + aligned_prev_size,  # fixedAddr hint - aligned end of current range
            0,
        )

        if res != driver.CUresult.CUDA_SUCCESS or new_ptr != (int(buf.handle) + aligned_prev_size):
            # Check for specific errors that are not recoverable with the slow path
            if res in (
                driver.CUresult.CUDA_ERROR_INVALID_VALUE,
                driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
                driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
                driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
            ):
                raise_if_driver_error(res)
            # Only release the reservation if it actually succeeded but landed
            # at the wrong address; when the reserve call itself failed,
            # new_ptr is not a valid reservation and freeing it would raise
            # and defeat the fallback below.
            if res == driver.CUresult.CUDA_SUCCESS:
                (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
                raise_if_driver_error(res2)
            # Fallback: couldn't extend contiguously, need full remapping
            return self._grow_allocation_slow_path(
                buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
            )
        else:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged).

        Args:
            buf (Buffer):
                The buffer to grow.

            new_size (int):
                The new total size in bytes.

            prop (driver.CUmemAllocationProp):
                Allocation properties for the new memory.

            aligned_additional_size (int):
                The size of the new region to allocate, aligned to granularity.

            new_ptr (int):
                The address of the newly reserved contiguous VA region (should
                be at the end of the current buffer).

        Returns:
            Buffer: The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # Undo for the VA reservation made by the caller
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )
            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        buf._size = new_size
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, and updates the buffer's
        pointer and size.

        Args:
            buf (Buffer): The buffer to grow.
            new_size (int): The new total size in bytes.
            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
            total_aligned_size (int): The total new size to reserve, aligned to granularity.
            addr_align (int): The required address alignment for the new VA range.

        Returns:
            Buffer: The buffer object updated with the new pointer and size.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old():
                # Best-effort undo: try to remap the old physical memory back
                # to the original VA range so the buffer stays usable on rollback.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:  # noqa: S110
                    # TODO: consider logging this exception
                    pass

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Free the old VA range (aligned previous size)
        (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
        raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list:
        """
        Build access descriptors for memory access permissions.

        Descriptors are built from the resource config: one for the owner
        location (if self_access grants any access) and one per peer device
        (if peer_access grants any access).

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access
        """
        descs = []

        # Owner access (flags value 0, i.e. access spec None, is skipped)
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, stream: Stream | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate.
        stream : Stream, optional
            CUDA stream to associate with the allocation (not currently supported).

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory.

        Raises
        ------
        NotImplementedError
            If a stream is provided or if the location type is not device memory.
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration. The returned buffer's size is
        the granularity-aligned size, which may exceed the requested ``size``.
        """
        if stream is not None:
            raise NotImplementedError("Stream is not supported with VirtualMemoryResource")

        config = self.config
        # ---- Build allocation properties ----
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
        prop.location.id = self.device.device_id if config.location_type == "device" else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))
            raise_if_driver_error(res)

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

    def deallocate(self, ptr: int, size: int, stream: Stream | None = None) -> None:  # noqa: ARG002
        """
        Deallocate memory on the device using CUDA VMM APIs.

        Teardown order: retain the allocation handle backing ``ptr``, unmap the
        VA range, free the VA reservation, then release the handle.
        """
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        (result,) = driver.cuMemUnmap(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemAddressFree(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemRelease(handle)
        raise_if_driver_error(result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.
        """
        # All host location variants ("host", "host_numa", "host_numa_current")
        # are host-accessible; matches the substring convention used in __init__.
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns:
            int: CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.config.location_type == "device" else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns:
            str: A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"