Coverage for cuda / core / _memory / _virtual_memory_resource.py: 82.92%

240 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5from __future__ import annotations 

6 

7from dataclasses import dataclass, field 

8from typing import TYPE_CHECKING, Iterable, Literal 

9 

10if TYPE_CHECKING: 

11 from cuda.core._stream import Stream 

12 

13from cuda.core._device import Device 

14from cuda.core._memory._buffer import Buffer, MemoryResource 

15from cuda.core._utils.cuda_utils import ( 

16 Transaction, 

17 check_or_create_options, 

18 driver, 

19 get_binding_version, 

20) 

21from cuda.core._utils.cuda_utils import ( 

22 _check_driver_error as raise_if_driver_error, 

23) 

24 

25__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] 

26 

# Accepted string values for the corresponding VirtualMemoryResourceOptions
# fields. These human-friendly specs are mapped to CUDA driver enums by the
# static `_*_to_driver` helpers on VirtualMemoryResourceOptions.
VirtualMemoryHandleTypeT = Literal["posix_fd", "generic", "win32_kmt", "fabric"] | None
VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"]
VirtualMemoryGranularityT = Literal["minimum", "recommended"]
VirtualMemoryAccessTypeT = Literal["rw", "r"] | None
VirtualMemoryAllocationTypeT = Literal["pinned", "managed"]

32 

33 

@dataclass
class VirtualMemoryResourceOptions:
    """A configuration object for the VirtualMemoryResource
    Stores configuration information which tells the resource how to use the CUDA VMM APIs

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationTypeT`
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationTypeT`
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleTypeT`
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    granularity: :obj:`~_memory.VirtualMemoryGranularityT`
        Controls granularity query and size rounding.
    addr_hint: int
        A (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide.
    addr_align: int
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessTypeT`
        Access flags for the owning device.
    peer_access: :obj:`~_memory.VirtualMemoryAccessTypeT`
        Access flags for peers.
    """

    # Human-friendly strings; converted to CUDA driver enums on demand by the
    # static `_*_to_driver` helpers below.
    allocation_type: VirtualMemoryAllocationTypeT = "pinned"
    location_type: VirtualMemoryLocationTypeT = "device"
    handle_type: VirtualMemoryHandleTypeT = "posix_fd"
    granularity: VirtualMemoryGranularityT = "recommended"
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    self_access: VirtualMemoryAccessTypeT = "rw"
    peer_access: VirtualMemoryAccessTypeT = "rw"

    # Class-level lookup tables mapping the option strings above to driver
    # enums; evaluated once at class-creation time.
    # Note: `None` maps to flag value 0 (no access descriptor is emitted for it).
    _a = driver.CUmemAccess_flags
    _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0}  # noqa: RUF012
    _h = driver.CUmemAllocationHandleType
    # NOTE(review): VirtualMemoryHandleTypeT also allows "generic", but no
    # mapping exists here, so _handle_type_to_driver("generic") raises
    # ValueError — confirm whether "generic" should be mapped or removed from
    # the Literal alias.
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        "recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        "device": _l.CU_MEM_LOCATION_TYPE_DEVICE,
        "host": _l.CU_MEM_LOCATION_TYPE_HOST,
        "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    # Binding version is queried once at class-creation time to gate the
    # "managed" allocation type.
    ver_major, ver_minor = get_binding_version()
    if ver_major >= 13:
        _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _access_to_flags(spec: str):
        """Map an access spec (``"rw"``, ``"r"``, or ``None``) to CUmemAccess_flags.

        ``None`` maps to 0 (no access). Raises ValueError for unknown specs.
        """
        flags = VirtualMemoryResourceOptions._access_flags.get(spec)
        if flags is None:
            raise ValueError(f"Unknown access spec: {spec!r}")
        return flags

    @staticmethod
    def _allocation_type_to_driver(spec: str):
        """Map an allocation-type spec to CUmemAllocationType; raise ValueError if unsupported."""
        alloc_type = VirtualMemoryResourceOptions._allocation_type.get(spec)
        if alloc_type is None:
            raise ValueError(f"Unsupported allocation_type: {spec!r}")
        return alloc_type

    @staticmethod
    def _location_type_to_driver(spec: str):
        """Map a location-type spec to CUmemLocationType; raise ValueError if unsupported."""
        loc_type = VirtualMemoryResourceOptions._location_type.get(spec)
        if loc_type is None:
            raise ValueError(f"Unsupported location_type: {spec!r}")
        return loc_type

    @staticmethod
    def _handle_type_to_driver(spec: str):
        """Map a handle-type spec to CUmemAllocationHandleType.

        ``"win32"`` is explicitly rejected as unimplemented; other unknown
        specs raise ValueError.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
        if handle_type is None:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
        return handle_type

    @staticmethod
    def _granularity_to_driver(spec: str):
        """Map a granularity spec to CUmemAllocationGranularity_flags; raise ValueError if unsupported."""
        granularity = VirtualMemoryResourceOptions._granularity.get(spec)
        if granularity is None:
            raise ValueError(f"Unsupported granularity: {spec!r}")
        return granularity

142 

143 

class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions = None):
        # Owning device; reset to None below for host-located configurations.
        self.device = Device(device_id)
        # A None config is replaced by default options (keep_none=False).
        self.config = check_or_create_options(
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current")
        if "host" in self.config.location_type:
            self.device = None

        # NOTE(review): self.device is only set to None for host-like location
        # types, which cannot equal "device" — confirm this guard is reachable
        # (it only fires if a Device instance is falsy).
        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

185 

186 @staticmethod 

187 def _align_up(size: int, gran: int) -> int: 

188 """ 

189 Align a size up to the nearest multiple of a granularity. 

190 """ 

191 return (size + gran - 1) & ~(gran - 1) 1cdab

192 

    def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions = None) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        This implements true growing allocations that preserve the base pointer
        by extending the virtual address range and mapping additional physical memory.

        This function uses transactional allocation: if any step fails, the original buffer is not modified and
        all steps the function took are rolled back so a new allocation is not created.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            Note: a non-None config is stored on the resource and affects all
            subsequent operations, not only this call.

        Returns
        -------
        Buffer
            The same buffer with updated size and properties, preserving the original pointer
        """
        if config is not None:
            # Persist the new policy on the resource (not scoped to this call).
            self.config = config

        # Build allocation properties for new chunks
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
        # NOTE(review): assumes self.device is not None; __init__ sets it to
        # None for host-located configs — confirm this method is device-only.
        prop.location.id = self.device.device_id
        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
        prop.win32HandleMetaData = 0

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            int(buf.handle) + aligned_prev_size,  # fixedAddr hint - aligned end of current range
            0,
        )

        if res != driver.CUresult.CUDA_SUCCESS or new_ptr != (int(buf.handle) + aligned_prev_size):
            # Check for specific errors that are not recoverable with the slow path
            if res in (
                driver.CUresult.CUDA_ERROR_INVALID_VALUE,
                driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
                driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
                driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
            ):
                raise_if_driver_error(res)
            # Reservation either failed or landed at the wrong address; release
            # it before falling back to a full remap.
            (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
            raise_if_driver_error(res2)
            # Fallback: couldn't extend contiguously, need full remapping
            return self._grow_allocation_slow_path(
                buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
            )
        else:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

275 

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged).

        Args:
            buf (Buffer):
                The buffer to grow.

            new_size (int):
                The new total size in bytes.

            prop (driver.CUmemAllocationProp):
                Allocation properties for the new memory.

            aligned_additional_size (int):
                The size of the new region to allocate, aligned to granularity.

            new_ptr (int):
                The address of the newly reserved contiguous VA region (should
                be at the end of the current buffer).

        Returns:
            Buffer: The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The caller already reserved the VA extension at new_ptr; register
            # its release first so any later failure frees that reservation too.
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )
            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        buf._size = new_size
        return buf

337 

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, and updates the buffer's
        pointer and size.

        Args:
            buf (Buffer): The buffer to grow.
            new_size (int): The new total size in bytes.
            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
            total_aligned_size (int): The total new size to reserve, aligned to granularity.
            addr_align (int): The required address alignment for the new VA range.

        Returns:
            Buffer: The buffer object updated with the new pointer and size.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old():
                # Try to remap the old physical memory back to the original VA
                # range. Best-effort rollback: failures are swallowed so the
                # remaining undo actions can still run.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:  # noqa: S110
                    # TODO: consider logging this exception
                    pass

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Free the old VA range (aligned previous size)
        (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
        raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

441 

442 def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list: 

443 """ 

444 Build access descriptors for memory access permissions. 

445 

446 Returns 

447 ------- 

448 list 

449 List of CUmemAccessDesc objects for setting memory access 

450 """ 

451 descs = [] 1cdab

452 

453 # Owner access 

454 owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access) 1cdab

455 if owner_flags: 1cdab

456 d = driver.CUmemAccessDesc() 1cdab

457 d.location.type = prop.location.type 1cdab

458 d.location.id = prop.location.id 1cdab

459 d.flags = owner_flags 1cdab

460 descs.append(d) 1cdab

461 

462 # Peer device access 

463 peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access) 1cdab

464 if peer_flags: 1cdab

465 for peer_dev in self.config.peers: 1cdab

466 d = driver.CUmemAccessDesc() 

467 d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE 

468 d.location.id = int(peer_dev) 

469 d.flags = peer_flags 

470 descs.append(d) 

471 

472 return descs 1cdab

473 

    def allocate(self, size: int, stream: Stream | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate.
        stream : Stream, optional
            CUDA stream to associate with the allocation (not currently supported).

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory. Its size
            is the requested ``size`` rounded up to the queried granularity, so
            it may be larger than ``size``.

        Raises
        ------
        NotImplementedError
            If a stream is provided.
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            raise NotImplementedError("Stream is not supported with VirtualMemoryResource")

        config = self.config
        # ---- Build allocation properties ----
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
        # Host-located allocations carry no device id; the driver convention
        # here is -1.
        prop.location.id = self.device.device_id if config.location_type == "device" else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            # NOTE(review): the unmap undo is registered before the error check,
            # so a failed cuMemMap still triggers cuMemUnmap during rollback —
            # confirm this ordering is intentional.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))
            raise_if_driver_error(res)

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

557 

558 def deallocate(self, ptr: int, size: int, stream: Stream | None = None) -> None: # noqa: ARG002 

559 """ 

560 Deallocate memory on the device using CUDA VMM APIs. 

561 """ 

562 result, handle = driver.cuMemRetainAllocationHandle(ptr) 1cdab

563 raise_if_driver_error(result) 1cdab

564 (result,) = driver.cuMemUnmap(ptr, size) 1cdab

565 raise_if_driver_error(result) 1cdab

566 (result,) = driver.cuMemAddressFree(ptr, size) 1cdab

567 raise_if_driver_error(result) 1cdab

568 (result,) = driver.cuMemRelease(handle) 1cdab

569 raise_if_driver_error(result) 1cdab

570 

571 @property 

572 def is_device_accessible(self) -> bool: 

573 """ 

574 Indicates whether the allocated memory is accessible from the device. 

575 """ 

576 return self.config.location_type == "device" 

577 

578 @property 

579 def is_host_accessible(self) -> bool: 

580 """ 

581 Indicates whether the allocated memory is accessible from the host. 

582 """ 

583 return self.config.location_type == "host" 

584 

585 @property 

586 def device_id(self) -> int: 

587 """ 

588 Get the device ID associated with this memory resource. 

589 

590 Returns: 

591 int: CUDA device ID. -1 if the memory resource allocates host memory 

592 """ 

593 return self.device.device_id if self.config.location_type == "device" else -1 1cdb

594 

595 def __repr__(self) -> str: 

596 """ 

597 Return a string representation of the VirtualMemoryResource. 

598 

599 Returns: 

600 str: A string describing the object 

601 """ 

602 return f"<VirtualMemoryResource device={self.device}>"