Coverage for cuda / core / _memory / _virtual_memory_resource.py: 88.75%

240 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-22 01:37 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5from __future__ import annotations 

6 

7from dataclasses import dataclass, field 

8from typing import TYPE_CHECKING, Iterable 

9 

10if TYPE_CHECKING: 

11 from cuda.core._stream import Stream 

12 

13from cuda.core._device import Device 

14from cuda.core._memory._buffer import Buffer, MemoryResource 

15from cuda.core._utils.cuda_utils import ( 

16 Transaction, 

17 check_or_create_options, 

18 driver, 

19) 

20from cuda.core._utils.cuda_utils import ( 

21 _check_driver_error as raise_if_driver_error, 

22) 

23from cuda.core._utils.version import binding_version 

24from cuda.core.typing import ( 

25 VirtualMemoryAccessType, 

26 VirtualMemoryAllocationType, 

27 VirtualMemoryGranularityType, 

28 VirtualMemoryHandleType, 

29 VirtualMemoryLocationType, 

30) 

31 

32__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] 

33 

34 

@dataclass
class VirtualMemoryResourceOptions:
    """Configuration object for :class:`VirtualMemoryResource`.

    Stores the settings that tell the resource how to use the CUDA virtual
    memory management (VMM) APIs. The class also carries the private lookup
    tables that translate each option value into the matching driver enum.

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationType` | str
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleType` | None | str
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str
        Controls granularity query and size rounding.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    addr_hint: int
        An (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide.
    addr_align: int
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for the owning device. `None` maps to "no access" (flag value 0).
    peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for peers. `None` maps to "no access" (flag value 0).
    """

    allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED
    location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE
    # None is a valid value here: the handle-type table below maps it to CU_MEM_HANDLE_TYPE_NONE.
    handle_type: VirtualMemoryHandleType | None = VirtualMemoryHandleType.POSIX_FD
    granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    # None is a valid value for both access fields: the access table below maps it to 0.
    self_access: VirtualMemoryAccessType | None = VirtualMemoryAccessType.READ_WRITE
    peer_access: VirtualMemoryAccessType | None = VirtualMemoryAccessType.READ_WRITE

    # ---- Option value -> driver enum lookup tables ----
    # These attributes carry no annotation, so @dataclass does not treat them as fields.
    _a = driver.CUmemAccess_flags
    _access_flags = {  # noqa: RUF012
        VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
        VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ,
        None: 0,
    }
    _h = driver.CUmemAllocationHandleType
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE,
        VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST,
        VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type[VirtualMemoryAllocationType.MANAGED] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _lookup(table: dict, spec, error_prefix: str):
        """Return ``table[spec]``; raise ``ValueError`` if ``spec`` is not a key.

        The miss test is ``is None`` rather than truthiness because a valid
        entry may be falsy (the access table maps ``None`` to ``0``).
        """
        value = table.get(spec)
        if value is None:
            raise ValueError(f"{error_prefix}: {spec!r}")
        return value

    @staticmethod
    def _access_to_flags(spec: VirtualMemoryAccessType | str | None):
        """Translate an access spec into the driver's access flag value.

        Raises
        ------
        ValueError
            If ``spec`` is not a key of the access table.
        """
        return VirtualMemoryResourceOptions._lookup(
            VirtualMemoryResourceOptions._access_flags, spec, "Unknown access spec"
        )

    @staticmethod
    def _allocation_type_to_driver(spec: VirtualMemoryAllocationType | str):
        """Translate an allocation type into the driver's ``CUmemAllocationType`` value.

        Raises
        ------
        ValueError
            If ``spec`` is not a key of the allocation-type table (``MANAGED``
            is only registered when the bindings are version 13.0.0 or newer).
        """
        return VirtualMemoryResourceOptions._lookup(
            VirtualMemoryResourceOptions._allocation_type, spec, "Unsupported allocation_type"
        )

    @staticmethod
    def _location_type_to_driver(spec: VirtualMemoryLocationType | str):
        """Translate a location type into the driver's ``CUmemLocationType`` value.

        Raises
        ------
        ValueError
            If ``spec`` is not a key of the location-type table.
        """
        return VirtualMemoryResourceOptions._lookup(
            VirtualMemoryResourceOptions._location_type, spec, "Unsupported location_type"
        )

    @staticmethod
    def _handle_type_to_driver(spec: VirtualMemoryHandleType | str | None):
        """Translate a handle type into the driver's ``CUmemAllocationHandleType`` value.

        Raises
        ------
        NotImplementedError
            If ``spec`` equals ``"win32"``.
        ValueError
            If ``spec`` is not a key of the handle-type table.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        return VirtualMemoryResourceOptions._lookup(
            VirtualMemoryResourceOptions._handle_types, spec, "Unsupported handle_type"
        )

    @staticmethod
    def _granularity_to_driver(spec: VirtualMemoryGranularityType | str):
        """Translate a granularity type into the driver's granularity flag.

        Raises
        ------
        ValueError
            If ``spec`` is not a key of the granularity table.
        """
        return VirtualMemoryResourceOptions._lookup(
            VirtualMemoryResourceOptions._granularity, spec, "Unsupported granularity"
        )

145 

146 

class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None):
        """Bind the resource to a device and validate the requested options.

        Raises
        ------
        RuntimeError
            If a device allocation is requested without a device, if the device
            does not report VMM support, or if GPU Direct RDMA is requested but
            the device does not report support for it.
        """
        self.device = Device(device_id)
        self.config = check_or_create_options(
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current")
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        The bit-mask form is only correct when ``gran`` is a power of two.
        NOTE(review): callers pass the value returned by
        cuMemGetAllocationGranularity, presumably always a power of two - confirm.
        """
        return (size + gran - 1) & ~(gran - 1)

    def _make_allocation_prop(self) -> driver.CUmemAllocationProp:
        """Build the ``CUmemAllocationProp`` described by the current ``self.config``.

        Shared by :meth:`allocate` and :meth:`modify_allocation` so both use the
        same location id rule: the device ordinal for device allocations and
        ``-1`` otherwise (see :attr:`device_id`). ``self.device`` is ``None``
        for host locations, so it must not be dereferenced unconditionally.
        """
        opts = self.config
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(opts.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(opts.location_type)
        prop.location.id = self.device_id
        prop.allocFlags.gpuDirectRDMACapable = 1 if opts.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(opts.handle_type)
        prop.win32HandleMetaData = 0
        return prop

    def modify_allocation(
        self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None
    ) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        The method first tries to reserve virtual address space directly behind
        the existing range so the base pointer is preserved (fast path). If the
        driver cannot place the reservation there, the whole allocation is
        remapped into a new, larger address range (slow path).

        Both paths are transactional: if any step fails, the steps taken so far
        are rolled back.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation. If it is not larger than
            ``buf.size`` nothing is mapped or unmapped; only the access policy
            is re-applied to the existing range.
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            When given, it replaces ``self.config`` and therefore also applies to
            later calls on this resource.

        Returns
        -------
        Buffer
            ``buf`` itself when the size is unchanged or the fast path succeeds;
            otherwise a new Buffer for the relocated mapping (``buf`` is cleared).
        """
        if config is not None:
            self.config = config

        # Build allocation properties for new chunks
        prop = self._make_allocation_prop()

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same (or smaller) size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first: ask for the address right
        # after the aligned end of the current range.
        wanted_ptr = int(buf.handle) + aligned_prev_size
        res, new_ptr = driver.cuMemAddressReserve(aligned_additional_size, addr_align, wanted_ptr, 0)
        reserved = res == driver.CUresult.CUDA_SUCCESS

        # Compare as plain ints so the check does not depend on how the
        # binding's pointer type implements equality against int.
        if reserved and int(new_ptr) == wanted_ptr:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

        # Check for specific errors that are not recoverable with the slow path
        if res in (
            driver.CUresult.CUDA_ERROR_INVALID_VALUE,
            driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
            driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
            driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
        ):
            raise_if_driver_error(res)

        if reserved:
            # The driver reserved a range, just not where we asked: give it back.
            # When the reservation itself failed there is nothing to free and
            # new_ptr must not be handed to the driver.
            (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
            raise_if_driver_error(res2)

        # Fallback: couldn't extend contiguously, need full remapping
        return self._grow_allocation_slow_path(
            buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
        )

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged).

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        new_ptr : int
            The address of the VA region already reserved by the caller (should
            be at the end of the current buffer). This function takes ownership
            of the reservation and frees it if a later step fails.

        Returns
        -------
        Buffer
            The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The reservation was made by the caller; register its undo first.
            trans.append(
                lambda p=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0])
            )

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda p=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        # NOTE(review): new_size is the caller's unaligned request while the mapped
        # range is granularity-aligned; confirm deallocate() copes with that size.
        buf._size = new_size
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, frees the old VA range,
        clears the old buffer and returns a new one.

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        total_aligned_size : int
            The total new size to reserve, aligned to granularity.
        addr_align : int
            The required address alignment for the new VA range.

        Returns
        -------
        Buffer
            A new buffer object for the new pointer and size.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda p=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0])
            )

            # Get the old allocation handle for remapping
            res, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(res)
            # Register undo for old_handle
            # NOTE(review): on success this retained reference is never released
            # here - confirm the intended reference-count ownership.
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Build the access descriptors before any mapping is touched: an invalid
            # access spec then fails early, and the rollback below can reuse them.
            descs = self._build_access_descriptors(prop)

            # Unmap the old VA range (aligned previous size)
            old_ptr = int(buf.handle)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (res,) = driver.cuMemUnmap(old_ptr, aligned_prev_size)
            raise_if_driver_error(res)

            def _remap_old():
                # Best effort: put the old physical memory back at the original VA
                # range. A fresh mapping has no access rights until cuMemSetAccess
                # is called, so access is re-applied as well (using the current
                # config's descriptors). Failures are logged, not raised, so the
                # remaining undo actions still run.
                try:
                    (err,) = driver.cuMemMap(old_ptr, aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(err)
                    if descs:
                        (err,) = driver.cuMemSetAccess(old_ptr, aligned_prev_size, descs, len(descs))
                        raise_if_driver_error(err)
                except Exception:
                    import logging

                    logging.getLogger(__name__).exception(
                        "Failed to restore the original mapping while rolling back a VirtualMemoryResource grow"
                    )

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda p=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

            # Free the old VA range (aligned previous size)
            (res2,) = driver.cuMemAddressFree(old_ptr, aligned_prev_size)
            raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list:
        """
        Build access descriptors for memory access permissions.

        One descriptor is produced for the owning location (taken from ``prop``)
        when ``config.self_access`` maps to a non-zero flag, plus one device
        descriptor per entry in ``config.peers`` when ``config.peer_access``
        maps to a non-zero flag.

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access
        """
        descs = []

        # Owner access
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, *, stream: Stream | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate. It is rounded up to the
            configured granularity; the returned buffer reports the rounded size.
        stream : Stream, optional
            Keyword-only. Passed to ``Stream_accept`` when given and otherwise
            unused because virtual memory operations are synchronous.

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)

        config = self.config
        # ---- Build allocation properties ----
        prop = self._make_allocation_prop()

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran
        # addr_hint is typed ``int | None``; treat None like 0 ("let the driver decide").
        addr_hint = config.addr_hint or 0

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            raise_if_driver_error(res)
            # Register the unmap undo only once the map has succeeded, so a failed
            # map never triggers an unmap of a range that was not mapped.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done - return a Buffer that tracks this VA range
        return Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)

    def deallocate(self, ptr: int, size: int, *, stream: Stream | None = None) -> None:
        """
        Deallocate memory on the device using CUDA VMM APIs.

        The range is unmapped, its virtual address reservation is freed, and the
        allocation handle retained at the start of the call is released.

        Parameters
        ----------
        ptr : int
            The pointer to the memory to deallocate.
        size : int
            The size in bytes of the memory to deallocate.
        stream : Stream, optional
            Keyword-only. Passed to ``Stream_accept`` when given and otherwise
            unused because virtual memory operations are synchronous.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)

        # NOTE(review): this retain/release pair is balanced on its own; confirm that
        # the reference created by cuMemCreate in allocate() is dropped somewhere.
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        try:
            (result,) = driver.cuMemUnmap(ptr, size)
            raise_if_driver_error(result)
            (result,) = driver.cuMemAddressFree(ptr, size)
            raise_if_driver_error(result)
        finally:
            # Always drop the reference retained above, even when unmap/free fails,
            # so a failed deallocate does not leave an extra handle reference behind.
            (release_result,) = driver.cuMemRelease(handle)
            raise_if_driver_error(release_result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.

        True only when the configured location type is ``"device"``.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.

        True for every host location type ("host", "host_numa",
        "host_numa_current"), using the same test ``__init__`` applies when it
        decides that no device is attached.
        """
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns
        -------
        int
            CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.config.location_type == "device" else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns
        -------
        str
            A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"