Coverage for cuda/core/_memory/_virtual_memory_resource.py: 91.32%

242 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-13 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5from __future__ import annotations 

6 

7from dataclasses import dataclass, field 

8from typing import TYPE_CHECKING, Iterable 

9 

10if TYPE_CHECKING: 

11 from cuda.core._stream import Stream 

12 from cuda.core.graph import GraphBuilder 

13 

14from cuda.core._device import Device 

15from cuda.core._memory._buffer import Buffer, MemoryResource 

16from cuda.core._utils.cuda_utils import ( 

17 Transaction, 

18 check_or_create_options, 

19 driver, 

20) 

21from cuda.core._utils.cuda_utils import ( 

22 _check_driver_error as raise_if_driver_error, 

23) 

24from cuda.core._utils.version import binding_version 

25from cuda.core.typing import ( 

26 DevicePointerType, 

27 VirtualMemoryAccessType, 

28 VirtualMemoryAllocationType, 

29 VirtualMemoryGranularityType, 

30 VirtualMemoryHandleType, 

31 VirtualMemoryLocationType, 

32) 

33 

34__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] 

35 

36 

@dataclass
class VirtualMemoryResourceOptions:
    """Configuration object for :class:`VirtualMemoryResource`.

    Stores the settings that tell the resource how to use the CUDA VMM APIs.
    The class also carries the lookup tables (and small static translators)
    that convert each option value into the matching ``driver`` enum member.

    Attributes
    ----------
    allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str
        Controls the type of allocation.
    location_type: :obj:`~_memory.VirtualMemoryLocationType` | str
        Controls the location of the allocation.
    handle_type: :obj:`~_memory.VirtualMemoryHandleType` | None | str
        Export handle type for the physical allocation. Use
        ``"posix_fd"`` on Linux if you plan to
        import/export the allocation (required for cuMemRetainAllocationHandle).
        Use `None` if you don't need an exportable handle.
    granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str
        Controls granularity query and size rounding.
    gpu_direct_rdma: bool
        Hint that the allocation should be GDR-capable (if supported).
    addr_hint: int
        An optional virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide.
    addr_align: int | None
        Alignment for the VA reservation. If `None`, use the queried granularity.
    peers: Iterable[int]
        Extra device IDs that should be granted access in addition to ``device``.
    self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for the owning device.
    peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str
        Access flags for peers.
    """

    # NOTE: field order is part of the generated ``__init__`` signature; keep it stable.
    allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED
    location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE
    # ``None`` is a valid value here: it is a key of ``_handle_types`` below.
    handle_type: VirtualMemoryHandleType | None = VirtualMemoryHandleType.POSIX_FD
    granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    # ``None`` is a valid value for both access fields: it is a key of ``_access_flags`` below.
    self_access: VirtualMemoryAccessType | None = VirtualMemoryAccessType.READ_WRITE
    peer_access: VirtualMemoryAccessType | None = VirtualMemoryAccessType.READ_WRITE

    # The names below are un-annotated on purpose: the dataclass machinery only
    # turns annotated names into fields, so these stay plain class attributes.
    _a = driver.CUmemAccess_flags
    _access_flags = {  # noqa: RUF012
        VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
        VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ,
        None: 0,
    }
    _h = driver.CUmemAllocationHandleType
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE,
        VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST,
        VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type[VirtualMemoryAllocationType.MANAGED] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _access_to_flags(spec: VirtualMemoryAccessType | None) -> int:
        """Translate an access spec into its ``CUmemAccess_flags`` value.

        ``None`` maps to ``0``. Raises ``ValueError`` for a spec that is not a
        key of ``_access_flags``.
        """
        flags = VirtualMemoryResourceOptions._access_flags.get(spec)
        # ``is None`` (not truthiness): the table legitimately stores 0 for ``None``.
        if flags is None:
            raise ValueError(f"Unknown access spec: {spec!r}")
        return flags  # type: ignore[no-any-return]

    @staticmethod
    def _allocation_type_to_driver(spec: VirtualMemoryAllocationType) -> int:
        """Translate an allocation type into its ``CUmemAllocationType`` member.

        Raises ``ValueError`` for a spec that is not a key of ``_allocation_type``
        (``MANAGED`` is only registered when the bindings are version 13.0.0 or newer).
        """
        alloc_type = VirtualMemoryResourceOptions._allocation_type.get(spec)
        if alloc_type is None:
            raise ValueError(f"Unsupported allocation_type: {spec!r}")
        return alloc_type  # type: ignore[no-any-return]

    @staticmethod
    def _location_type_to_driver(spec: VirtualMemoryLocationType) -> int:
        """Translate a location type into its ``CUmemLocationType`` member.

        Raises ``ValueError`` for a spec that is not a key of ``_location_type``.
        """
        loc_type = VirtualMemoryResourceOptions._location_type.get(spec)
        if loc_type is None:
            raise ValueError(f"Unsupported location_type: {spec!r}")
        return loc_type  # type: ignore[no-any-return]

    @staticmethod
    def _handle_type_to_driver(spec: VirtualMemoryHandleType | None) -> int:
        """Translate a handle type into its ``CUmemAllocationHandleType`` member.

        Raises ``NotImplementedError`` when ``spec`` compares equal to ``"win32"``
        and ``ValueError`` for any other spec that is not a key of ``_handle_types``.
        """
        # Checked before the table lookup so "win32" gets a dedicated message.
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
        if handle_type is None:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
        return handle_type  # type: ignore[no-any-return]

    @staticmethod
    def _granularity_to_driver(spec: VirtualMemoryGranularityType) -> int:
        """Translate a granularity type into its ``CUmemAllocationGranularity_flags`` member.

        Raises ``ValueError`` for a spec that is not a key of ``_granularity``.
        """
        granularity = VirtualMemoryResourceOptions._granularity.get(spec)
        if granularity is None:
            raise ValueError(f"Unsupported granularity: {spec!r}")
        return granularity  # type: ignore[no-any-return]

147 

148 

class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions, optional
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None) -> None:
        """Bind the resource to a device and validate the requested options.

        Raises
        ------
        RuntimeError
            If a device is required but missing, if the device does not report
            VMM support, or if GPU Direct RDMA is requested on a device that
            does not report support for it.
        """
        self.device: Device | None = Device(device_id)
        self.config: VirtualMemoryResourceOptions = check_or_create_options(  # type: ignore[assignment]
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current")
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        Implemented with a bit mask, so the result is only a true multiple of
        ``gran`` when ``gran`` is a power of two.
        """
        return (size + gran - 1) & ~(gran - 1)

    def modify_allocation(
        self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None
    ) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        The method first tries to reserve virtual address space directly after
        the existing mapping so the base pointer is preserved (fast path). If
        that reservation does not land at the expected address, or fails with
        an error that a fresh reservation may still overcome, it falls back to
        reserving a new, larger range and remapping the existing physical
        memory into it (slow path).

        Both paths use transactional allocation: if any step fails, the steps
        taken so far are rolled back.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            When given, it replaces ``self.config`` and therefore also applies
            to later calls on this resource.

        Returns
        -------
        Buffer
            ``buf`` itself when ``new_size`` does not exceed ``buf.size`` (only
            the access policy is re-applied) or when the fast path succeeds;
            otherwise a new Buffer for the relocated mapping, in which case
            ``buf`` has been cleared.

        Raises
        ------
        CUDAError
            If a CUDA driver API call fails.
        """
        if config is not None:
            self.config = config

        # Build allocation properties for new chunks
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
        # Caller must not invoke modify_allocation on a host-located resource;
        # we rely on the dataclass invariant that self.device is non-None for
        # device-located resources (it's only None when location is host).
        assert self.device is not None, "modify_allocation requires a device-located resource"
        prop.location.id = self.device.device_id
        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
        prop.win32HandleMetaData = 0

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        # Calculate sizes
        additional_size = new_size - buf.size
        if additional_size <= 0:
            # Same (or smaller) size: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_prev_size = total_aligned_size - aligned_additional_size
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first: ask for the address that
        # sits at the aligned end of the current range.
        expected_ptr = int(buf.handle) + aligned_prev_size
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            expected_ptr,  # fixedAddr hint - aligned end of current range
            0,
        )

        if res == driver.CUresult.CUDA_SUCCESS:
            # NOTE(review): new_ptr is compared to an int directly here, while the
            # slow path wraps it in int(); confirm the binding's pointer type
            # supports this comparison.
            if new_ptr == expected_ptr:
                # Success! We can extend the VA range contiguously
                return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)
            # The reservation succeeded but at a different address, so it is of
            # no use for contiguous growth: hand it back before remapping.
            (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
            raise_if_driver_error(res2)
        elif res in (
            driver.CUresult.CUDA_ERROR_INVALID_VALUE,
            driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
            driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
            driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
        ):
            # Specific errors that are not recoverable with the slow path
            raise_if_driver_error(res)
        # Any other reservation failure left nothing reserved, so there is
        # nothing to free; just fall through to the slow path.

        # Fallback: couldn't extend contiguously, need full remapping
        return self._grow_allocation_slow_path(
            buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
        )

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged). It takes ownership of the ``new_ptr`` reservation: the
        reservation is freed again if any later step fails.

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        new_ptr : int
            The address of the newly reserved contiguous VA region (should
            be at the end of the current buffer).

        Returns
        -------
        Buffer
            The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The VA reservation was made by the caller; register its undo first
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )
            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        # TODO: #2049 This is a real bug, accessing _size which doesn't exist.
        # Fix bug and remove the "type: ignore[attr-defined]" comment.
        buf._size = new_size  # type: ignore[attr-defined]
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, frees the old VA range,
        clears ``buf`` and returns a new Buffer for the new range.

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        total_aligned_size : int
            The total new size to reserve, aligned to granularity.
        addr_align : int
            The required address alignment for the new VA range.

        Returns
        -------
        Buffer
            A new Buffer for the new mapping, created with ``new_size``.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old() -> None:
                # Try to remap the old physical memory back to the original VA range.
                # Deliberately best-effort: a failure here is swallowed.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:  # noqa: S110
                    # TODO: consider logging this exception
                    pass

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Free the old VA range (aligned previous size)
        (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
        raise_if_driver_error(res2)

        # Invalidate the old buffer so its destructor won't try to free again
        buf._clear()

        # Return a new Buffer for the new mapping
        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list[driver.CUmemAccessDesc]:
        """
        Build access descriptors for memory access permissions.

        One descriptor is emitted for the location described by ``prop`` when
        ``config.self_access`` maps to non-zero flags, plus one per entry of
        ``config.peers`` when ``config.peer_access`` maps to non-zero flags.

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access
            (empty when both access settings map to zero flags).
        """
        descs = []

        # Owner access
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                # Peers are always described as device locations
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, *, stream: Stream | GraphBuilder | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate.
        stream : Stream, optional
            Keyword-only. Passed to ``Stream_accept`` when given, but otherwise
            unused because virtual memory operations are synchronous.

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory. Its
            size is ``size`` rounded up to the queried granularity.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)

        config = self.config
        # ---- Build allocation properties ----
        prop = driver.CUmemAllocationProp()
        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
        # Host-located resources have no device; they use -1 as the location id
        prop.location.id = self.device.device_id if self.device is not None else -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            raise_if_driver_error(res)
            # Register the unmap undo only once the map is known to have
            # succeeded, so a failed map never triggers an unmap of a range
            # that was not mapped.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

    def deallocate(self, ptr: DevicePointerType, size: int, *, stream: Stream | GraphBuilder | None = None) -> None:
        """
        Deallocate memory on the device using CUDA VMM APIs.

        The allocation handle is retrieved from ``ptr``, then the range is
        unmapped, its address reservation is freed, and the handle is released.

        Parameters
        ----------
        ptr : DevicePointerType
            The pointer to the memory to deallocate. ``None`` is treated as 0.
        size : int
            The size in bytes of the memory to deallocate.
        stream : Stream, optional
            Keyword-only. Passed to ``Stream_accept`` when given, but otherwise
            unused because virtual memory operations are synchronous.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails.
        """
        ptr = 0 if ptr is None else int(ptr)

        if stream is not None:
            from cuda.core._stream import Stream_accept

            Stream_accept(stream)
        # The handle must be looked up while the range is still mapped
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        (result,) = driver.cuMemUnmap(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemAddressFree(ptr, size)
        raise_if_driver_error(result)
        (result,) = driver.cuMemRelease(handle)
        raise_if_driver_error(result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.

        True for every host location ("host", "host_numa", "host_numa_current"),
        using the same test ``__init__`` uses to decide that the resource has
        no device.
        """
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns
        -------
        int
            CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.device is not None else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns
        -------
        str
            A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"