Coverage for cuda/core/_memory/_virtual_memory

3# SPDX-License-Identifier: Apache-2.0

5from __future__ import annotations (empty)

7from dataclasses import dataclass, field (empty)

8from typing import TYPE_CHECKING, Iterable (empty)

10if TYPE_CHECKING: (empty)

11 from cuda.core._stream import Stream

13from cuda.core._device import Device (empty)

14from cuda.core._memory._buffer import Buffer, MemoryResource (empty)

15from cuda.core._utils.cuda_utils import ( (empty)

16 Transaction,

17 check_or_create_options,

18 driver,

19)

20from cuda.core._utils.cuda_utils import ( (empty)

21 _check_driver_error as raise_if_driver_error,

22)

23from cuda.core._utils.version import binding_version (empty)

24from cuda.core.typing import ( (empty)

25 VirtualMemoryAccessType,

26 VirtualMemoryAllocationType,

27 VirtualMemoryGranularityType,

28 VirtualMemoryHandleType,

29 VirtualMemoryLocationType,

30)

32__all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] (empty)

@dataclass
class VirtualMemoryResourceOptions:
    """Options describing how a VirtualMemoryResource drives the CUDA VMM APIs.

    Besides the user-facing fields, the class carries lookup tables that
    translate each option value into the matching CUDA driver enum, together
    with small static helpers that perform the translation and reject
    unknown values.

    Attributes
    ----------
    allocation_type : :obj:`~_memory.VirtualMemoryAllocationType` | str
        Kind of allocation to create. Defaults to ``PINNED``.
    location_type : :obj:`~_memory.VirtualMemoryLocationType` | str
        Where the allocation lives. Defaults to ``DEVICE``.
    handle_type : :obj:`~_memory.VirtualMemoryHandleType` | str | None
        Export handle type of the physical allocation. Use ``"posix_fd"`` on
        Linux when the allocation will be imported/exported (required for
        cuMemRetainAllocationHandle). Use `None` when no exportable handle is
        needed. Defaults to ``POSIX_FD``.
    granularity : :obj:`~_memory.VirtualMemoryGranularityType` | str
        Selects which granularity is queried and used for size rounding.
        Defaults to ``RECOMMENDED``.
    gpu_direct_rdma : bool
        Hint that the allocation should be GDR-capable (if supported).
    addr_hint : int | None
        Optional virtual address to try to reserve at. ``0`` (the default)
        lets the CUDA driver decide.
    addr_align : int | None
        Alignment of the VA reservation. `None` (the default) means the
        queried granularity is used.
    peers : Iterable[int]
        Extra device IDs granted access in addition to the owning device.
    self_access : :obj:`~_memory.VirtualMemoryAccessType` | str | None
        Access flags for the owning device. Defaults to ``READ_WRITE``.
    peer_access : :obj:`~_memory.VirtualMemoryAccessType` | str | None
        Access flags for the peers. Defaults to ``READ_WRITE``.
    """

    allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED
    location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE
    handle_type: VirtualMemoryHandleType = VirtualMemoryHandleType.POSIX_FD
    granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED
    gpu_direct_rdma: bool = False
    addr_hint: int | None = 0
    addr_align: int | None = None
    peers: Iterable[int] = field(default_factory=tuple)
    self_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE
    peer_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE

    # ---- Option value -> driver enum tables --------------------------------
    # These names carry no annotation, so the dataclass machinery treats them
    # as plain class attributes rather than as fields.

    # Access type -> CUmemAccess_flags; `None` maps to 0.
    _a = driver.CUmemAccess_flags
    _access_flags = {  # noqa: RUF012
        VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE,
        VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ,
        None: 0,
    }

    # Handle type -> CUmemAllocationHandleType; `None` requests no export handle.
    _h = driver.CUmemAllocationHandleType
    _handle_types = {  # noqa: RUF012
        None: _h.CU_MEM_HANDLE_TYPE_NONE,
        VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
        VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT,
        VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC,
    }

    # Granularity type -> CUmemAllocationGranularity_flags.
    _g = driver.CUmemAllocationGranularity_flags
    _granularity = {  # noqa: RUF012
        VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED,
        VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM,
    }

    # Location type -> CUmemLocationType.
    _l = driver.CUmemLocationType
    _location_type = {  # noqa: RUF012
        VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE,
        VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST,
        VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA,
        VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT,
    }

    # Allocation type -> CUmemAllocationType.
    _t = driver.CUmemAllocationType
    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
    _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
    if binding_version() >= (13, 0, 0):
        _allocation_type[VirtualMemoryAllocationType.MANAGED] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED

    @staticmethod
    def _access_to_flags(spec: str):
        """Return the driver access flags for ``spec``; ValueError if unknown."""
        table = VirtualMemoryResourceOptions._access_flags
        if spec not in table:
            raise ValueError(f"Unknown access spec: {spec!r}")
        return table[spec]

    @staticmethod
    def _allocation_type_to_driver(spec: str):
        """Return the driver allocation type for ``spec``; ValueError if unsupported."""
        table = VirtualMemoryResourceOptions._allocation_type
        if spec not in table:
            raise ValueError(f"Unsupported allocation_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _location_type_to_driver(spec: str):
        """Return the driver location type for ``spec``; ValueError if unsupported."""
        table = VirtualMemoryResourceOptions._location_type
        if spec not in table:
            raise ValueError(f"Unsupported location_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _handle_type_to_driver(spec: str):
        """Return the driver handle type for ``spec``.

        ``"win32"`` is rejected with NotImplementedError before the table is
        consulted; any other value missing from the table raises ValueError.
        """
        if spec == "win32":
            raise NotImplementedError("win32 is currently not supported, please reach out to the CUDA Python team")
        table = VirtualMemoryResourceOptions._handle_types
        if spec not in table:
            raise ValueError(f"Unsupported handle_type: {spec!r}")
        return table[spec]

    @staticmethod
    def _granularity_to_driver(spec: str):
        """Return the driver granularity flag for ``spec``; ValueError if unsupported."""
        table = VirtualMemoryResourceOptions._granularity
        if spec not in table:
            raise ValueError(f"Unsupported granularity: {spec!r}")
        return table[spec]

145

146

class VirtualMemoryResource(MemoryResource):
    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.

    Parameters
    ----------
    device_id : Device | int
        Device for which a memory resource is constructed.

    config : VirtualMemoryResourceOptions
        A configuration object for the VirtualMemoryResource


    Warning
    -------
    This is a low-level API that is provided only for convenience. Make sure you fully understand
    how CUDA Virtual Memory Management works before using this. Other MemoryResource subclasses
    in cuda.core should already meet the common needs.
    """

    def __init__(self, device_id: Device | int, config: VirtualMemoryResourceOptions | None = None):
        self.device = Device(device_id)
        self.config = check_or_create_options(
            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
        )
        # Matches ("host", "host_numa", "host_numa_current")
        if "host" in self.config.location_type:
            self.device = None

        if not self.device and self.config.location_type == "device":
            raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")

        if self.device and not self.device.properties.virtual_memory_management_supported:
            raise RuntimeError("VirtualMemoryResource requires CUDA VMM API support")

        # Validate RDMA support if requested
        if (
            self.config.gpu_direct_rdma
            and self.device is not None
            and not self.device.properties.gpu_direct_rdma_supported
        ):
            raise RuntimeError("GPU Direct RDMA is not supported on this device")

    @staticmethod
    def _align_up(size: int, gran: int) -> int:
        """
        Align a size up to the nearest multiple of a granularity.

        Uses ceiling division, so the result is correct for any positive
        granularity, not only powers of two.

        Raises
        ------
        ValueError
            If ``gran`` is not positive.
        """
        if gran <= 0:
            raise ValueError(f"Granularity must be positive, got {gran!r}")
        return -(-size // gran) * gran

    def _build_allocation_prop(self, config: VirtualMemoryResourceOptions) -> driver.CUmemAllocationProp:
        """
        Translate ``config`` into a ``CUmemAllocationProp`` for the driver.

        Shared by `allocate` and `modify_allocation` so both describe the
        location identically: the owning device's id for ``"device"``
        locations and ``-1`` for every other location type.

        Raises
        ------
        RuntimeError
            If a device location is requested but this resource has no device.
        ValueError
            If one of the option values has no driver equivalent.
        """
        options = VirtualMemoryResourceOptions
        prop = driver.CUmemAllocationProp()
        prop.type = options._allocation_type_to_driver(config.allocation_type)
        prop.location.type = options._location_type_to_driver(config.location_type)
        if config.location_type == "device":
            # self.device is None when the resource was built for a host location.
            if self.device is None:
                raise RuntimeError("VirtualMemoryResource requires a device for device memory allocations")
            prop.location.id = self.device.device_id
        else:
            prop.location.id = -1
        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
        prop.requestedHandleTypes = options._handle_type_to_driver(config.handle_type)
        prop.win32HandleMetaData = 0
        return prop

    def modify_allocation(
        self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions | None = None
    ) -> Buffer:
        """
        Grow an existing allocation using CUDA VMM, with a configurable policy.

        The method first tries to reserve the virtual address range directly
        after the buffer so the base pointer is preserved (fast path). If that
        range cannot be obtained, it falls back to reserving a new, larger
        range and remapping the existing memory into it (slow path), in which
        case a new Buffer with a different pointer is returned.

        This function uses transactional allocation: if any step fails, the original buffer is not modified and
        all steps the function took are rolled back so a new allocation is not created.

        Parameters
        ----------
        buf : Buffer
            The existing buffer to grow
        new_size : int
            The new total size for the allocation. If it does not exceed the
            current size, only the access policy is re-applied.
        config : VirtualMemoryResourceOptions, optional
            Configuration for the new physical memory chunks. If None, uses current config.
            When given, it replaces this resource's configuration and stays in
            effect for later calls.

        Returns
        -------
        Buffer
            ``buf`` itself when the pointer could be preserved, otherwise a new
            Buffer describing the remapped range.
        """
        if config is not None:
            # Validate the same way __init__ does instead of storing the raw object.
            self.config = check_or_create_options(
                VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
            )

        # Build allocation properties for new chunks
        prop = self._build_allocation_prop(self.config)

        # Query granularity
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        if new_size <= buf.size:
            # No growth: only update access policy if needed; avoid zero-sized driver calls
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
                raise_if_driver_error(res)
            return buf

        # Calculate sizes. The extent already mapped is the current size rounded
        # up; buf.size itself may be unaligned after an earlier grow stored the
        # raw requested size.
        aligned_prev_size = VirtualMemoryResource._align_up(buf.size, gran)
        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
        aligned_additional_size = total_aligned_size - aligned_prev_size
        if aligned_additional_size == 0:
            # The rounded-up extent already covers new_size; nothing to map.
            buf._size = new_size
            return buf
        addr_align = self.config.addr_align or gran

        # Try to extend the existing VA range first
        wanted_ptr = int(buf.handle) + aligned_prev_size  # aligned end of current range
        res, new_ptr = driver.cuMemAddressReserve(
            aligned_additional_size,
            addr_align,
            wanted_ptr,  # fixedAddr hint
            0,
        )
        reserved = res == driver.CUresult.CUDA_SUCCESS

        # Compare as plain integers so the check does not depend on how the
        # driver's pointer object implements equality.
        if reserved and int(new_ptr) == wanted_ptr:
            # Success! We can extend the VA range contiguously
            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)

        # Check for specific errors that are not recoverable with the slow path
        if res in (
            driver.CUresult.CUDA_ERROR_INVALID_VALUE,
            driver.CUresult.CUDA_ERROR_NOT_PERMITTED,
            driver.CUresult.CUDA_ERROR_NOT_INITIALIZED,
            driver.CUresult.CUDA_ERROR_NOT_SUPPORTED,
        ):
            raise_if_driver_error(res)

        if reserved:
            # The driver reserved a range, just not at the requested address.
            # Give it back; a failed reservation leaves nothing to free.
            (res2,) = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
            raise_if_driver_error(res2)

        # Fallback: couldn't extend contiguously, need full remapping
        return self._grow_allocation_slow_path(
            buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align
        )

    def _grow_allocation_fast_path(
        self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp, aligned_additional_size: int, new_ptr: int
    ) -> Buffer:
        """
        Fast path for growing a virtual memory allocation when the new region can be
        reserved contiguously after the existing buffer.

        This function creates and maps new physical memory for the additional size,
        sets access permissions, and updates the buffer size in place (the pointer
        remains unchanged). It takes ownership of the reservation at ``new_ptr``:
        if any step fails, the reservation is freed as part of the rollback.

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        new_ptr : int
            The address of the newly reserved contiguous VA region (should
            be at the end of the current buffer).

        Returns
        -------
        Buffer
            The same buffer object with its size updated to `new_size`.
        """
        with Transaction() as trans:
            # The VA reservation was made by the caller; register its undo first
            # so it is released if anything below fails.
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for creation
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended VA range
            (res,) = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)
            # Register undo for mapping
            trans.append(
                lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0])
            )

            # Set access permissions for the new portion
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

        # Update the buffer size (pointer stays the same)
        buf._size = new_size
        return buf

    def _grow_allocation_slow_path(
        self,
        buf: Buffer,
        new_size: int,
        prop: driver.CUmemAllocationProp,
        aligned_additional_size: int,
        total_aligned_size: int,
        addr_align: int,
    ) -> Buffer:
        """
        Slow path for growing a virtual memory allocation when the new region cannot be
        reserved contiguously after the existing buffer.

        This function reserves a new, larger virtual address (VA) range, remaps the old
        physical memory to the beginning of the new VA range, creates and maps new physical
        memory for the additional size, sets access permissions, and returns a new Buffer
        for the new range. The old buffer is cleared so it no longer refers to the freed range.

        Parameters
        ----------
        buf : Buffer
            The buffer to grow.
        new_size : int
            The new total size in bytes.
        prop : driver.CUmemAllocationProp
            Allocation properties for the new memory.
        aligned_additional_size : int
            The size of the new region to allocate, aligned to granularity.
        total_aligned_size : int
            The total new size to reserve, aligned to granularity.
        addr_align : int
            The required address alignment for the new VA range.

        Returns
        -------
        Buffer
            A new buffer object for the new pointer and size.
        """
        with Transaction() as trans:
            # Reserve a completely new, larger VA range
            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(
                lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0])
            )

            # Get the old allocation handle for remapping
            # NOTE(review): this retrieves the handle found at the buffer's base
            # address; a buffer that was previously grown in place is backed by
            # more than one physical allocation - confirm that case is handled.
            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
            raise_if_driver_error(result)
            # Register undo for old_handle
            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Unmap the old VA range (aligned previous size)
            aligned_prev_size = total_aligned_size - aligned_additional_size
            (result,) = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(result)

            def _remap_old():
                # Try to remap the old physical memory back to the original VA range.
                # This is best-effort: a failure here must not stop the remaining
                # undo steps, so it is logged instead of raised.
                try:
                    (res,) = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
                    raise_if_driver_error(res)
                except Exception:
                    import logging

                    logging.getLogger(__name__).exception(
                        "Failed to remap the original allocation while rolling back a grow operation"
                    )

            trans.append(_remap_old)

            # Remap the old physical memory to the new VA range (aligned previous size)
            (res,) = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))

            # Create new physical memory for the additional size
            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
            raise_if_driver_error(res)

            # Register undo for new physical memory
            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # Map the new physical memory to the extended portion (aligned offset)
            (res,) = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
            raise_if_driver_error(res)

            # Register undo for mapping
            trans.append(
                lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(
                    driver.cuMemUnmap(base + offs, s)[0]
                )
            )

            # Set access permissions for the entire new range
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            # All succeeded, cancel undo actions
            trans.commit()

            # Free the old VA range (aligned previous size)
            (res2,) = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
            raise_if_driver_error(res2)

            # Invalidate the old buffer so its destructor won't try to free again
            buf._clear()

            # Wrap the new mapping before the final release so the new range is
            # owned by a Buffer even if that release fails.
            new_buf = Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)

            # The reference retained above was only needed for remapping. Its undo
            # action was cancelled by commit(), so release it explicitly here.
            (res3,) = driver.cuMemRelease(old_handle)
            raise_if_driver_error(res3)

            return new_buf

    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list:
        """
        Build access descriptors for memory access permissions.

        One descriptor is produced for the owner (using the location in
        ``prop``) when ``self_access`` maps to non-zero flags, plus one per
        entry of ``peers`` when ``peer_access`` maps to non-zero flags.

        Returns
        -------
        list
            List of CUmemAccessDesc objects for setting memory access
        """
        descs = []

        # Owner access
        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
        if owner_flags:
            d = driver.CUmemAccessDesc()
            d.location.type = prop.location.type
            d.location.id = prop.location.id
            d.flags = owner_flags
            descs.append(d)

        # Peer device access
        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
        if peer_flags:
            for peer_dev in self.config.peers:
                d = driver.CUmemAccessDesc()
                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                d.location.id = int(peer_dev)
                d.flags = peer_flags
                descs.append(d)

        return descs

    def allocate(self, size: int, *, stream: Stream | None = None) -> Buffer:
        """
        Allocate a buffer of the given size using CUDA virtual memory.

        Parameters
        ----------
        size : int
            The size in bytes of the buffer to allocate. It is rounded up to
            the configured granularity; the returned buffer reports the
            rounded size.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.

        Returns
        -------
        Buffer
            A Buffer object representing the allocated virtual memory.

        Raises
        ------
        CUDAError
            If any CUDA driver API call fails during allocation.

        Notes
        -----
        This method uses transactional allocation: if any step fails, all resources
        allocated so far are automatically cleaned up. The allocation is performed
        with the configured granularity, access permissions, and peer access as
        specified in the resource's configuration.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            # Only validates the argument; the stream itself is not used.
            Stream_accept(stream)

        config = self.config
        # ---- Build allocation properties ----
        prop = self._build_allocation_prop(config)

        # ---- Query and apply granularity ----
        # Choose min vs recommended granularity per config
        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
        raise_if_driver_error(res)

        aligned_size = VirtualMemoryResource._align_up(size, gran)
        addr_align = config.addr_align or gran

        # ---- Transactional allocation ----
        with Transaction() as trans:
            # ---- Create physical memory ----
            # NOTE(review): this creation reference is not released anywhere in
            # this class on the success path (deallocate only balances its own
            # retain) - confirm against the cuMemRelease reference-counting rules.
            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
            raise_if_driver_error(res)
            # Register undo for physical memory
            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))

            # ---- Reserve VA space ----
            # Potentially, use a separate size for the VA reservation from the physical allocation size
            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
            raise_if_driver_error(res)
            # Register undo for VA reservation
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))

            # ---- Map physical memory into VA ----
            (res,) = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
            raise_if_driver_error(res)
            # Register the unmap undo only once the mapping exists; otherwise a
            # failed map would make the rollback unmap a range that was never mapped.
            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))

            # ---- Set access for owner + peers ----
            descs = self._build_access_descriptors(prop)
            if descs:
                (res,) = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
                raise_if_driver_error(res)

            trans.commit()

        # Done — return a Buffer that tracks this VA range
        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
        return buf

    def deallocate(self, ptr: int, size: int, *, stream: Stream | None = None) -> None:
        """
        Deallocate memory on the device using CUDA VMM APIs.

        The range is unmapped, its virtual address reservation is freed, and
        the allocation handle retained for the operation is released again.

        Parameters
        ----------
        ptr : int
            The pointer to the memory to deallocate.
        size : int
            The size in bytes of the memory to deallocate.
        stream : Stream, optional
            Keyword-only. Unused because virtual memory operations are
            synchronous.
        """
        if stream is not None:
            from cuda.core._stream import Stream_accept

            # Only validates the argument; the stream itself is not used.
            Stream_accept(stream)
        result, handle = driver.cuMemRetainAllocationHandle(ptr)
        raise_if_driver_error(result)
        try:
            (result,) = driver.cuMemUnmap(ptr, size)
            raise_if_driver_error(result)
            (result,) = driver.cuMemAddressFree(ptr, size)
            raise_if_driver_error(result)
        except BaseException:
            # Do not leak the reference retained above. The release result is
            # ignored here so the original failure is the one that propagates.
            driver.cuMemRelease(handle)
            raise
        (result,) = driver.cuMemRelease(handle)
        raise_if_driver_error(result)

    @property
    def is_device_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the device.
        """
        return self.config.location_type == "device"

    @property
    def is_host_accessible(self) -> bool:
        """
        Indicates whether the allocated memory is accessible from the host.

        True for every host location type ("host", "host_numa",
        "host_numa_current"), matching the test used in ``__init__``.
        """
        return "host" in self.config.location_type

    @property
    def device_id(self) -> int:
        """
        Get the device ID associated with this memory resource.

        Returns
        -------
        int
            CUDA device ID. -1 if the memory resource allocates host memory
        """
        return self.device.device_id if self.config.location_type == "device" else -1

    def __repr__(self) -> str:
        """
        Return a string representation of the VirtualMemoryResource.

        Returns
        -------
        str
            A string describing the object
        """
        return f"<VirtualMemoryResource device={self.device}>"

Coverage for cuda / core / _memory / _virtual_memory_resource.py: 88.75%

240 statements