Coverage for cuda/core/experimental/

3# SPDX-License-Identifier: Apache-2.0

5cimport cpython

6from libc.stdint cimport uintptr_t

8from cuda.bindings cimport cydriver

9from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN

11import threading

12from typing import Optional, TYPE_CHECKING, Union

14from cuda.core.experimental._context import Context, ContextOptions

15from cuda.core.experimental._event import Event, EventOptions

16from cuda.core.experimental._graph import GraphBuilder

17from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions

18from cuda.core.experimental._utils.clear_error_support import assert_type

19from cuda.core.experimental._utils.cuda_utils import (

20 ComputeCapability,

21 CUDAError,

22 driver,

23 handle_return,

24 runtime,

25)

26from cuda.core.experimental._stream cimport default_stream

28if TYPE_CHECKING:

29 from cuda.core.experimental._memory import Buffer, MemoryResource

31# TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python,

32# but it seems it is very convenient to expose them for testing purposes...

33_tls = threading.local()

34_lock = threading.Lock()

35cdef bint _is_cuInit = False

38cdef class DeviceProperties:

39 """

40 A class to query various attributes of a CUDA device.

42 Attributes are read-only and provide information about the device.

43 """

44 cdef:

45 int _handle

46 dict _cache

48 def __init__(self, *args, **kwargs):

49 raise RuntimeError("DeviceProperties cannot be instantiated directly. Please use Device APIs.")

51 @classmethod

52 def _init(cls, handle):

53 cdef DeviceProperties self = DeviceProperties.__new__(cls)

54 self._handle = handle

55 self._cache = {}

56 return self

58 cdef inline _get_attribute(self, cydriver.CUdevice_attribute attr):

59 """Retrieve the attribute value directly from the driver."""

60 cdef int val

61 with nogil:

62 HANDLE_RETURN(cydriver.cuDeviceGetAttribute(&val, attr, self._handle))

63 return val

65 cdef _get_cached_attribute(self, attr):

66 """Retrieve the attribute value, using cache if applicable."""

67 if attr not in self._cache:

68 self._cache[attr] = self._get_attribute(attr)

69 return self._cache[attr]

71 @property

72 def max_threads_per_block(self) -> int:

73 """int: Maximum number of threads per block."""

74 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)

76 @property

77 def max_block_dim_x(self) -> int:

78 """int: Maximum block dimension X."""

79 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X)

81 @property

82 def max_block_dim_y(self) -> int:

83 """int: Maximum block dimension Y."""

84 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y)

86 @property

87 def max_block_dim_z(self) -> int:

88 """int: Maximum block dimension Z."""

89 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z)

91 @property

92 def max_grid_dim_x(self) -> int:

93 """int: Maximum grid dimension X."""

94 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X)

96 @property

97 def max_grid_dim_y(self) -> int:

98 """int: Maximum grid dimension Y."""

99 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y)

100

101 @property

102 def max_grid_dim_z(self) -> int:

103 """int: Maximum grid dimension Z."""

104 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z)

105

106 @property

107 def max_shared_memory_per_block(self) -> int:

108 """int: Maximum shared memory available per block in bytes."""

109 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK)

110

111 @property

112 def total_constant_memory(self) -> int:

113 """int: Memory available on device for constant variables in a CUDA C kernel in bytes."""

114 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY)

115

116 @property

117 def warp_size(self) -> int:

118 """int: Warp size in threads."""

119 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE)

120

121 @property

122 def max_pitch(self) -> int:

123 """int: Maximum pitch in bytes allowed by memory copies."""

124 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH)

125

126 @property

127 def maximum_texture1d_width(self) -> int:

128 """int: Maximum 1D texture width."""

129 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH)

130

131 @property

132 def maximum_texture1d_linear_width(self) -> int:

133 """int: Maximum width for a 1D texture bound to linear memory."""

134 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH)

135

136 @property

137 def maximum_texture1d_mipmapped_width(self) -> int:

138 """int: Maximum mipmapped 1D texture width."""

139 return self._get_cached_attribute(

140 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH

141 )

142

143 @property

144 def maximum_texture2d_width(self) -> int:

145 """int: Maximum 2D texture width."""

146 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH)

147

148 @property

149 def maximum_texture2d_height(self) -> int:

150 """int: Maximum 2D texture height."""

151 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT)

152

153 @property

154 def maximum_texture2d_linear_width(self) -> int:

155 """int: Maximum width for a 2D texture bound to linear memory."""

156 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH)

157

158 @property

159 def maximum_texture2d_linear_height(self) -> int:

160 """int: Maximum height for a 2D texture bound to linear memory."""

161 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT)

162

163 @property

164 def maximum_texture2d_linear_pitch(self) -> int:

165 """int: Maximum pitch in bytes for a 2D texture bound to linear memory."""

166 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH)

167

168 @property

169 def maximum_texture2d_mipmapped_width(self) -> int:

170 """int: Maximum mipmapped 2D texture width."""

171 return self._get_cached_attribute(

172 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH

173 )

174

175 @property

176 def maximum_texture2d_mipmapped_height(self) -> int:

177 """int: Maximum mipmapped 2D texture height."""

178 return self._get_cached_attribute(

179 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT

180 )

181

182 @property

183 def maximum_texture3d_width(self) -> int:

184 """int: Maximum 3D texture width."""

185 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH)

186

187 @property

188 def maximum_texture3d_height(self) -> int:

189 """int: Maximum 3D texture height."""

190 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT)

191

192 @property

193 def maximum_texture3d_depth(self) -> int:

194 """int: Maximum 3D texture depth."""

195 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH)

196

197 @property

198 def maximum_texture3d_width_alternate(self) -> int:

199 """int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported."""

200 return self._get_cached_attribute(

201 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE

202 )

203

204 @property

205 def maximum_texture3d_height_alternate(self) -> int:

206 """int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported."""

207 return self._get_cached_attribute(

208 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE

209 )

210

211 @property

212 def maximum_texture3d_depth_alternate(self) -> int:

213 """int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported."""

214 return self._get_cached_attribute(

215 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE

216 )

217

218 @property

219 def maximum_texturecubemap_width(self) -> int:

220 """int: Maximum cubemap texture width or height."""

221 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH)

222

223 @property

224 def maximum_texture1d_layered_width(self) -> int:

225 """int: Maximum 1D layered texture width."""

226 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH)

227

228 @property

229 def maximum_texture1d_layered_layers(self) -> int:

230 """int: Maximum layers in a 1D layered texture."""

231 return self._get_cached_attribute(

232 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS

233 )

234

235 @property

236 def maximum_texture2d_layered_width(self) -> int:

237 """int: Maximum 2D layered texture width."""

238 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH)

239

240 @property

241 def maximum_texture2d_layered_height(self) -> int:

242 """int: Maximum 2D layered texture height."""

243 return self._get_cached_attribute(

244 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT

245 )

246

247 @property

248 def maximum_texture2d_layered_layers(self) -> int:

249 """int: Maximum layers in a 2D layered texture."""

250 return self._get_cached_attribute(

251 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS

252 )

253

254 @property

255 def maximum_texturecubemap_layered_width(self) -> int:

256 """int: Maximum cubemap layered texture width or height."""

257 return self._get_cached_attribute(

258 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH

259 )

260

261 @property

262 def maximum_texturecubemap_layered_layers(self) -> int:

263 """int: Maximum layers in a cubemap layered texture."""

264 return self._get_cached_attribute(

265 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS

266 )

267

268 @property

269 def maximum_surface1d_width(self) -> int:

270 """int: Maximum 1D surface width."""

271 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH)

272

273 @property

274 def maximum_surface2d_width(self) -> int:

275 """int: Maximum 2D surface width."""

276 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH)

277

278 @property

279 def maximum_surface2d_height(self) -> int:

280 """int: Maximum 2D surface height."""

281 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT)

282

283 @property

284 def maximum_surface3d_width(self) -> int:

285 """int: Maximum 3D surface width."""

286 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH)

287

288 @property

289 def maximum_surface3d_height(self) -> int:

290 """int: Maximum 3D surface height."""

291 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT)

292

293 @property

294 def maximum_surface3d_depth(self) -> int:

295 """int: Maximum 3D surface depth."""

296 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH)

297

298 @property

299 def maximum_surface1d_layered_width(self) -> int:

300 """int: Maximum 1D layered surface width."""

301 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH)

302

303 @property

304 def maximum_surface1d_layered_layers(self) -> int:

305 """int: Maximum layers in a 1D layered surface."""

306 return self._get_cached_attribute(

307 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS

308 )

309

310 @property

311 def maximum_surface2d_layered_width(self) -> int:

312 """int: Maximum 2D layered surface width."""

313 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH)

314

315 @property

316 def maximum_surface2d_layered_height(self) -> int:

317 """int: Maximum 2D layered surface height."""

318 return self._get_cached_attribute(

319 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT

320 )

321

322 @property

323 def maximum_surface2d_layered_layers(self) -> int:

324 """int: Maximum layers in a 2D layered surface."""

325 return self._get_cached_attribute(

326 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS

327 )

328

329 @property

330 def maximum_surfacecubemap_width(self) -> int:

331 """int: Maximum cubemap surface width."""

332 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH)

333

334 @property

335 def maximum_surfacecubemap_layered_width(self) -> int:

336 """int: Maximum cubemap layered surface width."""

337 return self._get_cached_attribute(

338 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH

339 )

340

341 @property

342 def maximum_surfacecubemap_layered_layers(self) -> int:

343 """int: Maximum layers in a cubemap layered surface."""

344 return self._get_cached_attribute(

345 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS

346 )

347

348 @property

349 def max_registers_per_block(self) -> int:

350 """int: Maximum number of 32-bit registers available to a thread block."""

351 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK)

352

353 @property

354 def clock_rate(self) -> int:

355 """int: Typical clock frequency in kilohertz."""

356 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE)

357

358 @property

359 def texture_alignment(self) -> int:

360 """int: Alignment requirement for textures."""

361 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT)

362

363 @property

364 def texture_pitch_alignment(self) -> int:

365 """int: Pitch alignment requirement for textures."""

366 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT)

367

368 @property

369 def gpu_overlap(self) -> bool:

370 """bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead async_engine_count."""

371 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP))

372

373 @property

374 def multiprocessor_count(self) -> int:

375 """int: Number of multiprocessors on device."""

376 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)

377

378 @property

379 def kernel_exec_timeout(self) -> bool:

380 """bool: Specifies whether there is a run time limit on kernels."""

381 return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT))

382

383 @property

384 def integrated(self) -> bool:

385 """bool: Device is integrated with host memory."""

386 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED))

387

388 @property

389 def can_map_host_memory(self) -> bool:

390 """bool: Device can map host memory into CUDA address space."""

391 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY))

392

393 @property

394 def compute_mode(self) -> int:

395 """int: Compute mode (See CUcomputemode for details)."""

396 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE)

397

398 @property

399 def concurrent_kernels(self) -> bool:

400 """bool: Device can possibly execute multiple kernels concurrently."""

401 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS))

402

403 @property

404 def ecc_enabled(self) -> bool:

405 """bool: Device has ECC support enabled."""

406 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED))

407

408 @property

409 def pci_bus_id(self) -> int:

410 """int: PCI bus ID of the device."""

411 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID)

412

413 @property

414 def pci_device_id(self) -> int:

415 """int: PCI device ID of the device."""

416 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID)

417

418 @property

419 def pci_domain_id(self) -> int:

420 """int: PCI domain ID of the device."""

421 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID)

422

423 @property

424 def tcc_driver(self) -> bool:

425 """bool: Device is using TCC driver model."""

426 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER))

427

428 @property

429 def memory_clock_rate(self) -> int:

430 """int: Peak memory clock frequency in kilohertz."""

431 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE)

432

433 @property

434 def global_memory_bus_width(self) -> int:

435 """int: Global memory bus width in bits."""

436 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH)

437

438 @property

439 def l2_cache_size(self) -> int:

440 """int: Size of L2 cache in bytes."""

441 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE)

442

443 @property

444 def max_threads_per_multiprocessor(self) -> int:

445 """int: Maximum resident threads per multiprocessor."""

446 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR)

447

448 @property

449 def unified_addressing(self) -> bool:

450 """bool: Device shares a unified address space with the host."""

451 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING))

452

453 @property

454 def compute_capability_major(self) -> int:

455 """int: Major compute capability version number."""

456 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR)

457

458 @property

459 def compute_capability_minor(self) -> int:

460 """int: Minor compute capability version number."""

461 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)

462

463 @property

464 def global_l1_cache_supported(self) -> bool:

465 """bool: Device supports caching globals in L1."""

466 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED))

467

468 @property

469 def local_l1_cache_supported(self) -> bool:

470 """bool: Device supports caching locals in L1."""

471 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED))

472

473 @property

474 def max_shared_memory_per_multiprocessor(self) -> int:

475 """int: Maximum shared memory available per multiprocessor in bytes."""

476 return self._get_cached_attribute(

477 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR

478 )

479

480 @property

481 def max_registers_per_multiprocessor(self) -> int:

482 """int: Maximum number of 32-bit registers available per multiprocessor."""

483 return self._get_cached_attribute(

484 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR

485 )

486

487 @property

488 def managed_memory(self) -> bool:

489 """bool: Device can allocate managed memory on this system."""

490 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY))

491

492 @property

493 def multi_gpu_board(self) -> bool:

494 """bool: Device is on a multi-GPU board."""

495 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD))

496

497 @property

498 def multi_gpu_board_group_id(self) -> int:

499 """int: Unique id for a group of devices on the same multi-GPU board."""

500 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID)

501

502 @property

503 def host_native_atomic_supported(self) -> bool:

504 """bool: Link between the device and the host supports all native atomic operations."""

505 return bool(

506 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED)

507 )

508

509 @property

510 def single_to_double_precision_perf_ratio(self) -> int:

511 """int: Ratio of single precision performance (in floating-point operations per second) to double precision performance."""

512 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO)

513

514 @property

515 def pageable_memory_access(self) -> bool:

516 """bool: Device supports coherently accessing pageable memory without calling cudaHostRegister on it."""

517 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS))

518

519 @property

520 def concurrent_managed_access(self) -> bool:

521 """bool: Device can coherently access managed memory concurrently with the CPU."""

522 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS))

523

524 @property

525 def compute_preemption_supported(self) -> bool:

526 """bool: Device supports compute preemption."""

527 return bool(

528 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED)

529 )

530

531 @property

532 def can_use_host_pointer_for_registered_mem(self) -> bool:

533 """bool: Device can access host registered memory at the same virtual address as the CPU."""

534 return bool(

535 self._get_cached_attribute(

536 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM

537 )

538 )

539

540 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675)

541

542 @property

543 def cooperative_launch(self) -> bool:

544 """bool: Device supports launching cooperative kernels via cuLaunchCooperativeKernel."""

545 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH))

546

547 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675)

548

549 @property

550 def max_shared_memory_per_block_optin(self) -> int:

551 """int: Maximum optin shared memory per block."""

552 return self._get_cached_attribute(

553 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN

554 )

555

556 @property

557 def pageable_memory_access_uses_host_page_tables(self) -> bool:

558 """bool: Device accesses pageable memory via the host's page tables."""

559 return bool(

560 self._get_cached_attribute(

561 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES

562 )

563 )

564

565 @property

566 def direct_managed_mem_access_from_host(self) -> bool:

567 """bool: The host can directly access managed memory on the device without migration."""

568 return bool(

569 self._get_cached_attribute(

570 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST

571 )

572 )

573

574 @property

575 def virtual_memory_management_supported(self) -> bool:

576 """bool: Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs."""

577 return bool(

578 self._get_cached_attribute(

579 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED

580 )

581 )

582

583 @property

584 def handle_type_posix_file_descriptor_supported(self) -> bool:

585 """bool: Device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, if requested via cuMemCreate."""

586 return bool(

587 self._get_cached_attribute(

588 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED

589 )

590 )

591

592 @property

593 def handle_type_win32_handle_supported(self) -> bool:

594 """bool: Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, if requested via cuMemCreate."""

595 return bool(

596 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED)

597 )

598

599 @property

600 def handle_type_win32_kmt_handle_supported(self) -> bool:

601 """bool: Device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, if requested via cuMemCreate."""

602 return bool(

603 self._get_cached_attribute(

604 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED

605 )

606 )

607

608 @property

609 def max_blocks_per_multiprocessor(self) -> int:

610 """int: Maximum number of blocks per multiprocessor."""

611 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR)

612

613 @property

614 def generic_compression_supported(self) -> bool:

615 """bool: Device supports compression of memory."""

616 return bool(

617 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED)

618 )

619

620 @property

621 def max_persisting_l2_cache_size(self) -> int:

622 """int: Maximum L2 persisting lines capacity setting in bytes."""

623 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE)

624

625 @property

626 def max_access_policy_window_size(self) -> int:

627 """int: Maximum value of CUaccessPolicyWindow.num_bytes."""

628 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE)

629

630 @property

631 def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool:

632 """bool: Device supports specifying the GPUDirect RDMA flag with cuMemCreate."""

633 return bool(

634 self._get_cached_attribute(

635 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED

636 )

637 )

638

639 @property

640 def reserved_shared_memory_per_block(self) -> int:

641 """int: Shared memory reserved by CUDA driver per block in bytes."""

642 return self._get_cached_attribute(

643 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK

644 )

645

646 @property

647 def sparse_cuda_array_supported(self) -> bool:

648 """bool: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays."""

649 return bool(

650 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED)

651 )

652

653 @property

654 def read_only_host_register_supported(self) -> bool:

655 """bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not."""

656 return bool(

657 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED)

658 )

659

660 @property

661 def memory_pools_supported(self) -> bool:

662 """bool: Device supports using the cuMemAllocAsync and cuMemPool family of APIs."""

663 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED))

664

665 @property

666 def gpu_direct_rdma_supported(self) -> bool:

667 """bool: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)."""

668 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED))

669

670 @property

671 def gpu_direct_rdma_flush_writes_options(self) -> int:

672 """int: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum."""

673 return self._get_cached_attribute(

674 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS

675 )

676

677 @property

678 def gpu_direct_rdma_writes_ordering(self) -> int:

679 """int: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical values returned here."""

680 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING)

681

682 @property

683 def mempool_supported_handle_types(self) -> int:

684 """int: Handle types supported with mempool based IPC."""

685 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)

686

687 @property

688 def deferred_mapping_cuda_array_supported(self) -> bool:

689 """bool: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays."""

690 return bool(

691 self._get_cached_attribute(

692 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED

693 )

694 )

695

696 @property

697 def numa_config(self) -> int:

698 """int: NUMA configuration of a device: value is of type CUdeviceNumaConfig enum."""

699 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG)

700

701 @property

702 def numa_id(self) -> int:

703 """int: NUMA node ID of the GPU memory."""

704 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID)

705

706 @property

707 def multicast_supported(self) -> bool:

708 """bool: Device supports switch multicast and reduction operations."""

709 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED))

710

711 @property

712 def surface_alignment(self) -> int:

713 """int: Surface alignment requirement in bytes."""

714 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT)

715

716 @property

717 def async_engine_count(self) -> int:

718 """int: Number of asynchronous engines."""

719 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT)

720

721 @property

722 def can_tex2d_gather(self) -> bool:

723 """bool: True if device supports 2D texture gather operations, False if not."""

724 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER))

725

726 @property

727 def maximum_texture2d_gather_width(self) -> int:

728 """int: Maximum 2D texture gather width."""

729 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH)

730

731 @property

732 def maximum_texture2d_gather_height(self) -> int:

733 """int: Maximum 2D texture gather height."""

734 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT)

735

736 @property

737 def stream_priorities_supported(self) -> bool:

738 """bool: True if device supports stream priorities, False if not."""

739 return bool(

740 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED)

741 )

742

743 @property

744 def can_flush_remote_writes(self) -> bool:

745 """bool: The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See Stream Memory Operations for additional details."""

746 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES))

747

748 @property

749 def host_register_supported(self) -> bool:

750 """bool: Device supports host memory registration via cudaHostRegister."""

751 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED))

752

753 @property

754 def timeline_semaphore_interop_supported(self) -> bool:

755 """bool: External timeline semaphore interop is supported on the device."""

756 return bool(

757 self._get_cached_attribute(

758 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED

759 )

760 )

761

762 @property

763 def cluster_launch(self) -> bool:

764 """bool: Indicates device supports cluster launch."""

765 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH))

766

767 @property

768 def can_use_64_bit_stream_mem_ops(self) -> bool:

769 """bool: 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs."""

770 return bool(

771 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS)

772 )

773

774 @property

775 def can_use_stream_wait_value_nor(self) -> bool:

776 """bool: CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs."""

777 return bool(

778 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR)

779 )

780

781 @property

782 def dma_buf_supported(self) -> bool:

783 """bool: Device supports buffer sharing with dma_buf mechanism."""

784 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED))

785

786 @property

787 def ipc_event_supported(self) -> bool:

788 """bool: Device supports IPC Events."""

789 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED))

790

791 @property

792 def mem_sync_domain_count(self) -> int:

793 """int: Number of memory domains the device supports."""

794 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT)

795

796 @property

797 def tensor_map_access_supported(self) -> bool:

798 """bool: Device supports accessing memory using Tensor Map."""

799 return bool(

800 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED)

801 )

802

803 @property

804 def handle_type_fabric_supported(self) -> bool:

805 """bool: Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or requested with cuMemCreate()."""

806 return bool(

807 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED)

808 )

809

810 @property

811 def unified_function_pointers(self) -> bool:

812 """bool: Device supports unified function pointers."""

813 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS))

814

815 @property

816 def mps_enabled(self) -> bool:

817 """bool: Indicates if contexts created on this device will be shared via MPS."""

818 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MPS_ENABLED))

819

820 @property

821 def host_numa_id(self) -> int:

822 """int: NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA."""

823 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID)

824

825 @property

826 def d3d12_cig_supported(self) -> bool:

827 """bool: Device supports CIG with D3D12."""

828 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED))

829

830 @property

831 def mem_decompress_algorithm_mask(self) -> int:

832 """int: The returned valued shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum."""

833 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK)

834

835 @property

836 def mem_decompress_maximum_length(self) -> int:

837 """int: The returned valued is the maximum length in bytes of a single decompress operation that is allowed."""

838 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH)

839

840 @property

841 def vulkan_cig_supported(self) -> bool:

842 """bool: Device supports CIG with Vulkan."""

843 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VULKAN_CIG_SUPPORTED))

844

845 @property

846 def gpu_pci_device_id(self) -> int:

847 """int: The combined 16-bit PCI device ID and 16-bit PCI vendor ID."""

848 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID)

849

850 @property

851 def gpu_pci_subsystem_id(self) -> int:

852 """int: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID."""

853 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID)

854

855 @property

856 def host_numa_virtual_memory_management_supported(self) -> bool:

857 """bool: Device supports HOST_NUMA location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs."""

858 return bool(

859 self._get_cached_attribute(

860 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED

861 )

862 )

863

864 @property

865 def host_numa_memory_pools_supported(self) -> bool:

866 """bool: Device supports HOST_NUMA location with the cuMemAllocAsync and cuMemPool family of APIs."""

867 return bool(

868 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MEMORY_POOLS_SUPPORTED)

869 )

870

871 @property

872 def host_numa_multinode_ipc_supported(self) -> bool:

873 """bool: Device supports HOST_NUMA location IPC between nodes in a multi-node system."""

874 return bool(

875 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MULTINODE_IPC_SUPPORTED)

876 )

877

878 @property

879 def host_memory_pools_supported(self) -> bool:

880 """bool: Device suports HOST location with the cuMemAllocAsync and cuMemPool family of APIs."""

881 return bool(

882 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_MEMORY_POOLS_SUPPORTED)

883 )

884

885 @property

886 def host_virtual_memory_management_supported(self) -> bool:

887 """bool: Device supports HOST location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs."""

888 return bool(

889 self._get_cached_attribute(

890 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED

891 )

892 )

893

894 @property

895 def host_alloc_dma_buf_supported(self) -> bool:

896 """bool: Device supports page-locked host memory buffer sharing with dma_buf mechanism."""

897 return bool(

898 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_ALLOC_DMA_BUF_SUPPORTED)

899 )

900

901 @property

902 def only_partial_host_native_atomic_supported(self) -> bool:

903 """bool: Link between the device and the host supports only some native atomic operations."""

904 return bool(

905 self._get_cached_attribute(

906 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ONLY_PARTIAL_HOST_NATIVE_ATOMIC_SUPPORTED

907 )

908 )

909

910

911cdef cydriver.CUcontext _get_primary_context(int dev_id) except?NULL:

912 try:

913 primary_ctxs = _tls.primary_ctxs

914 except AttributeError:

915 total = len(_tls.devices)

916 primary_ctxs = _tls.primary_ctxs = [0] * total

917 cdef cydriver.CUcontext ctx = <cydriver.CUcontext><uintptr_t>(primary_ctxs[dev_id])

918 if ctx == NULL:

919 with nogil:

920 HANDLE_RETURN(cydriver.cuDevicePrimaryCtxRetain(&ctx, dev_id))

921 primary_ctxs[dev_id] = <uintptr_t>(ctx)

922 return ctx

923

924

925class Device:

926 """Represent a GPU and act as an entry point for cuda.core features.

927

928 This is a singleton object that helps ensure interoperability

929 across multiple libraries imported in the process to both see

930 and use the same GPU device.

931

932 While acting as the entry point, many other CUDA resources can be

933 allocated such as streams and buffers. Any :obj:`~_context.Context` dependent

934 resource created through this device, will continue to refer to

935 this device's context.

936

937 Newly returned :obj:`~_device.Device` objects are thread-local singletons

938 for a specified device.

939

940 Note

941 ----

942 Will not initialize the GPU.

943

944 Parameters

945 ----------

946 device_id : int, optional

947 Device ordinal to return a :obj:`~_device.Device` object for.

948 Default value of `None` return the currently used device.

949

950 """

951 __slots__ = ("_id", "_memory_resource", "_has_inited", "_properties", "_uuid")

952

953 def __new__(cls, device_id: Device | int | None = None):

954 # Handle device_id argument.

955 if isinstance(device_id, Device):

956 return device_id

957 else:

958 device_id = getattr(device_id, 'device_id', device_id)

959

960 # Initialize CUDA.

961 global _is_cuInit

962 if _is_cuInit is False:

963 with _lock, nogil:

964 HANDLE_RETURN(cydriver.cuInit(0))

965 _is_cuInit = True

966

967 # important: creating a Device instance does not initialize the GPU!

968 cdef cydriver.CUdevice dev

969 cdef cydriver.CUcontext ctx

970 if device_id is None:

971 with nogil:

972 err = cydriver.cuCtxGetDevice(&dev)

973 if err == cydriver.CUresult.CUDA_SUCCESS:

974 device_id = int(dev)

975 elif err == cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT:

976 with nogil:

977 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx))

978 assert <void*>(ctx) == NULL

979 device_id = 0 # cudart behavior

980 else:

981 HANDLE_RETURN(err)

982 elif device_id < 0:

983 raise ValueError(f"device_id must be >= 0, got {device_id}")

984

985 # ensure Device is singleton

986 cdef int total

987 try:

988 devices = _tls.devices

989 except AttributeError:

990 with nogil:

991 HANDLE_RETURN(cydriver.cuDeviceGetCount(&total))

992 devices = _tls.devices = []

993 for dev_id in range(total):

994 device = super().__new__(cls)

995 device._id = dev_id

996 device._memory_resource = None

997 device._has_inited = False

998 device._properties = None

999 device._uuid = None

1000 devices.append(device)

1001

1002 try:

1003 return devices[device_id]

1004 except IndexError:

1005 raise ValueError(f"device_id must be within [0, {len(devices)}), got {device_id}") from None

1006

1007 def _check_context_initialized(self):

1008 if not self._has_inited:

1009 raise CUDAError(

1010 f"Device {self._id} is not yet initialized, perhaps you forgot to call .set_current() first?"

1011 )

1012

1013 def _get_current_context(self, bint check_consistency=False) -> driver.CUcontext:

1014 cdef cydriver.CUcontext ctx

1015 cdef cydriver.CUdevice dev

1016 cdef cydriver.CUdevice this_dev = self._id

1017 with nogil:

1018 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx))

1019 if ctx == NULL:

1020 raise CUDAError("No context is bound to the calling CPU thread.")

1021 if check_consistency:

1022 HANDLE_RETURN(cydriver.cuCtxGetDevice(&dev))

1023 if dev != this_dev:

1024 raise CUDAError("Internal error (current device is not equal to Device.device_id)")

1025 return driver.CUcontext(<uintptr_t>ctx)

1026

1027 @property

1028 def device_id(self) -> int:

1029 """Return device ordinal."""

1030 return self._id

1031

1032 @property

1033 def pci_bus_id(self) -> str:

1034 """Return a PCI Bus Id string for this device."""

1035 bus_id = handle_return(runtime.cudaDeviceGetPCIBusId(13, self._id))

1036 return bus_id[:12].decode()

1037

1038 def can_access_peer(self, peer: Device | int) -> bool:

1039 """Check if this device can access memory from the specified peer device.

1040

1041 Queries whether peer-to-peer memory access is supported between this

1042 device and the specified peer device.

1043

1044 Parameters

1045 ----------

1046 peer : Device | int

1047 The peer device to check accessibility to. Can be a Device object or device ID.

1048 """

1049 peer = Device(peer)

1050 cdef int d1 = <int> self.device_id

1051 cdef int d2 = <int> peer.device_id

1052 if d1 == d2:

1053 return True

1054 cdef int value = 0

1055 with nogil:

1056 HANDLE_RETURN(cydriver.cuDeviceCanAccessPeer(&value, d1, d2))

1057 return bool(value)

1058

1059 @property

1060 def uuid(self) -> str:

1061 """Return a UUID for the device.

1062

1063 Returns 16-octets identifying the device. If the device is in

1064 MIG mode, returns its MIG UUID which uniquely identifies the

1065 subscribed MIG compute instance.

1066

1067 Note

1068 ----

1069 MIG UUID is only returned when device is in MIG mode and the

1070 driver is older than CUDA 11.4.

1071

1072 The UUID is cached after first access to avoid repeated CUDA API calls.

1073

1074 """

1075 cdef cydriver.CUuuid uuid

1076 cdef cydriver.CUdevice dev

1077 cdef bytes uuid_b

1078 cdef str uuid_hex

1079

1080 if self._uuid is None:

1081 dev = self._id

1082 with nogil:

1083 IF CUDA_CORE_BUILD_MAJOR == "12":

1084 HANDLE_RETURN(cydriver.cuDeviceGetUuid_v2(&uuid, dev))

1085 ELSE: # 13.0+

1086 HANDLE_RETURN(cydriver.cuDeviceGetUuid(&uuid, dev))

1087 uuid_b = cpython.PyBytes_FromStringAndSize(uuid.bytes, sizeof(uuid.bytes))

1088 uuid_hex = uuid_b.hex()

1089 # 8-4-4-4-12

1090 self._uuid = f"{uuid_hex[:8]}-{uuid_hex[8:12]}-{uuid_hex[12:16]}-{uuid_hex[16:20]}-{uuid_hex[20:]}"

1091 return self._uuid

1092

1093 @property

1094 def name(self) -> str:

1095 """Return the device name."""

1096 # Use 256 characters to be consistent with CUDA Runtime

1097 cdef int LENGTH = 256

1098 cdef bytes name = bytes(LENGTH)

1099 cdef char* name_ptr = name

1100 cdef cydriver.CUdevice this_dev = self._id

1101 with nogil:

1102 HANDLE_RETURN(cydriver.cuDeviceGetName(name_ptr, LENGTH, this_dev))

1103 name = name.split(b"\0")[0]

1104 return name.decode()

1105

1106 @property

1107 def properties(self) -> DeviceProperties:

1108 """Return a :obj:`~_device.DeviceProperties` class with information about the device."""

1109 if self._properties is None:

1110 self._properties = DeviceProperties._init(self._id)

1111

1112 return self._properties

1113

1114 @property

1115 def compute_capability(self) -> ComputeCapability:

1116 """Return a named tuple with 2 fields: major and minor."""

1117 cdef DeviceProperties prop = self.properties

1118 if "compute_capability" in prop._cache:

1119 return prop._cache["compute_capability"]

1120 cc = ComputeCapability(prop.compute_capability_major, prop.compute_capability_minor)

1121 prop._cache["compute_capability"] = cc

1122 return cc

1123

1124 @property

1125 def arch(self) -> str:

1126 """Return compute capability as a string (e.g., '75' for CC 7.5)."""

1127 return f"{self.compute_capability.major}{self.compute_capability.minor}"

1128

1129 @property

1130 def context(self) -> Context:

1131 """Return the current :obj:`~_context.Context` associated with this device.

1132

1133 Note

1134 ----

1135 Device must be initialized.

1136

1137 """

1138 self._check_context_initialized()

1139 ctx = self._get_current_context(check_consistency=True)

1140 return Context._from_ctx(ctx, self._id)

1141

1142 @property

1143 def memory_resource(self) -> MemoryResource:

1144 """Return :obj:`~_memory.MemoryResource` associated with this device."""

1145 cdef int attr, device_id

1146 if self._memory_resource is None:

1147 # If the device is in TCC mode, or does not support memory pools for some other reason,

1148 # use the SynchronousMemoryResource which does not use memory pools.

1149 device_id = self._id

1150 with nogil:

1151 HANDLE_RETURN(

1152 cydriver.cuDeviceGetAttribute(

1153 &attr, cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device_id

1154 )

1155 )

1156 if attr == 1:

1157 from cuda.core.experimental._memory import DeviceMemoryResource

1158 self._memory_resource = DeviceMemoryResource(self._id)

1159 else:

1160 from cuda.core.experimental._memory import _SynchronousMemoryResource

1161 self._memory_resource = _SynchronousMemoryResource(self._id)

1162

1163 return self._memory_resource

1164

1165 @memory_resource.setter

1166 def memory_resource(self, mr):

1167 from cuda.core.experimental._memory import MemoryResource

1168 assert_type(mr, MemoryResource)

1169 self._memory_resource = mr

1170

1171 @property

1172 def default_stream(self) -> Stream:

1173 """Return default CUDA :obj:`~_stream.Stream` associated with this device.

1174

1175 The type of default stream returned depends on if the environment

1176 variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set.

1177

1178 If set, returns a per-thread default stream. Otherwise returns

1179 the legacy stream.

1180

1181 """

1182 return default_stream()

1183

1184 def __int__(self):

1185 """Return device_id."""

1186 return self._id

1187

1188 def __repr__(self):

1189 return f"<Device {self._id} ({self.name})>"

1190

1191 def __hash__(self) -> int:

1192 return hash(self.uuid)

1193

1194 def __eq__(self, other) -> bool:

1195 if not isinstance(other, Device):

1196 return NotImplemented

1197 return self._id == other._id

1198

1199 def __reduce__(self):

1200 return Device, (self.device_id,)

1201

1202 def set_current(self, ctx: Context = None) -> Union[Context, None]:

1203 """Set device to be used for GPU executions.

1204

1205 Initializes CUDA and sets the calling thread to a valid CUDA

1206 context. By default the primary context is used, but optional `ctx`

1207 parameter can be used to explicitly supply a :obj:`~_context.Context` object.

1208

1209 Providing a `ctx` causes the previous set context to be popped and returned.

1210

1211 Parameters

1212 ----------

1213 ctx : :obj:`~_context.Context`, optional

1214 Optional context to push onto this device's current thread stack.

1215

1216 Returns

1217 -------

1218 Union[:obj:`~_context.Context`, None], optional

1219 Popped context.

1220

1221 Examples

1222 --------

1223 Acts as an entry point of this object. Users always start a code by

1224 calling this method, e.g.

1225

1226 >>> from cuda.core.experimental import Device

1227 >>> dev0 = Device(0)

1228 >>> dev0.set_current()

1229 >>> # ... do work on device 0 ...

1230

1231 """

1232 cdef cydriver.CUcontext prev_ctx

1233 cdef cydriver.CUcontext curr_ctx

1234 if ctx is not None:

1235 # TODO: revisit once Context is cythonized

1236 assert_type(ctx, Context)

1237 if ctx._id != self._id:

1238 raise RuntimeError(

1239 "the provided context was created on the device with"

1240 f" id={ctx._id}, which is different from the target id={self._id}"

1241 )

1242 # prev_ctx is the previous context

1243 curr_ctx = <cydriver.CUcontext>(ctx._handle)

1244 with nogil:

1245 HANDLE_RETURN(cydriver.cuCtxPopCurrent(&prev_ctx))

1246 HANDLE_RETURN(cydriver.cuCtxPushCurrent(curr_ctx))

1247 self._has_inited = True

1248 if prev_ctx != NULL:

1249 return Context._from_ctx(<uintptr_t>(prev_ctx), self._id)

1250 else:

1251 # use primary ctx

1252 curr_ctx = _get_primary_context(self._id)

1253 with nogil:

1254 HANDLE_RETURN(cydriver.cuCtxSetCurrent(curr_ctx))

1255 self._has_inited = True

1256

1257 def create_context(self, options: ContextOptions = None) -> Context:

1258 """Create a new :obj:`~_context.Context` object.

1259

1260 Note

1261 ----

1262 The newly context will not be set as current.

1263

1264 Parameters

1265 ----------

1266 options : :obj:`~_context.ContextOptions`, optional

1267 Customizable dataclass for context creation options.

1268

1269 Returns

1270 -------

1271 :obj:`~_context.Context`

1272 Newly created context object.

1273

1274 """

1275 raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")

1276

1277 def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream:

1278 """Create a Stream object.

1279

1280 New stream objects can be created in two different ways:

1281

1282 1) Create a new CUDA stream with customizable ``options``.

1283 2) Wrap an existing foreign `obj` supporting the ``__cuda_stream__`` protocol.

1284

1285 Option (2) internally holds a reference to the foreign object

1286 such that the lifetime is managed.

1287

1288 Note

1289 ----

1290 Device must be initialized.

1291

1292 Parameters

1293 ----------

1294 obj : :obj:`~_stream.IsStreamT`, optional

1295 Any object supporting the ``__cuda_stream__`` protocol.

1296 options : :obj:`~_stream.StreamOptions`, optional

1297 Customizable dataclass for stream creation options.

1298

1299 Returns

1300 -------

1301 :obj:`~_stream.Stream`

1302 Newly created stream object.

1303

1304 """

1305 self._check_context_initialized()

1306 return Stream._init(obj=obj, options=options, device_id=self._id)

1307

1308 def create_event(self, options: EventOptions | None = None) -> Event:

1309 """Create an Event object without recording it to a Stream.

1310

1311 Note

1312 ----

1313 Device must be initialized.

1314

1315 Parameters

1316 ----------

1317 options : :obj:`EventOptions`, optional

1318 Customizable dataclass for event creation options.

1319

1320 Returns

1321 -------

1322 :obj:`~_event.Event`

1323 Newly created event object.

1324

1325 """

1326 self._check_context_initialized()

1327 ctx = self._get_current_context()

1328 return Event._init(self._id, ctx, options, True)

1329

1330 def allocate(self, size, stream: Stream | GraphBuilder | None = None) -> Buffer:

1331 """Allocate device memory from a specified stream.

1332

1333 Allocates device memory of `size` bytes on the specified `stream`

1334 using the memory resource currently associated with this Device.

1335

1336 Parameter `stream` is optional, using a default stream by default.

1337

1338 Note

1339 ----

1340 Device must be initialized.

1341

1342 Parameters

1343 ----------

1344 size : int

1345 Number of bytes to allocate.

1346 stream : :obj:`~_stream.Stream`, optional

1347 The stream establishing the stream ordering semantic.

1348 Default value of `None` uses default stream.

1349

1350 Returns

1351 -------

1352 :obj:`~_memory.Buffer`

1353 Newly created buffer object.

1354

1355 """

1356 self._check_context_initialized()

1357 return self.memory_resource.allocate(size, stream)

1358

1359 def sync(self):

1360 """Synchronize the device.

1361

1362 Note

1363 ----

1364 Device must be initialized.

1365

1366 """

1367 self._check_context_initialized()

1368 handle_return(runtime.cudaDeviceSynchronize())

1369

1370 def create_graph_builder(self) -> GraphBuilder:

1371 """Create a new :obj:`~_graph.GraphBuilder` object.

1372

1373 Returns

1374 -------

1375 :obj:`~_graph.GraphBuilder`

1376 Newly created graph builder object.

1377

1378 """

1379 self._check_context_initialized()

1380 return GraphBuilder._init(stream=self.create_stream(), is_stream_owner=True)

Coverage for cuda / core / experimental / _device.pyx: 92%

413 statements