Coverage for cuda / core / experimental / _device.pyx: 92%

413 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-10 01:19 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5cimport cpython 

6from libc.stdint cimport uintptr_t 

7  

8from cuda.bindings cimport cydriver 

9from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN 

10  

11import threading 

12from typing import Optional, TYPE_CHECKING, Union 

13  

14from cuda.core.experimental._context import Context, ContextOptions 

15from cuda.core.experimental._event import Event, EventOptions 

16from cuda.core.experimental._graph import GraphBuilder 

17from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions 

18from cuda.core.experimental._utils.clear_error_support import assert_type 

19from cuda.core.experimental._utils.cuda_utils import ( 

20 ComputeCapability, 

21 CUDAError, 

22 driver, 

23 handle_return, 

24 runtime, 

25) 

26from cuda.core.experimental._stream cimport default_stream 

27  

28if TYPE_CHECKING: 

29 from cuda.core.experimental._memory import Buffer, MemoryResource 

30  

31# TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python, 

32# but it seems it is very convenient to expose them for testing purposes... 

33_tls = threading.local() 

34_lock = threading.Lock() 

35cdef bint _is_cuInit = False 

36  

37  

38cdef class DeviceProperties: 

39 """ 

40 A class to query various attributes of a CUDA device. 

41  

42 Attributes are read-only and provide information about the device. 

43 """ 

44 cdef: 

45 int _handle 

46 dict _cache 

47  

48 def __init__(self, *args, **kwargs): 

49 raise RuntimeError("DeviceProperties cannot be instantiated directly. Please use Device APIs.") 

50  

51 @classmethod 

52 def _init(cls, handle): 

53 cdef DeviceProperties self = DeviceProperties.__new__(cls) 

54 self._handle = handle 

55 self._cache = {} 

56 return self 

57  

58 cdef inline _get_attribute(self, cydriver.CUdevice_attribute attr): 

59 """Retrieve the attribute value directly from the driver.""" 

60 cdef int val 

61 with nogil: 

62 HANDLE_RETURN(cydriver.cuDeviceGetAttribute(&val, attr, self._handle)) 

63 return val 

64  

65 cdef _get_cached_attribute(self, attr): 

66 """Retrieve the attribute value, using cache if applicable.""" 

67 if attr not in self._cache: 

68 self._cache[attr] = self._get_attribute(attr) 

69 return self._cache[attr] 

70  

71 @property 

72 def max_threads_per_block(self) -> int: 

73 """int: Maximum number of threads per block.""" 

74 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK) 

75  

76 @property 

77 def max_block_dim_x(self) -> int: 

78 """int: Maximum block dimension X.""" 

79 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X) 

80  

81 @property 

82 def max_block_dim_y(self) -> int: 

83 """int: Maximum block dimension Y.""" 

84 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y) 

85  

86 @property 

87 def max_block_dim_z(self) -> int: 

88 """int: Maximum block dimension Z.""" 

89 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z) 

90  

91 @property 

92 def max_grid_dim_x(self) -> int: 

93 """int: Maximum grid dimension X.""" 

94 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X) 

95  

96 @property 

97 def max_grid_dim_y(self) -> int: 

98 """int: Maximum grid dimension Y.""" 

99 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y) 

100  

101 @property 

102 def max_grid_dim_z(self) -> int: 

103 """int: Maximum grid dimension Z.""" 

104 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z) 

105  

106 @property 

107 def max_shared_memory_per_block(self) -> int: 

108 """int: Maximum shared memory available per block in bytes.""" 

109 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK) 

110  

111 @property 

112 def total_constant_memory(self) -> int: 

113 """int: Memory available on device for constant variables in a CUDA C kernel in bytes.""" 

114 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY) 

115  

116 @property 

117 def warp_size(self) -> int: 

118 """int: Warp size in threads.""" 

119 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE) 

120  

121 @property 

122 def max_pitch(self) -> int: 

123 """int: Maximum pitch in bytes allowed by memory copies.""" 

124 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH) 

125  

126 @property 

127 def maximum_texture1d_width(self) -> int: 

128 """int: Maximum 1D texture width.""" 

129 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH) 

130  

131 @property 

132 def maximum_texture1d_linear_width(self) -> int: 

133 """int: Maximum width for a 1D texture bound to linear memory.""" 

134 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH) 

135  

136 @property 

137 def maximum_texture1d_mipmapped_width(self) -> int: 

138 """int: Maximum mipmapped 1D texture width.""" 

139 return self._get_cached_attribute( 

140 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH 

141 ) 

142  

143 @property 

144 def maximum_texture2d_width(self) -> int: 

145 """int: Maximum 2D texture width.""" 

146 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH) 

147  

148 @property 

149 def maximum_texture2d_height(self) -> int: 

150 """int: Maximum 2D texture height.""" 

151 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT) 

152  

153 @property 

154 def maximum_texture2d_linear_width(self) -> int: 

155 """int: Maximum width for a 2D texture bound to linear memory.""" 

156 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH) 

157  

158 @property 

159 def maximum_texture2d_linear_height(self) -> int: 

160 """int: Maximum height for a 2D texture bound to linear memory.""" 

161 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT) 

162  

163 @property 

164 def maximum_texture2d_linear_pitch(self) -> int: 

165 """int: Maximum pitch in bytes for a 2D texture bound to linear memory.""" 

166 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH) 

167  

168 @property 

169 def maximum_texture2d_mipmapped_width(self) -> int: 

170 """int: Maximum mipmapped 2D texture width.""" 

171 return self._get_cached_attribute( 

172 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH 

173 ) 

174  

175 @property 

176 def maximum_texture2d_mipmapped_height(self) -> int: 

177 """int: Maximum mipmapped 2D texture height.""" 

178 return self._get_cached_attribute( 

179 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT 

180 ) 

181  

182 @property 

183 def maximum_texture3d_width(self) -> int: 

184 """int: Maximum 3D texture width.""" 

185 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH) 

186  

187 @property 

188 def maximum_texture3d_height(self) -> int: 

189 """int: Maximum 3D texture height.""" 

190 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT) 

191  

192 @property 

193 def maximum_texture3d_depth(self) -> int: 

194 """int: Maximum 3D texture depth.""" 

195 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH) 

196  

197 @property 

198 def maximum_texture3d_width_alternate(self) -> int: 

199 """int: Alternate maximum 3D texture width, 0 if no alternate maximum 3D texture size is supported.""" 

200 return self._get_cached_attribute( 

201 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE 

202 ) 

203  

204 @property 

205 def maximum_texture3d_height_alternate(self) -> int: 

206 """int: Alternate maximum 3D texture height, 0 if no alternate maximum 3D texture size is supported.""" 

207 return self._get_cached_attribute( 

208 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE 

209 ) 

210  

211 @property 

212 def maximum_texture3d_depth_alternate(self) -> int: 

213 """int: Alternate maximum 3D texture depth, 0 if no alternate maximum 3D texture size is supported.""" 

214 return self._get_cached_attribute( 

215 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE 

216 ) 

217  

218 @property 

219 def maximum_texturecubemap_width(self) -> int: 

220 """int: Maximum cubemap texture width or height.""" 

221 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH) 

222  

223 @property 

224 def maximum_texture1d_layered_width(self) -> int: 

225 """int: Maximum 1D layered texture width.""" 

226 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH) 

227  

228 @property 

229 def maximum_texture1d_layered_layers(self) -> int: 

230 """int: Maximum layers in a 1D layered texture.""" 

231 return self._get_cached_attribute( 

232 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS 

233 ) 

234  

235 @property 

236 def maximum_texture2d_layered_width(self) -> int: 

237 """int: Maximum 2D layered texture width.""" 

238 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH) 

239  

240 @property 

241 def maximum_texture2d_layered_height(self) -> int: 

242 """int: Maximum 2D layered texture height.""" 

243 return self._get_cached_attribute( 

244 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT 

245 ) 

246  

247 @property 

248 def maximum_texture2d_layered_layers(self) -> int: 

249 """int: Maximum layers in a 2D layered texture.""" 

250 return self._get_cached_attribute( 

251 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS 

252 ) 

253  

254 @property 

255 def maximum_texturecubemap_layered_width(self) -> int: 

256 """int: Maximum cubemap layered texture width or height.""" 

257 return self._get_cached_attribute( 

258 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH 

259 ) 

260  

261 @property 

262 def maximum_texturecubemap_layered_layers(self) -> int: 

263 """int: Maximum layers in a cubemap layered texture.""" 

264 return self._get_cached_attribute( 

265 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS 

266 ) 

267  

268 @property 

269 def maximum_surface1d_width(self) -> int: 

270 """int: Maximum 1D surface width.""" 

271 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH) 

272  

273 @property 

274 def maximum_surface2d_width(self) -> int: 

275 """int: Maximum 2D surface width.""" 

276 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH) 

277  

278 @property 

279 def maximum_surface2d_height(self) -> int: 

280 """int: Maximum 2D surface height.""" 

281 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT) 

282  

283 @property 

284 def maximum_surface3d_width(self) -> int: 

285 """int: Maximum 3D surface width.""" 

286 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH) 

287  

288 @property 

289 def maximum_surface3d_height(self) -> int: 

290 """int: Maximum 3D surface height.""" 

291 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT) 

292  

293 @property 

294 def maximum_surface3d_depth(self) -> int: 

295 """int: Maximum 3D surface depth.""" 

296 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH) 

297  

298 @property 

299 def maximum_surface1d_layered_width(self) -> int: 

300 """int: Maximum 1D layered surface width.""" 

301 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH) 

302  

303 @property 

304 def maximum_surface1d_layered_layers(self) -> int: 

305 """int: Maximum layers in a 1D layered surface.""" 

306 return self._get_cached_attribute( 

307 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS 

308 ) 

309  

310 @property 

311 def maximum_surface2d_layered_width(self) -> int: 

312 """int: Maximum 2D layered surface width.""" 

313 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH) 

314  

315 @property 

316 def maximum_surface2d_layered_height(self) -> int: 

317 """int: Maximum 2D layered surface height.""" 

318 return self._get_cached_attribute( 

319 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT 

320 ) 

321  

322 @property 

323 def maximum_surface2d_layered_layers(self) -> int: 

324 """int: Maximum layers in a 2D layered surface.""" 

325 return self._get_cached_attribute( 

326 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS 

327 ) 

328  

329 @property 

330 def maximum_surfacecubemap_width(self) -> int: 

331 """int: Maximum cubemap surface width.""" 

332 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH) 

333  

334 @property 

335 def maximum_surfacecubemap_layered_width(self) -> int: 

336 """int: Maximum cubemap layered surface width.""" 

337 return self._get_cached_attribute( 

338 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH 

339 ) 

340  

341 @property 

342 def maximum_surfacecubemap_layered_layers(self) -> int: 

343 """int: Maximum layers in a cubemap layered surface.""" 

344 return self._get_cached_attribute( 

345 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS 

346 ) 

347  

348 @property 

349 def max_registers_per_block(self) -> int: 

350 """int: Maximum number of 32-bit registers available to a thread block.""" 

351 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK) 

352  

353 @property 

354 def clock_rate(self) -> int: 

355 """int: Typical clock frequency in kilohertz.""" 

356 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE) 

357  

358 @property 

359 def texture_alignment(self) -> int: 

360 """int: Alignment requirement for textures.""" 

361 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT) 

362  

363 @property 

364 def texture_pitch_alignment(self) -> int: 

365 """int: Pitch alignment requirement for textures.""" 

366 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT) 

367  

368 @property 

369 def gpu_overlap(self) -> bool: 

370 """bool: Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead async_engine_count.""" 

371 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP)) 

372  

373 @property 

374 def multiprocessor_count(self) -> int: 

375 """int: Number of multiprocessors on device.""" 

376 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT) 

377  

378 @property 

379 def kernel_exec_timeout(self) -> bool: 

380 """bool: Specifies whether there is a run time limit on kernels.""" 

381 return bool(self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT)) 

382  

383 @property 

384 def integrated(self) -> bool: 

385 """bool: Device is integrated with host memory.""" 

386 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED)) 

387  

388 @property 

389 def can_map_host_memory(self) -> bool: 

390 """bool: Device can map host memory into CUDA address space.""" 

391 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY)) 

392  

393 @property 

394 def compute_mode(self) -> int: 

395 """int: Compute mode (See CUcomputemode for details).""" 

396 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE) 

397  

398 @property 

399 def concurrent_kernels(self) -> bool: 

400 """bool: Device can possibly execute multiple kernels concurrently.""" 

401 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS)) 

402  

403 @property 

404 def ecc_enabled(self) -> bool: 

405 """bool: Device has ECC support enabled.""" 

406 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED)) 

407  

408 @property 

409 def pci_bus_id(self) -> int: 

410 """int: PCI bus ID of the device.""" 

411 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID) 

412  

413 @property 

414 def pci_device_id(self) -> int: 

415 """int: PCI device ID of the device.""" 

416 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID) 

417  

418 @property 

419 def pci_domain_id(self) -> int: 

420 """int: PCI domain ID of the device.""" 

421 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID) 

422  

423 @property 

424 def tcc_driver(self) -> bool: 

425 """bool: Device is using TCC driver model.""" 

426 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER)) 

427  

428 @property 

429 def memory_clock_rate(self) -> int: 

430 """int: Peak memory clock frequency in kilohertz.""" 

431 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE) 

432  

433 @property 

434 def global_memory_bus_width(self) -> int: 

435 """int: Global memory bus width in bits.""" 

436 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH) 

437  

438 @property 

439 def l2_cache_size(self) -> int: 

440 """int: Size of L2 cache in bytes.""" 

441 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE) 

442  

443 @property 

444 def max_threads_per_multiprocessor(self) -> int: 

445 """int: Maximum resident threads per multiprocessor.""" 

446 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR) 

447  

448 @property 

449 def unified_addressing(self) -> bool: 

450 """bool: Device shares a unified address space with the host.""" 

451 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING)) 

452  

453 @property 

454 def compute_capability_major(self) -> int: 

455 """int: Major compute capability version number.""" 

456 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) 

457  

458 @property 

459 def compute_capability_minor(self) -> int: 

460 """int: Minor compute capability version number.""" 

461 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR) 

462  

463 @property 

464 def global_l1_cache_supported(self) -> bool: 

465 """bool: Device supports caching globals in L1.""" 

466 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED)) 

467  

468 @property 

469 def local_l1_cache_supported(self) -> bool: 

470 """bool: Device supports caching locals in L1.""" 

471 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED)) 

472  

473 @property 

474 def max_shared_memory_per_multiprocessor(self) -> int: 

475 """int: Maximum shared memory available per multiprocessor in bytes.""" 

476 return self._get_cached_attribute( 

477 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR 

478 ) 

479  

480 @property 

481 def max_registers_per_multiprocessor(self) -> int: 

482 """int: Maximum number of 32-bit registers available per multiprocessor.""" 

483 return self._get_cached_attribute( 

484 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 

485 ) 

486  

487 @property 

488 def managed_memory(self) -> bool: 

489 """bool: Device can allocate managed memory on this system.""" 

490 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY)) 

491  

492 @property 

493 def multi_gpu_board(self) -> bool: 

494 """bool: Device is on a multi-GPU board.""" 

495 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD)) 

496  

497 @property 

498 def multi_gpu_board_group_id(self) -> int: 

499 """int: Unique id for a group of devices on the same multi-GPU board.""" 

500 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID) 

501  

502 @property 

503 def host_native_atomic_supported(self) -> bool: 

504 """bool: Link between the device and the host supports all native atomic operations.""" 

505 return bool( 

506 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED) 

507 ) 

508  

509 @property 

510 def single_to_double_precision_perf_ratio(self) -> int: 

511 """int: Ratio of single precision performance (in floating-point operations per second) to double precision performance.""" 

512 return self._get_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO) 

513  

514 @property 

515 def pageable_memory_access(self) -> bool: 

516 """bool: Device supports coherently accessing pageable memory without calling cudaHostRegister on it.""" 

517 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS)) 

518  

519 @property 

520 def concurrent_managed_access(self) -> bool: 

521 """bool: Device can coherently access managed memory concurrently with the CPU.""" 

522 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) 

523  

524 @property 

525 def compute_preemption_supported(self) -> bool: 

526 """bool: Device supports compute preemption.""" 

527 return bool( 

528 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED) 

529 ) 

530  

531 @property 

532 def can_use_host_pointer_for_registered_mem(self) -> bool: 

533 """bool: Device can access host registered memory at the same virtual address as the CPU.""" 

534 return bool( 

535 self._get_cached_attribute( 

536 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM 

537 ) 

538 ) 

539  

540 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675) 

541  

542 @property 

543 def cooperative_launch(self) -> bool: 

544 """bool: Device supports launching cooperative kernels via cuLaunchCooperativeKernel.""" 

545 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH)) 

546  

547 # TODO: A few attrs are missing here (NVIDIA/cuda-python#675) 

548  

549 @property 

550 def max_shared_memory_per_block_optin(self) -> int: 

551 """int: Maximum optin shared memory per block.""" 

552 return self._get_cached_attribute( 

553 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN 

554 ) 

555  

556 @property 

557 def pageable_memory_access_uses_host_page_tables(self) -> bool: 

558 """bool: Device accesses pageable memory via the host's page tables.""" 

559 return bool( 

560 self._get_cached_attribute( 

561 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES 

562 ) 

563 ) 

564  

565 @property 

566 def direct_managed_mem_access_from_host(self) -> bool: 

567 """bool: The host can directly access managed memory on the device without migration.""" 

568 return bool( 

569 self._get_cached_attribute( 

570 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST 

571 ) 

572 ) 

573  

574 @property 

575 def virtual_memory_management_supported(self) -> bool: 

576 """bool: Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs.""" 

577 return bool( 

578 self._get_cached_attribute( 

579 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED 

580 ) 

581 ) 

582  

583 @property 

584 def handle_type_posix_file_descriptor_supported(self) -> bool: 

585 """bool: Device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, if requested via cuMemCreate.""" 

586 return bool( 

587 self._get_cached_attribute( 

588 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED 

589 ) 

590 ) 

591  

592 @property 

593 def handle_type_win32_handle_supported(self) -> bool: 

594 """bool: Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, if requested via cuMemCreate.""" 

595 return bool( 

596 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED) 

597 ) 

598  

599 @property 

600 def handle_type_win32_kmt_handle_supported(self) -> bool: 

601 """bool: Device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, if requested via cuMemCreate.""" 

602 return bool( 

603 self._get_cached_attribute( 

604 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED 

605 ) 

606 ) 

607  

608 @property 

609 def max_blocks_per_multiprocessor(self) -> int: 

610 """int: Maximum number of blocks per multiprocessor.""" 

611 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR) 

612  

613 @property 

614 def generic_compression_supported(self) -> bool: 

615 """bool: Device supports compression of memory.""" 

616 return bool( 

617 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED) 

618 ) 

619  

620 @property 

621 def max_persisting_l2_cache_size(self) -> int: 

622 """int: Maximum L2 persisting lines capacity setting in bytes.""" 

623 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE) 

624  

625 @property 

626 def max_access_policy_window_size(self) -> int: 

627 """int: Maximum value of CUaccessPolicyWindow.num_bytes.""" 

628 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE) 

629  

630 @property 

631 def gpu_direct_rdma_with_cuda_vmm_supported(self) -> bool: 

632 """bool: Device supports specifying the GPUDirect RDMA flag with cuMemCreate.""" 

633 return bool( 

634 self._get_cached_attribute( 

635 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED 

636 ) 

637 ) 

638  

639 @property 

640 def reserved_shared_memory_per_block(self) -> int: 

641 """int: Shared memory reserved by CUDA driver per block in bytes.""" 

642 return self._get_cached_attribute( 

643 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK 

644 ) 

645  

646 @property 

647 def sparse_cuda_array_supported(self) -> bool: 

648 """bool: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.""" 

649 return bool( 

650 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED) 

651 ) 

652  

653 @property 

654 def read_only_host_register_supported(self) -> bool: 

655 """bool: True if device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU, False if not.""" 

656 return bool( 

657 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED) 

658 ) 

659  

660 @property 

661 def memory_pools_supported(self) -> bool: 

662 """bool: Device supports using the cuMemAllocAsync and cuMemPool family of APIs.""" 

663 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED)) 

664  

665 @property 

666 def gpu_direct_rdma_supported(self) -> bool: 

667 """bool: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information).""" 

668 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED)) 

669  

670 @property 

671 def gpu_direct_rdma_flush_writes_options(self) -> int: 

672 """int: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum.""" 

673 return self._get_cached_attribute( 

674 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS 

675 ) 

676  

677 @property 

678 def gpu_direct_rdma_writes_ordering(self) -> int: 

679 """int: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical values returned here.""" 

680 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING) 

681  

682 @property 

683 def mempool_supported_handle_types(self) -> int: 

684 """int: Handle types supported with mempool based IPC.""" 

685 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES) 

686  

687 @property 

688 def deferred_mapping_cuda_array_supported(self) -> bool: 

689 """bool: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.""" 

690 return bool( 

691 self._get_cached_attribute( 

692 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED 

693 ) 

694 ) 

695  

696 @property 

697 def numa_config(self) -> int: 

698 """int: NUMA configuration of a device: value is of type CUdeviceNumaConfig enum.""" 

699 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG) 

700  

701 @property 

702 def numa_id(self) -> int: 

703 """int: NUMA node ID of the GPU memory.""" 

704 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID) 

705  

706 @property 

707 def multicast_supported(self) -> bool: 

708 """bool: Device supports switch multicast and reduction operations.""" 

709 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED)) 

710  

711 @property 

712 def surface_alignment(self) -> int: 

713 """int: Surface alignment requirement in bytes.""" 

714 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT) 

715  

716 @property 

717 def async_engine_count(self) -> int: 

718 """int: Number of asynchronous engines.""" 

719 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT) 

720  

721 @property 

722 def can_tex2d_gather(self) -> bool: 

723 """bool: True if device supports 2D texture gather operations, False if not.""" 

724 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER)) 

725  

726 @property 

727 def maximum_texture2d_gather_width(self) -> int: 

728 """int: Maximum 2D texture gather width.""" 

729 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH) 

730  

731 @property 

732 def maximum_texture2d_gather_height(self) -> int: 

733 """int: Maximum 2D texture gather height.""" 

734 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT) 

735  

736 @property 

737 def stream_priorities_supported(self) -> bool: 

738 """bool: True if device supports stream priorities, False if not.""" 

739 return bool( 

740 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED) 

741 ) 

742  

743 @property 

744 def can_flush_remote_writes(self) -> bool: 

745 """bool: The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See Stream Memory Operations for additional details.""" 

746 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES)) 

747  

748 @property 

749 def host_register_supported(self) -> bool: 

750 """bool: Device supports host memory registration via cudaHostRegister.""" 

751 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED)) 

752  

753 @property 

754 def timeline_semaphore_interop_supported(self) -> bool: 

755 """bool: External timeline semaphore interop is supported on the device.""" 

756 return bool( 

757 self._get_cached_attribute( 

758 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED 

759 ) 

760 ) 

761  

762 @property 

763 def cluster_launch(self) -> bool: 

764 """bool: Indicates device supports cluster launch.""" 

765 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH)) 

766  

767 @property 

768 def can_use_64_bit_stream_mem_ops(self) -> bool: 

769 """bool: 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs.""" 

770 return bool( 

771 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS) 

772 ) 

773  

774 @property 

775 def can_use_stream_wait_value_nor(self) -> bool: 

776 """bool: CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs.""" 

777 return bool( 

778 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR) 

779 ) 

780  

781 @property 

782 def dma_buf_supported(self) -> bool: 

783 """bool: Device supports buffer sharing with dma_buf mechanism.""" 

784 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED)) 

785  

786 @property 

787 def ipc_event_supported(self) -> bool: 

788 """bool: Device supports IPC Events.""" 

789 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED)) 

790  

791 @property 

792 def mem_sync_domain_count(self) -> int: 

793 """int: Number of memory domains the device supports.""" 

794 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT) 

795  

796 @property 

797 def tensor_map_access_supported(self) -> bool: 

798 """bool: Device supports accessing memory using Tensor Map.""" 

799 return bool( 

800 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED) 

801 ) 

802  

803 @property 

804 def handle_type_fabric_supported(self) -> bool: 

805 """bool: Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or requested with cuMemCreate().""" 

806 return bool( 

807 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED) 

808 ) 

809  

810 @property 

811 def unified_function_pointers(self) -> bool: 

812 """bool: Device supports unified function pointers.""" 

813 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS)) 

814  

815 @property 

816 def mps_enabled(self) -> bool: 

817 """bool: Indicates if contexts created on this device will be shared via MPS.""" 

818 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MPS_ENABLED)) 

819  

820 @property 

821 def host_numa_id(self) -> int: 

822 """int: NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA.""" 

823 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID) 

824  

825 @property 

826 def d3d12_cig_supported(self) -> bool: 

827 """bool: Device supports CIG with D3D12.""" 

828 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED)) 

829  

830 @property 

831 def mem_decompress_algorithm_mask(self) -> int: 

832 """int: The returned valued shall be interpreted as a bitmask, where the individual bits are described by the CUmemDecompressAlgorithm enum.""" 

833 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK) 

834  

835 @property 

836 def mem_decompress_maximum_length(self) -> int: 

837 """int: The returned valued is the maximum length in bytes of a single decompress operation that is allowed.""" 

838 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH) 

839  

840 @property 

841 def vulkan_cig_supported(self) -> bool: 

842 """bool: Device supports CIG with Vulkan.""" 

843 return bool(self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VULKAN_CIG_SUPPORTED)) 

844  

845 @property 

846 def gpu_pci_device_id(self) -> int: 

847 """int: The combined 16-bit PCI device ID and 16-bit PCI vendor ID.""" 

848 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID) 

849  

850 @property 

851 def gpu_pci_subsystem_id(self) -> int: 

852 """int: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID.""" 

853 return self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID) 

854  

855 @property 

856 def host_numa_virtual_memory_management_supported(self) -> bool: 

857 """bool: Device supports HOST_NUMA location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs.""" 

858 return bool( 

859 self._get_cached_attribute( 

860 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED 

861 ) 

862 ) 

863  

864 @property 

865 def host_numa_memory_pools_supported(self) -> bool: 

866 """bool: Device supports HOST_NUMA location with the cuMemAllocAsync and cuMemPool family of APIs.""" 

867 return bool( 

868 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MEMORY_POOLS_SUPPORTED) 

869 ) 

870  

871 @property 

872 def host_numa_multinode_ipc_supported(self) -> bool: 

873 """bool: Device supports HOST_NUMA location IPC between nodes in a multi-node system.""" 

874 return bool( 

875 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_MULTINODE_IPC_SUPPORTED) 

876 ) 

877  

878 @property 

879 def host_memory_pools_supported(self) -> bool: 

880 """bool: Device suports HOST location with the cuMemAllocAsync and cuMemPool family of APIs.""" 

881 return bool( 

882 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_MEMORY_POOLS_SUPPORTED) 

883 ) 

884  

885 @property 

886 def host_virtual_memory_management_supported(self) -> bool: 

887 """bool: Device supports HOST location with the virtual memory management APIs like cuMemCreate, cuMemMap and related APIs.""" 

888 return bool( 

889 self._get_cached_attribute( 

890 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED 

891 ) 

892 ) 

893  

894 @property 

895 def host_alloc_dma_buf_supported(self) -> bool: 

896 """bool: Device supports page-locked host memory buffer sharing with dma_buf mechanism.""" 

897 return bool( 

898 self._get_cached_attribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_ALLOC_DMA_BUF_SUPPORTED) 

899 ) 

900  

901 @property 

902 def only_partial_host_native_atomic_supported(self) -> bool: 

903 """bool: Link between the device and the host supports only some native atomic operations.""" 

904 return bool( 

905 self._get_cached_attribute( 

906 driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ONLY_PARTIAL_HOST_NATIVE_ATOMIC_SUPPORTED 

907 ) 

908 ) 

909  

910  

911cdef cydriver.CUcontext _get_primary_context(int dev_id) except?NULL: 

912 try: 

913 primary_ctxs = _tls.primary_ctxs 

914 except AttributeError: 

915 total = len(_tls.devices) 

916 primary_ctxs = _tls.primary_ctxs = [0] * total 

917 cdef cydriver.CUcontext ctx = <cydriver.CUcontext><uintptr_t>(primary_ctxs[dev_id]) 

918 if ctx == NULL: 

919 with nogil: 

920 HANDLE_RETURN(cydriver.cuDevicePrimaryCtxRetain(&ctx, dev_id)) 

921 primary_ctxs[dev_id] = <uintptr_t>(ctx) 

922 return ctx 

923  

924  

925class Device: 

926 """Represent a GPU and act as an entry point for cuda.core features. 

927  

928 This is a singleton object that helps ensure interoperability 

929 across multiple libraries imported in the process to both see 

930 and use the same GPU device. 

931  

932 While acting as the entry point, many other CUDA resources can be 

933 allocated such as streams and buffers. Any :obj:`~_context.Context` dependent 

934 resource created through this device, will continue to refer to 

935 this device's context. 

936  

937 Newly returned :obj:`~_device.Device` objects are thread-local singletons 

938 for a specified device. 

939  

940 Note 

941 ---- 

942 Will not initialize the GPU. 

943  

944 Parameters 

945 ---------- 

946 device_id : int, optional 

947 Device ordinal to return a :obj:`~_device.Device` object for. 

948 Default value of `None` return the currently used device. 

949  

950 """ 

951 __slots__ = ("_id", "_memory_resource", "_has_inited", "_properties", "_uuid") 

952  

953 def __new__(cls, device_id: Device | int | None = None): 

954 # Handle device_id argument. 

955 if isinstance(device_id, Device): 

956 return device_id 

957 else: 

958 device_id = getattr(device_id, 'device_id', device_id) 

959  

960 # Initialize CUDA. 

961 global _is_cuInit 

962 if _is_cuInit is False: 

963 with _lock, nogil: 

964 HANDLE_RETURN(cydriver.cuInit(0)) 

965 _is_cuInit = True 

966  

967 # important: creating a Device instance does not initialize the GPU! 

968 cdef cydriver.CUdevice dev 

969 cdef cydriver.CUcontext ctx 

970 if device_id is None: 

971 with nogil: 

972 err = cydriver.cuCtxGetDevice(&dev) 

973 if err == cydriver.CUresult.CUDA_SUCCESS: 

974 device_id = int(dev) 

975 elif err == cydriver.CUresult.CUDA_ERROR_INVALID_CONTEXT: 

976 with nogil: 

977 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx)) 

978 assert <void*>(ctx) == NULL 

979 device_id = 0 # cudart behavior 

980 else: 

981 HANDLE_RETURN(err) 

982 elif device_id < 0: 

983 raise ValueError(f"device_id must be >= 0, got {device_id}") 

984  

985 # ensure Device is singleton 

986 cdef int total 

987 try: 

988 devices = _tls.devices 

989 except AttributeError: 

990 with nogil: 

991 HANDLE_RETURN(cydriver.cuDeviceGetCount(&total)) 

992 devices = _tls.devices = [] 

993 for dev_id in range(total): 

994 device = super().__new__(cls) 

995 device._id = dev_id 

996 device._memory_resource = None 

997 device._has_inited = False 

998 device._properties = None 

999 device._uuid = None 

1000 devices.append(device) 

1001  

1002 try: 

1003 return devices[device_id] 

1004 except IndexError: 

1005 raise ValueError(f"device_id must be within [0, {len(devices)}), got {device_id}") from None 

1006  

1007 def _check_context_initialized(self): 

1008 if not self._has_inited: 

1009 raise CUDAError( 

1010 f"Device {self._id} is not yet initialized, perhaps you forgot to call .set_current() first?" 

1011 ) 

1012  

1013 def _get_current_context(self, bint check_consistency=False) -> driver.CUcontext: 

1014 cdef cydriver.CUcontext ctx 

1015 cdef cydriver.CUdevice dev 

1016 cdef cydriver.CUdevice this_dev = self._id 

1017 with nogil: 

1018 HANDLE_RETURN(cydriver.cuCtxGetCurrent(&ctx)) 

1019 if ctx == NULL: 

1020 raise CUDAError("No context is bound to the calling CPU thread.") 

1021 if check_consistency: 

1022 HANDLE_RETURN(cydriver.cuCtxGetDevice(&dev)) 

1023 if dev != this_dev: 

1024 raise CUDAError("Internal error (current device is not equal to Device.device_id)") 

1025 return driver.CUcontext(<uintptr_t>ctx) 

1026  

1027 @property 

1028 def device_id(self) -> int: 

1029 """Return device ordinal.""" 

1030 return self._id 

1031  

1032 @property 

1033 def pci_bus_id(self) -> str: 

1034 """Return a PCI Bus Id string for this device.""" 

1035 bus_id = handle_return(runtime.cudaDeviceGetPCIBusId(13, self._id)) 

1036 return bus_id[:12].decode() 

1037  

1038 def can_access_peer(self, peer: Device | int) -> bool: 

1039 """Check if this device can access memory from the specified peer device. 

1040  

1041 Queries whether peer-to-peer memory access is supported between this 

1042 device and the specified peer device. 

1043  

1044 Parameters 

1045 ---------- 

1046 peer : Device | int 

1047 The peer device to check accessibility to. Can be a Device object or device ID. 

1048 """ 

1049 peer = Device(peer) 

1050 cdef int d1 = <int> self.device_id 

1051 cdef int d2 = <int> peer.device_id 

1052 if d1 == d2: 

1053 return True 

1054 cdef int value = 0 

1055 with nogil: 

1056 HANDLE_RETURN(cydriver.cuDeviceCanAccessPeer(&value, d1, d2)) 

1057 return bool(value) 

1058  

1059 @property 

1060 def uuid(self) -> str: 

1061 """Return a UUID for the device. 

1062  

1063 Returns 16-octets identifying the device. If the device is in 

1064 MIG mode, returns its MIG UUID which uniquely identifies the 

1065 subscribed MIG compute instance. 

1066  

1067 Note 

1068 ---- 

1069 MIG UUID is only returned when device is in MIG mode and the 

1070 driver is older than CUDA 11.4. 

1071  

1072 The UUID is cached after first access to avoid repeated CUDA API calls. 

1073  

1074 """ 

1075 cdef cydriver.CUuuid uuid 

1076 cdef cydriver.CUdevice dev 

1077 cdef bytes uuid_b 

1078 cdef str uuid_hex 

1079  

1080 if self._uuid is None: 

1081 dev = self._id 

1082 with nogil: 

1083 IF CUDA_CORE_BUILD_MAJOR == "12": 

1084 HANDLE_RETURN(cydriver.cuDeviceGetUuid_v2(&uuid, dev)) 

1085 ELSE: # 13.0+ 

1086 HANDLE_RETURN(cydriver.cuDeviceGetUuid(&uuid, dev)) 

1087 uuid_b = cpython.PyBytes_FromStringAndSize(uuid.bytes, sizeof(uuid.bytes)) 

1088 uuid_hex = uuid_b.hex() 

1089 # 8-4-4-4-12 

1090 self._uuid = f"{uuid_hex[:8]}-{uuid_hex[8:12]}-{uuid_hex[12:16]}-{uuid_hex[16:20]}-{uuid_hex[20:]}" 

1091 return self._uuid 

1092  

1093 @property 

1094 def name(self) -> str: 

1095 """Return the device name.""" 

1096 # Use 256 characters to be consistent with CUDA Runtime 

1097 cdef int LENGTH = 256 

1098 cdef bytes name = bytes(LENGTH) 

1099 cdef char* name_ptr = name 

1100 cdef cydriver.CUdevice this_dev = self._id 

1101 with nogil: 

1102 HANDLE_RETURN(cydriver.cuDeviceGetName(name_ptr, LENGTH, this_dev)) 

1103 name = name.split(b"\0")[0] 

1104 return name.decode() 

1105  

1106 @property 

1107 def properties(self) -> DeviceProperties: 

1108 """Return a :obj:`~_device.DeviceProperties` class with information about the device.""" 

1109 if self._properties is None: 

1110 self._properties = DeviceProperties._init(self._id) 

1111  

1112 return self._properties 

1113  

1114 @property 

1115 def compute_capability(self) -> ComputeCapability: 

1116 """Return a named tuple with 2 fields: major and minor.""" 

1117 cdef DeviceProperties prop = self.properties 

1118 if "compute_capability" in prop._cache: 

1119 return prop._cache["compute_capability"] 

1120 cc = ComputeCapability(prop.compute_capability_major, prop.compute_capability_minor) 

1121 prop._cache["compute_capability"] = cc 

1122 return cc 

1123  

1124 @property 

1125 def arch(self) -> str: 

1126 """Return compute capability as a string (e.g., '75' for CC 7.5).""" 

1127 return f"{self.compute_capability.major}{self.compute_capability.minor}" 

1128  

1129 @property 

1130 def context(self) -> Context: 

1131 """Return the current :obj:`~_context.Context` associated with this device. 

1132  

1133 Note 

1134 ---- 

1135 Device must be initialized. 

1136  

1137 """ 

1138 self._check_context_initialized() 

1139 ctx = self._get_current_context(check_consistency=True) 

1140 return Context._from_ctx(ctx, self._id) 

1141  

1142 @property 

1143 def memory_resource(self) -> MemoryResource: 

1144 """Return :obj:`~_memory.MemoryResource` associated with this device.""" 

1145 cdef int attr, device_id 

1146 if self._memory_resource is None: 

1147 # If the device is in TCC mode, or does not support memory pools for some other reason, 

1148 # use the SynchronousMemoryResource which does not use memory pools. 

1149 device_id = self._id 

1150 with nogil: 

1151 HANDLE_RETURN( 

1152 cydriver.cuDeviceGetAttribute( 

1153 &attr, cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device_id 

1154 ) 

1155 ) 

1156 if attr == 1: 

1157 from cuda.core.experimental._memory import DeviceMemoryResource 

1158 self._memory_resource = DeviceMemoryResource(self._id) 

1159 else: 

1160 from cuda.core.experimental._memory import _SynchronousMemoryResource 

1161 self._memory_resource = _SynchronousMemoryResource(self._id) 

1162  

1163 return self._memory_resource 

1164  

1165 @memory_resource.setter 

1166 def memory_resource(self, mr): 

1167 from cuda.core.experimental._memory import MemoryResource 

1168 assert_type(mr, MemoryResource) 

1169 self._memory_resource = mr 

1170  

1171 @property 

1172 def default_stream(self) -> Stream: 

1173 """Return default CUDA :obj:`~_stream.Stream` associated with this device. 

1174  

1175 The type of default stream returned depends on if the environment 

1176 variable CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM is set. 

1177  

1178 If set, returns a per-thread default stream. Otherwise returns 

1179 the legacy stream. 

1180  

1181 """ 

1182 return default_stream() 

1183  

1184 def __int__(self): 

1185 """Return device_id.""" 

1186 return self._id 

1187  

1188 def __repr__(self): 

1189 return f"<Device {self._id} ({self.name})>" 

1190  

1191 def __hash__(self) -> int: 

1192 return hash(self.uuid) 

1193  

1194 def __eq__(self, other) -> bool: 

1195 if not isinstance(other, Device): 

1196 return NotImplemented 

1197 return self._id == other._id 

1198  

1199 def __reduce__(self): 

1200 return Device, (self.device_id,) 

1201  

1202 def set_current(self, ctx: Context = None) -> Union[Context, None]: 

1203 """Set device to be used for GPU executions. 

1204  

1205 Initializes CUDA and sets the calling thread to a valid CUDA 

1206 context. By default the primary context is used, but optional `ctx` 

1207 parameter can be used to explicitly supply a :obj:`~_context.Context` object. 

1208  

1209 Providing a `ctx` causes the previous set context to be popped and returned. 

1210  

1211 Parameters 

1212 ---------- 

1213 ctx : :obj:`~_context.Context`, optional 

1214 Optional context to push onto this device's current thread stack. 

1215  

1216 Returns 

1217 ------- 

1218 Union[:obj:`~_context.Context`, None], optional 

1219 Popped context. 

1220  

1221 Examples 

1222 -------- 

1223 Acts as an entry point of this object. Users always start a code by 

1224 calling this method, e.g. 

1225  

1226 >>> from cuda.core.experimental import Device 

1227 >>> dev0 = Device(0) 

1228 >>> dev0.set_current() 

1229 >>> # ... do work on device 0 ... 

1230  

1231 """ 

1232 cdef cydriver.CUcontext prev_ctx 

1233 cdef cydriver.CUcontext curr_ctx 

1234 if ctx is not None: 

1235 # TODO: revisit once Context is cythonized 

1236 assert_type(ctx, Context) 

1237 if ctx._id != self._id: 

1238 raise RuntimeError( 

1239 "the provided context was created on the device with" 

1240 f" id={ctx._id}, which is different from the target id={self._id}" 

1241 ) 

1242 # prev_ctx is the previous context 

1243 curr_ctx = <cydriver.CUcontext>(ctx._handle) 

1244 with nogil: 

1245 HANDLE_RETURN(cydriver.cuCtxPopCurrent(&prev_ctx)) 

1246 HANDLE_RETURN(cydriver.cuCtxPushCurrent(curr_ctx)) 

1247 self._has_inited = True 

1248 if prev_ctx != NULL: 

1249 return Context._from_ctx(<uintptr_t>(prev_ctx), self._id) 

1250 else: 

1251 # use primary ctx 

1252 curr_ctx = _get_primary_context(self._id) 

1253 with nogil: 

1254 HANDLE_RETURN(cydriver.cuCtxSetCurrent(curr_ctx)) 

1255 self._has_inited = True 

1256  

1257 def create_context(self, options: ContextOptions = None) -> Context: 

1258 """Create a new :obj:`~_context.Context` object. 

1259  

1260 Note 

1261 ---- 

1262 The newly context will not be set as current. 

1263  

1264 Parameters 

1265 ---------- 

1266 options : :obj:`~_context.ContextOptions`, optional 

1267 Customizable dataclass for context creation options. 

1268  

1269 Returns 

1270 ------- 

1271 :obj:`~_context.Context` 

1272 Newly created context object. 

1273  

1274 """ 

1275 raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189") 

1276  

1277 def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream: 

1278 """Create a Stream object. 

1279  

1280 New stream objects can be created in two different ways: 

1281  

1282 1) Create a new CUDA stream with customizable ``options``. 

1283 2) Wrap an existing foreign `obj` supporting the ``__cuda_stream__`` protocol. 

1284  

1285 Option (2) internally holds a reference to the foreign object 

1286 such that the lifetime is managed. 

1287  

1288 Note 

1289 ---- 

1290 Device must be initialized. 

1291  

1292 Parameters 

1293 ---------- 

1294 obj : :obj:`~_stream.IsStreamT`, optional 

1295 Any object supporting the ``__cuda_stream__`` protocol. 

1296 options : :obj:`~_stream.StreamOptions`, optional 

1297 Customizable dataclass for stream creation options. 

1298  

1299 Returns 

1300 ------- 

1301 :obj:`~_stream.Stream` 

1302 Newly created stream object. 

1303  

1304 """ 

1305 self._check_context_initialized() 

1306 return Stream._init(obj=obj, options=options, device_id=self._id) 

1307  

1308 def create_event(self, options: EventOptions | None = None) -> Event: 

1309 """Create an Event object without recording it to a Stream. 

1310  

1311 Note 

1312 ---- 

1313 Device must be initialized. 

1314  

1315 Parameters 

1316 ---------- 

1317 options : :obj:`EventOptions`, optional 

1318 Customizable dataclass for event creation options. 

1319  

1320 Returns 

1321 ------- 

1322 :obj:`~_event.Event` 

1323 Newly created event object. 

1324  

1325 """ 

1326 self._check_context_initialized() 

1327 ctx = self._get_current_context() 

1328 return Event._init(self._id, ctx, options, True) 

1329  

1330 def allocate(self, size, stream: Stream | GraphBuilder | None = None) -> Buffer: 

1331 """Allocate device memory from a specified stream. 

1332  

1333 Allocates device memory of `size` bytes on the specified `stream` 

1334 using the memory resource currently associated with this Device. 

1335  

1336 Parameter `stream` is optional, using a default stream by default. 

1337  

1338 Note 

1339 ---- 

1340 Device must be initialized. 

1341  

1342 Parameters 

1343 ---------- 

1344 size : int 

1345 Number of bytes to allocate. 

1346 stream : :obj:`~_stream.Stream`, optional 

1347 The stream establishing the stream ordering semantic. 

1348 Default value of `None` uses default stream. 

1349  

1350 Returns 

1351 ------- 

1352 :obj:`~_memory.Buffer` 

1353 Newly created buffer object. 

1354  

1355 """ 

1356 self._check_context_initialized() 

1357 return self.memory_resource.allocate(size, stream) 

1358  

1359 def sync(self): 

1360 """Synchronize the device. 

1361  

1362 Note 

1363 ---- 

1364 Device must be initialized. 

1365  

1366 """ 

1367 self._check_context_initialized() 

1368 handle_return(runtime.cudaDeviceSynchronize()) 

1369  

1370 def create_graph_builder(self) -> GraphBuilder: 

1371 """Create a new :obj:`~_graph.GraphBuilder` object. 

1372  

1373 Returns 

1374 ------- 

1375 :obj:`~_graph.GraphBuilder` 

1376 Newly created graph builder object. 

1377  

1378 """ 

1379 self._check_context_initialized() 

1380 return GraphBuilder._init(stream=self.create_stream(), is_stream_owner=True)