Coverage for cuda / core / system / _device.pyx: 81.08%

148 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-29 01:27 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from libc.stdint cimport intptr_t, uint64_t 

6from libc.math cimport ceil 

7  

8from multiprocessing import cpu_count 

9from typing import Iterable 

10  

11from cuda.bindings import nvml 

12  

13from ._nvml_context cimport initialize 

14  

15  

16AddressingMode = nvml.DeviceAddressingModeType 

17AffinityScope = nvml.AffinityScope 

18BrandType = nvml.BrandType 

19DeviceArch = nvml.DeviceArch 

20GpuP2PCapsIndex = nvml.GpuP2PCapsIndex 

21GpuP2PStatus = nvml.GpuP2PStatus 

22GpuTopologyLevel = nvml.GpuTopologyLevel 

23Pstates = nvml.Pstates 

24  

25  

26include "_clock.pxi" 

27include "_cooler.pxi" 

28include "_device_attributes.pxi" 

29include "_device_utils.pxi" 

30include "_event.pxi" 

31include "_fan.pxi" 

32include "_field_values.pxi" 

33include "_inforom.pxi" 

34include "_memory.pxi" 

35include "_mig.pxi" 

36include "_nvlink.pxi" 

37include "_pci_info.pxi" 

38include "_performance.pxi" 

39include "_process.pxi" 

40include "_repair_status.pxi" 

41include "_temperature.pxi" 

42include "_utilization.pxi" 

43  

44  

cdef class Device:
    """
    Handle to a single GPU as seen by the NVIDIA Management Library (NVML).

    :class:`cuda.core.system.Device` exposes device metadata and topology
    information as reported by NVML. To run CUDA work on a device, use
    :class:`cuda.core.Device` instead.

    Instantiating a device causes NVML to initialize the target GPU. NVML
    may initialize additional GPUs if the target GPU is an SLI slave.

    Exactly one of the keyword arguments below must be provided.

    Parameters
    ----------
    index: int, optional
        CUDA device index, between ``0`` and
        ``cuda.core.system.get_num_devices() - 1``.

        Enumeration order is not guaranteed to be stable across reboots;
        prefer looking devices up by PCI id or UUID.

    uuid: bytes or str, optional
        UUID of a CUDA device to get a handle to.

    pci_bus_id: bytes or str, optional
        PCI bus ID of a CUDA device to get a handle to.

    Raises
    ------
    ValueError
        If anything other than a single `index`, `uuid` or `pci_bus_id` are specified.
    """

    # This is made public for testing purposes only
    cdef public intptr_t _handle

    def __init__(
        self,
        *,
        index: int | None = None,
        uuid: bytes | str | None = None,
        pci_bus_id: bytes | str | None = None,
    ):
        # Count how many selectors the caller supplied; exactly one is valid.
        cdef int provided = (
            (index is not None) + (uuid is not None) + (pci_bus_id is not None)
        )

        if provided > 1:
            raise ValueError("Handle requires only one of `index`, `uuid`, or `pci_bus_id`.")
        if provided == 0:
            raise ValueError("Handle requires either a device `index`, `uuid`, or `pci_bus_id`.")

        initialize()

        if index is not None:
            self._handle = nvml.device_get_handle_by_index_v2(index)
        elif uuid is not None:
            if isinstance(uuid, bytes):
                uuid = uuid.decode("ascii")
            self._handle = nvml.device_get_handle_by_uuid(uuid)
        else:
            if isinstance(pci_bus_id, bytes):
                pci_bus_id = pci_bus_id.decode("ascii")
            self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)

108  

109 ######################################################################### 

110 # BASIC PROPERTIES 

111  

112 @property 

113 def index(self) -> int: 

114 """ 

115 The NVML index of this device. 

116  

117 Valid indices are derived from the count returned by 

118 :meth:`Device.get_device_count`. For example, if ``get_device_count()`` 

119 returns 2, the valid indices are 0 and 1, corresponding to GPU 0 and GPU 

120 1. 

121  

122 The order in which NVML enumerates devices has no guarantees of 

123 consistency between reboots. For that reason, it is recommended that 

124 devices be looked up by their PCI ids or GPU UUID. 

125  

126 Note: The NVML index may not correlate with other APIs, such as the CUDA 

127 device index. 

128 """ 

129 return nvml.device_get_index(self._handle) 1cH

130  

131 @property 

132 def uuid(self) -> str: 

133 """ 

134 Retrieves the globally unique immutable UUID associated with this 

135 device, as a 5 part hexadecimal string, that augments the immutable, 

136 board serial identifier. 

137  

138 In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-`` 

139 prefix. If you need a `uuid` without that prefix (for example, to 

140 interact with CUDA), use the `uuid_without_prefix` property. 

141 """ 

142 return nvml.device_get_uuid(self._handle) 1O

143  

144 @property 

145 def uuid_without_prefix(self) -> str: 

146 """ 

147 Retrieves the globally unique immutable UUID associated with this 

148 device, as a 5 part hexadecimal string, that augments the immutable, 

149 board serial identifier. 

150  

151 In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-`` 

152 prefix. This property returns it without the prefix, to match the UUIDs 

153 used in CUDA. If you need the prefix, use the `uuid` property. 

154 """ 

155 # NVML UUIDs have a `gpu-` or `mig-` prefix. We remove that here. 

156 return nvml.device_get_uuid(self._handle)[4:] 1CdP

157  

158 @property 

159 def pci_bus_id(self) -> str: 

160 """ 

161 Retrieves the PCI bus ID of this device. 

162 """ 

163 return self.pci_info.bus_id 

164  

165 @property 

166 def numa_node_id(self) -> int: 

167 """ 

168 The NUMA node of the given GPU device. 

169  

170 This only applies to platforms where the GPUs are NUMA nodes. 

171 """ 

172 return nvml.device_get_numa_node_id(self._handle) 1K

173  

174 @property 

175 def arch(self) -> DeviceArch: 

176 """ 

177 :obj:`~DeviceArch` device architecture. 

178  

179 For example, a Tesla V100 will report ``DeviceArchitecture.name == 

180 "VOLTA"``, and RTX A6000 will report ``DeviceArchitecture.name == 

181 "AMPERE"``. 

182 """ 

183 arch = nvml.device_get_architecture(self._handle) 1ij

184 try: 1ij

185 return DeviceArch(arch) 1ij

186 except ValueError: 

187 return nvml.DeviceArch.UNKNOWN 

188  

189 @property 

190 def name(self) -> str: 

191 """ 

192 Name of the device, e.g.: `"Tesla V100-SXM2-32GB"` 

193 """ 

194 return nvml.device_get_name(self._handle) 1z

195  

196 @property 

197 def brand(self) -> BrandType: 

198 """ 

199 :obj:`~BrandType` brand of the device 

200 """ 

201 return BrandType(nvml.device_get_brand(self._handle)) 1w

202  

203 @property 

204 def serial(self) -> str: 

205 """ 

206 Retrieves the globally unique board serial number associated with this 

207 device's board. 

208  

209 For all products with an InfoROM. 

210 """ 

211 return nvml.device_get_serial(self._handle) 1B

212  

213 @property 

214 def module_id(self) -> int: 

215 """ 

216 Get a unique identifier for the device module on the baseboard. 

217  

218 This API retrieves a unique identifier for each GPU module that exists 

219 on a given baseboard. For non-baseboard products, this ID would always 

220 be 0. 

221 """ 

222 return nvml.device_get_module_id(self._handle) 1J

223  

224 @property 

225 def minor_number(self) -> int: 

226 """ 

227 The minor number of this device. 

228  

229 For Linux only. 

230  

231 The minor number is used by the Linux device driver to identify the 

232 device node in ``/dev/nvidiaX``. 

233 """ 

234 return nvml.device_get_minor_number(self._handle) 1F

235  

236 @property 

237 def is_c2c_enabled(self) -> bool: 

238 """ 

239 Whether the C2C (Chip-to-Chip) mode is enabled for this device. 

240 """ 

241 return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled) 1s

242  

243 @property 

244 def is_persistence_mode_enabled(self) -> bool: 

245 """ 

246 Whether persistence mode is enabled for this device. 

247  

248 For Linux only. 

249 """ 

250 return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED 1h

251  

252 @is_persistence_mode_enabled.setter 

253 def is_persistence_mode_enabled(self, enabled: bool) -> None: 

254 nvml.device_set_persistence_mode( 1h

255 self._handle, 1h

256 nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED 1h

257 ) 

258  

259 @property 

260 def cuda_compute_capability(self) -> tuple[int, int]: 

261 """ 

262 CUDA compute capability of the device, e.g.: `(7, 0)` for a Tesla V100. 

263  

264 Returns a tuple `(major, minor)`. 

265 """ 

266 return nvml.device_get_cuda_compute_capability(self._handle) 1x

267  

268 def to_cuda_device(self) -> "cuda.core.Device": 

269 """ 

270 Get the corresponding :class:`cuda.core.Device` (which is used for CUDA 

271 access) for this :class:`cuda.core.system.Device` (which is used for 

272 NVIDIA machine library (NVML) access). 

273  

274 The devices are mapped to one another by their UUID. 

275  

276 Returns 

277 ------- 

278 cuda.core.Device 

279 The corresponding CUDA device. 

280 """ 

281 from cuda.core import Device as CudaDevice 1d

282  

283 # CUDA does not have an API to get a device by its UUID, so we just 

284 # search all the devices for one with a matching UUID. 

285  

286 for cuda_device in CudaDevice.get_all_devices(): 1d

287 if cuda_device.uuid == self.uuid_without_prefix: 1d

288 return cuda_device 1d

289  

290 raise RuntimeError("No corresponding CUDA device found for this NVML device.") 

291  

292 @classmethod 

293 def get_device_count(cls) -> int: 

294 """ 

295 Get the number of available devices. 

296  

297 Returns 

298 ------- 

299 int 

300 The number of available devices. 

301 """ 

302 initialize() 1RST

303  

304 return nvml.device_get_count_v2() 1RST

305  

306 @classmethod 

307 def get_all_devices(cls) -> Iterable[Device]: 

308 """ 

309 Query the available device instances. 

310  

311 Returns 

312 ------- 

313 Iterator over :obj:`~Device` 

314 An iterator over available devices. 

315 """ 

316 initialize() 1qbrsgmntiuvwexyzcABCjoDfEFGHIJKphklLMdNO

317  

318 for device_id in range(nvml.device_get_count_v2()): 1qbrsgmntiuvwexyzcABCjoDfEFGHIJKphklLMdNO

319 yield cls(index=device_id) 1qbrsgmntiuvwexyzcABCjoDfEFGHIJKphklLMdNO

320  

321 ######################################################################### 

322 # ADDRESSING MODE 

323  

324 @property 

325 def addressing_mode(self) -> AddressingMode: 

326 """ 

327 Get the :obj:`~AddressingMode` of the device. 

328  

329 Addressing modes can be one of: 

330  

331 - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated 

332 memory (``malloc``, ``mmap``) is addressable from the device (GPU), via 

333 software-based mirroring of the CPU's page tables, on the GPU. 

334 - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_ATS`: System allocated 

335 memory (``malloc``, ``mmap``) is addressable from the device (GPU), via 

336 Address Translation Services. This means that there is (effectively) a 

337 single set of page tables, and the CPU and GPU both use them. 

338 - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_NONE`: Neither HMM nor ATS 

339 is active. 

340 """ 

341 return AddressingMode(nvml.device_get_addressing_mode(self._handle).value) 1q

342  

343 ######################################################################### 

344 # MIG (MULTI-INSTANCE GPU) DEVICES 

345  

346 @property 

347 def mig(self) -> MigInfo: 

348 """ 

349 Get :obj:`~MigInfo` accessor for MIG (Multi-Instance GPU) information. 

350  

351 For Ampere™ or newer fully supported devices. 

352 """ 

353 return MigInfo(self) 1nI

354  

355 ######################################################################### 

356 # AFFINITY 

357  

358 @classmethod 

359 def get_all_devices_with_cpu_affinity(cls, cpu_index: int) -> Iterable[Device]: 

360 """ 

361 Retrieve the set of GPUs that have a CPU affinity with the given CPU number. 

362  

363 Supported on Linux only. 

364  

365 Parameters 

366 ---------- 

367 cpu_index: int 

368 The CPU index. 

369  

370 Returns 

371 ------- 

372 Iterator of :obj:`~Device` 

373 An iterator over available devices. 

374 """ 

375 cdef Device device 

376 for handle in nvml.system_get_topology_gpu_set(cpu_index): 1Q

377 device = Device.__new__(Device) 1Q

378 device._handle = handle 1Q

379 yield device 1Q

380  

381 def get_memory_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]: 

382 """ 

383 Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal 

384 memory affinity for the device. 

385  

386 For Kepler™ or newer fully supported devices. 

387  

388 Supported on Linux only. 

389  

390 If requested scope is not applicable to the target topology, the API 

391 will fall back to reporting the memory affinity for the immediate non-I/O 

392 ancestor of the device. 

393 """ 

394 return _unpack_bitmask( 1b

395 nvml.device_get_memory_affinity( 1b

396 self._handle, 1b

397 <unsigned int>ceil(cpu_count() / 64), 1b

398 scope 1b

399 ) 

400 ) 

401  

402 def get_cpu_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]: 

403 """ 

404 Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal 

405 CPU affinity for the device. 

406  

407 For Kepler™ or newer fully supported devices. 

408  

409 Supported on Linux only. 

410  

411 If requested scope is not applicable to the target topology, the API 

412 will fall back to reporting the memory affinity for the immediate non-I/O 

413 ancestor of the device. 

414 """ 

415 return _unpack_bitmask( 1beQ

416 nvml.device_get_cpu_affinity_within_scope( 1beQ

417 self._handle, 1beQ

418 <unsigned int>ceil(cpu_count() / 64), 1beQ

419 scope, 1beQ

420 ) 

421 ) 

422  

423 def set_cpu_affinity(self): 

424 """ 

425 Sets the ideal affinity for the calling thread and device. 

426  

427 For Kepler™ or newer fully supported devices. 

428  

429 Supported on Linux only. 

430 """ 

431 nvml.device_set_cpu_affinity(self._handle) 

432  

433 def clear_cpu_affinity(self): 

434 """ 

435 Clear all affinity bindings for the calling thread. 

436  

437 For Kepler™ or newer fully supported devices. 

438  

439 Supported on Linux only. 

440 """ 

441 nvml.device_clear_cpu_affinity(self._handle) 

442  

443 ######################################################################### 

444 # CLOCK 

445 # See external class definitions in _clock.pxi 

446  

447 def get_clock(self, clock_type: ClockType) -> ClockInfo: 

448 """ 

449 :obj:`~_device.ClockInfo` object to get information about and manage a specific clock on a device. 

450 """ 

451 return ClockInfo(self._handle, clock_type) 1m

452  

453 @property 

454 def is_auto_boosted_clocks_enabled(self) -> tuple[bool, bool]: 

455 """ 

456 Retrieve the current state of auto boosted clocks on a device. 

457  

458 For Kepler™ or newer fully supported devices. 

459  

460 Auto Boosted clocks are enabled by default on some hardware, allowing 

461 the GPU to run at higher clock rates to maximize performance as thermal 

462 limits allow. 

463  

464 On Pascal™ and newer hardware, Auto Boosted clocks are controlled 

465 through application clocks. Use :meth:`set_application_clocks` and 

466 :meth:`reset_application_clocks` to control Auto Boost behavior. 

467  

468 Returns 

469 ------- 

470 bool 

471 The current state of Auto Boosted clocks 

472 bool 

473 The default Auto Boosted clocks behavior 

474  

475 """ 

476 current, default = nvml.device_get_auto_boosted_clocks_enabled(self._handle) 1r

477 return current == nvml.EnableState.FEATURE_ENABLED, default == nvml.EnableState.FEATURE_ENABLED 

478  

479 @property 

480 def current_clock_event_reasons(self) -> list[ClocksEventReasons]: 

481 """ 

482 Retrieves the current :obj:`~ClocksEventReasons`. 

483  

484 For all fully supported products. 

485 """ 

486 cdef uint64_t[1] reasons 

487 reasons[0] = nvml.device_get_current_clocks_event_reasons(self._handle) 1g

488 return [ClocksEventReasons(1 << reason) for reason in _unpack_bitmask(reasons)] 1g

489  

490 @property 

491 def supported_clock_event_reasons(self) -> list[ClocksEventReasons]: 

492 """ 

493 Retrieves supported :obj:`~ClocksEventReasons` that can be returned by 

494 :meth:`get_current_clock_event_reasons`. 

495  

496 For all fully supported products. 

497  

498 This method is not supported in virtual machines running virtual GPU (vGPU). 

499 """ 

500 cdef uint64_t[1] reasons 

501 reasons[0] = nvml.device_get_supported_clocks_event_reasons(self._handle) 1g

502 return [ClocksEventReasons(1 << reason) for reason in _unpack_bitmask(reasons)] 1g

503  

504 ########################################################################## 

505 # COOLER 

506 # See external class definitions in _cooler.pxi 

507  

508 @property 

509 def cooler(self) -> CoolerInfo: 

510 """ 

511 :obj:`~_device.CoolerInfo` object with cooler information for the device. 

512 """ 

513 return CoolerInfo(nvml.device_get_cooler_info(self._handle)) 

514  

515 ########################################################################## 

516 # DEVICE ATTRIBUTES 

517 # See external class definitions in _device_attributes.pxi 

518  

519 @property 

520 def attributes(self) -> DeviceAttributes: 

521 """ 

522 :obj:`~_device.DeviceAttributes` object with various device attributes. 

523  

524 For Ampere™ or newer fully supported devices. Only available on Linux 

525 systems. 

526 """ 

527 return DeviceAttributes(nvml.device_get_attributes_v2(self._handle)) 1u

528  

529 ######################################################################### 

530 # DISPLAY 

531  

532 @property 

533 def is_display_connected(self) -> bool: 

534 """ 

535 The display mode for this device. 

536  

537 Indicates whether a physical display (e.g. monitor) is currently connected to 

538 any of the device's connectors. 

539 """ 

540 return nvml.device_get_display_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED 1o

541  

542 @property 

543 def is_display_active(self) -> bool: 

544 """ 

545 The display active status for this device. 

546  

547 Indicates whether a display is initialized on the device. For example, 

548 whether X Server is attached to this device and has allocated memory for 

549 the screen. 

550  

551 Display can be active even when no monitor is physically attached. 

552 """ 

553 return nvml.device_get_display_active(self._handle) == nvml.EnableState.FEATURE_ENABLED 1o

554  

555 ########################################################################## 

556 # EVENTS 

557 # See external class definitions in _event.pxi 

558  

559 def register_events(self, events: EventType | int | list[EventType | int]) -> DeviceEvents: 

560 """ 

561 Starts recording events on this device. 

562  

563 For Fermi™ or newer fully supported devices. For Linux only. 

564  

565 ECC events are available only on ECC-enabled devices (see 

566 :meth:`Device.get_total_ecc_errors`). Power capping events are 

567 available only on Power Management enabled devices (see 

568 :meth:`Device.get_power_management_mode`). 

569  

570 This call starts recording of events on specific device. All events 

571 that occurred before this call are not recorded. Wait for events using 

572 the :meth:`DeviceEvents.wait` method on the result. 

573  

574 Examples 

575 -------- 

576 >>> device = Device(index=0) 

577 >>> events = device.register_events([ 

578 ... EventType.EVENT_TYPE_XID_CRITICAL_ERROR, 

579 ... ]) 

580 >>> while event := events.wait(timeout_ms=10000): 

581 ... print(f"Event {event.event_type} occurred on device {event.device.uuid}") 

582  

583 Parameters 

584 ---------- 

585 events: EventType, int, or list of EventType or int 

586 The event type or list of event types to register for this device. 

587  

588 Returns 

589 ------- 

590 :obj:`~_device.DeviceEvents` 

591 An object representing the registered events. Call 

592 :meth:`~_device.DeviceEvents.wait` on this object to wait for events. 

593  

594 Raises 

595 ------ 

596 :class:`cuda.core.system.NotSupportedError` 

597 None of the requested event types are registered. 

598 """ 

599 return DeviceEvents(self._handle, events) 1l

600  

601 def get_supported_event_types(self) -> list[EventType]: 

602 """ 

603 Get the list of event types supported by this device. 

604  

605 For Fermi™ or newer fully supported devices. For Linux only (returns an 

606 empty list on Windows). 

607  

608 Returns 

609 ------- 

610 list[EventType] 

611 The list of supported event types. 

612 """ 

613 cdef uint64_t[1] bitmask 

614 bitmask[0] = nvml.device_get_supported_event_types(self._handle) 1l

615 return [EventType(1 << ev) for ev in _unpack_bitmask(bitmask)] 1l

616  

617 ########################################################################## 

618 # FAN 

619 # See external class definitions in _fan.pxi 

620  

621 def get_fan(self, fan: int = 0) -> FanInfo: 

622 """ 

623 :obj:`~_device.FanInfo` object to get information and manage a specific fan on a device. 

624 """ 

625 if fan < 0 or fan >= self.num_fans: 

626 raise ValueError(f"Fan index {fan} is out of range [0, {self.num_fans})") 

627 return FanInfo(self._handle, fan) 

628  

629 @property 

630 def num_fans(self) -> int: 

631 """ 

632 The number of fans on the device. 

633 """ 

634 return nvml.device_get_num_fans(self._handle) 1tD

635  

636 ########################################################################## 

637 # FIELD VALUES 

638 # See external class definitions in _field_values.pxi 

639  

640 def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues: 

641 """ 

642 Get multiple field values from the device. 

643  

644 Each value specified can raise its own exception. That exception will 

645 be raised when attempting to access the corresponding ``value`` from the 

646 returned :obj:`~_device.FieldValues` container. 

647  

648 To confirm that there are no exceptions in the entire container, call 

649 :meth:`~_device.FieldValues.validate`. 

650  

651 Parameters 

652 ---------- 

653 field_ids: list[int | tuple[int, int]] 

654 List of field IDs to query. 

655  

656 Each item may be either a single value from the :class:`FieldId` 

657 enum, or a pair of (:class:`FieldId`, scope ID). 

658  

659 Returns 

660 ------- 

661 :obj:`~_device.FieldValues` 

662 Container of field values corresponding to the requested field IDs. 

663 """ 

664 # Passing a field_ids array of length 0 raises an InvalidArgumentError, 

665 # so avoid that. 

666 if len(field_ids) == 0: 1f

667 return FieldValues(nvml.FieldValue(0)) 1f

668  

669 return FieldValues(nvml.device_get_field_values(self._handle, field_ids)) 1f

670  

671 def clear_field_values(self, field_ids: list[int | tuple[int, int]]) -> None: 

672 """ 

673 Clear multiple field values from the device. 

674  

675 Parameters 

676 ---------- 

677 field_ids: list[int | tuple[int, int]] 

678 List of field IDs to clear. 

679  

680 Each item may be either a single value from the :class:`FieldId` 

681 enum, or a pair of (:class:`FieldId`, scope ID). 

682 """ 

683 # Passing a field_ids array of length 0 raises an InvalidArgumentError, 

684 # so avoid that. 

685 if len(field_ids) == 0: 1f

686 return 

687  

688 nvml.device_clear_field_values(self._handle, field_ids) 1f

689  

690 ########################################################################## 

691 # INFOROM 

692 # See external class definitions in _inforom.pxi 

693  

694 @property 

695 def inforom(self) -> InforomInfo: 

696 """ 

697 :obj:`~_device.InforomInfo` object with InfoROM information. 

698  

699 For all products with an InfoROM. 

700 """ 

701 return InforomInfo(self) 1E

702  

703 ########################################################################## 

704 # MEMORY 

705 # See external class definitions in _memory.pxi 

706  

707 @property 

708 def bar1_memory_info(self) -> BAR1MemoryInfo: 

709 """ 

710 :obj:`~_device.BAR1MemoryInfo` object with BAR1 memory information. 

711  

712 BAR1 is used to map the FB (device memory) so that it can be directly 

713 accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE 

714 bus). 

715 """ 

716 return BAR1MemoryInfo(nvml.device_get_bar1_memory_info(self._handle)) 1v

717  

718 @property 

719 def memory_info(self) -> MemoryInfo: 

720 """ 

721 :obj:`~_device.MemoryInfo` object with memory information. 

722 """ 

723 return MemoryInfo(nvml.device_get_memory_info_v2(self._handle)) 1y

724  

725 ########################################################################## 

726 # NVLINK 

727 # See external class definitions in _nvlink.pxi 

728  

729 def get_nvlink(self, link: int) -> NvlinkInfo: 

730 """ 

731 Get :obj:`~NvlinkInfo` about this device. 

732  

733 For devices with NVLink support. 

734 """ 

735 if link < 0 or link >= NvlinkInfo.max_links: 1p

736 raise ValueError(f"Link index {link} is out of range [0, {NvlinkInfo.max_links})") 

737 return NvlinkInfo(self, link) 1p

738  

739 ########################################################################## 

740 # PCI INFO 

741 # See external class definitions in _pci_info.pxi 

742  

743 @property 

744 def pci_info(self) -> PciInfo: 

745 """ 

746 :obj:`~_device.PciInfo` object with the PCI attributes of this device. 

747 """ 

748 return PciInfo(nvml.device_get_pci_info_ext(self._handle), self._handle) 1cAdP

749  

750 ########################################################################## 

751 # PERFORMANCE 

752 # See external class definitions in _performance.pxi 

753  

754 @property 

755 def performance_state(self) -> Pstates: 

756 """ 

757 The current performance state of the device. 

758  

759 For Fermi™ or newer fully supported devices. 

760  

761 See :class:`Pstates` for possible performance states. 

762 """ 

763 return Pstates(nvml.device_get_performance_state(self._handle)) 1mk

764  

765 @property 

766 def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo: 

767 """ 

768 :obj:`~_device.GpuDynamicPstatesInfo` object with performance monitor samples from the associated subdevice. 

769 """ 

770 return GpuDynamicPstatesInfo(nvml.device_get_dynamic_pstates_info(self._handle)) 1k

771  

772 @property 

773 def supported_pstates(self) -> list[Pstates]: 

774 """ 

775 Get all supported Performance States (P-States) for the device. 

776  

777 The returned list contains a contiguous list of valid P-States supported by 

778 the device. 

779  

780 Return 

781 ------ 

782 list[Pstates] 

783 A list of supported P-States for the device. 

784 """ 

785 return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)] 1k

786  

787 ########################################################################## 

788 # PROCESS 

789 # See external class definitions in _process.pxi 

790  

791 @property 

792 def compute_running_processes(self) -> list[ProcessInfo]: 

793 """ 

794 Get information about processes with a compute context on a device 

795  

796 For Fermi™ or newer fully supported devices. 

797  

798 This function returns information only about compute running processes 

799 (e.g. CUDA application which have active context). Any graphics 

800 applications (e.g. using OpenGL, DirectX) won't be listed by this 

801 function. 

802  

803 Keep in mind that information returned by this call is dynamic and the 

804 number of elements might change in time. 

805  

806 In MIG mode, if device handle is provided, the API returns aggregate 

807 information, only if the caller has appropriate privileges. Per-instance 

808 information can be queried by using specific MIG device handles. 

809 Querying per-instance information using MIG device handles is not 

810 supported if the device is in vGPU Host virtualization mode. 

811 """ 

812 return [ProcessInfo(self, proc) for proc in nvml.device_get_compute_running_processes_v3(self._handle)] 1n

813  

814 ########################################################################## 

815 # REPAIR STATUS 

816 # See external class definitions in _repair_status.pxi 

817  

818 @property 

819 def repair_status(self) -> RepairStatus: 

820 """ 

821 :obj:`~_device.RepairStatus` object with TPC/Channel repair status. 

822  

823 For Ampere™ or newer fully supported devices. 

824 """ 

825 return RepairStatus(self._handle) 1L

826  

827 ########################################################################## 

828 # TEMPERATURE 

829 # See external class definitions in _temperature.pxi 

830  

831 @property 

832 def temperature(self) -> Temperature: 

833 """ 

834 :obj:`~_device.Temperature` object with temperature information for the device. 

835 """ 

836 return Temperature(self._handle) 1M

837  

838 ####################################################################### 

839 # TOPOLOGY 

840  

841 def get_topology_nearest_gpus(self, level: GpuTopologyLevel) -> Iterable[Device]: 

842 """ 

843 Retrieve the GPUs that are nearest to this device at a specific interconnectivity level. 

844  

845 Supported on Linux only. 

846  

847 Parameters 

848 ---------- 

849 level: :class:`GpuTopologyLevel` 

850 The topology level. 

851  

852 Returns 

853 ------- 

854 Iterable of :class:`Device` 

855 The nearest devices at the given topology level. 

856 """ 

857 cdef Device device 

858 for handle in nvml.device_get_topology_nearest_gpus(self._handle, level): 1G

859 device = Device.__new__(Device) 

860 device._handle = handle 

861 yield device 

862  

863 ####################################################################### 

864 # UTILIZATION 

865  

866 @property 

867 def utilization(self) -> Utilization: 

868 """ 

869 Retrieves the current :obj:`~Utilization` rates for the device's major 

870 subsystems. 

871  

872 For Fermi™ or newer fully supported devices. 

873  

874 Note: During driver initialization when ECC is enabled one can see high 

875 GPU and Memory Utilization readings. This is caused by ECC Memory 

876 Scrubbing mechanism that is performed during driver initialization. 

877  

878 Note: On MIG-enabled GPUs, querying device utilization rates is not 

879 currently supported. 

880  

881 Returns 

882 ------- 

883 Utilization 

884 An object containing the current utilization rates for the device. 

885 """ 

886 return Utilization(nvml.device_get_utilization_rates(self._handle)) 1N

887  

888  

def get_topology_common_ancestor(device1: Device, device2: Device) -> GpuTopologyLevel:
    """
    Retrieve the common ancestor for two devices.

    For Linux only.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.

    Returns
    -------
    :class:`GpuTopologyLevel`
        The common ancestor level of the two devices.
    """
    raw_level = nvml.device_get_topology_common_ancestor(
        device1._handle,
        device2._handle,
    )
    return GpuTopologyLevel(raw_level)

913  

914  

def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex) -> GpuP2PStatus:
    """
    Retrieve the P2P status between two devices.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.
    index: :class:`GpuP2PCapsIndex`
        The P2P capability index being looked for between ``device1`` and ``device2``.

    Returns
    -------
    :class:`GpuP2PStatus`
        The P2P status between the two devices.
    """
    raw_status = nvml.device_get_p2p_status(
        device1._handle,
        device2._handle,
        index,
    )
    return GpuP2PStatus(raw_status)

940  

941  

# Public API of this module: enums re-exported from cuda.bindings.nvml,
# plus the classes and functions defined here and in the included .pxi files.
__all__ = [
    "AddressingMode",
    "AffinityScope",
    "BrandType",
    "ClockId",
    "ClocksEventReasons",
    "ClockType",
    "CoolerControl",
    "CoolerTarget",
    "Device",
    "DeviceArch",
    "EventType",
    "FanControlPolicy",
    "FieldId",
    "get_p2p_status",
    "get_topology_common_ancestor",
    "GpuP2PCapsIndex",
    "GpuP2PStatus",
    "GpuTopologyLevel",
    "InforomObject",
    "NvlinkVersion",
    "PcieUtilCounter",
    "Pstates",
    "TemperatureSensors",
    "TemperatureThresholds",
    "ThermalController",
    "ThermalTarget",
    "Utilization",
]