Coverage for cuda/core/system/_device.pyx: 74.14%
263 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from libc.stdint cimport intptr_t, uint64_t
6from libc.math cimport ceil
8from multiprocessing import cpu_count
9from typing import Iterable, TYPE_CHECKING
10import warnings
12from cuda.bindings import nvml
14from ._nvml_context cimport initialize
15from cuda.core.system.typing import (
16 AddressingMode,
17 AffinityScope,
18 DeviceArch,
19 ClockId,
20 ClocksEventReasons,
21 ClockType,
22 CoolerControl,
23 CoolerTarget,
24 DeviceArch,
25 EventType,
26 FanControlPolicy,
27 FieldId,
28 GpuP2PCapsIndex,
29 GpuP2PStatus,
30 GpuTopologyLevel,
31 InforomObject,
32 TemperatureThresholds,
33 ThermalController,
34 ThermalTarget,
35)
36from cuda.core._vendored.deprecated.sphinx import deprecated, versionadded, versionchanged
38if TYPE_CHECKING:
39 import cuda.core # no-cython-lint
cdef object _pstate_to_int(object pstate):
    # Convert an NVML Pstates value into a plain integer P-state.
    #
    # PSTATE_UNKNOWN maps to None; any other value is returned as its offset
    # from nvml.Pstates.PSTATE_0.
    if pstate == nvml.Pstates.PSTATE_UNKNOWN:
        return None

    raw_value = int(pstate)
    assert (
        0 <= raw_value <= 15
    ), f"Invalid P-state: {pstate}. Must be between 0 and 15 inclusive, or PSTATE_UNKNOWN."
    return raw_value - int(nvml.Pstates.PSTATE_0)
cdef int _pstate_to_enum(int pstate):
    # Convert a plain integer P-state into the numeric value of the
    # corresponding nvml.Pstates member by offsetting from PSTATE_0.
    #
    # Raises ValueError when pstate lies outside 0..15.
    if not (0 <= pstate <= 15):
        raise ValueError(f"Invalid P-state: {pstate}. Must be between 0 and 15 inclusive.")
    return int(nvml.Pstates.PSTATE_0) + int(pstate)
57include "_clock.pxi"
58include "_cooler.pxi"
59include "_device_attributes.pxi"
60include "_device_utils.pxi"
61include "_event.pxi"
62include "_fan.pxi"
63include "_field_values.pxi"
64include "_inforom.pxi"
65include "_memory.pxi"
66include "_mig.pxi"
67include "_nvlink.pxi"
68include "_pci_info.pxi"
69include "_performance.pxi"
70include "_process.pxi"
71include "_repair_status.pxi"
72include "_temperature.pxi"
73include "_utilization.pxi"
# NVML addressing-mode value -> public AddressingMode member.
_ADDRESSING_MODE_MAPPING = {
    nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_HMM: AddressingMode.HMM,
    nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_ATS: AddressingMode.ATS,
}
# Public AffinityScope member -> NVML affinity scope value.
_AFFINITY_SCOPE_MAPPING = {
    AffinityScope.NODE: nvml.AffinityScope.NODE,
    AffinityScope.SOCKET: nvml.AffinityScope.SOCKET,
}
# NVML brand type -> human-readable brand name, as returned by `Device.brand`.
_BRAND_TYPE_MAPPING = {
    nvml.BrandType.BRAND_UNKNOWN: "Unknown",
    nvml.BrandType.BRAND_QUADRO: "Quadro",
    nvml.BrandType.BRAND_TESLA: "Tesla",
    nvml.BrandType.BRAND_NVS: "NVS",
    nvml.BrandType.BRAND_GRID: "GRID",
    nvml.BrandType.BRAND_GEFORCE: "GeForce",
    nvml.BrandType.BRAND_TITAN: "Titan",
    nvml.BrandType.BRAND_NVIDIA_VAPPS: "NVIDIA vApps",
    nvml.BrandType.BRAND_NVIDIA_VPC: "NVIDIA VPC",
    nvml.BrandType.BRAND_NVIDIA_VCS: "NVIDIA VCS",
    nvml.BrandType.BRAND_NVIDIA_VWS: "NVIDIA VWS",
    nvml.BrandType.BRAND_NVIDIA_CLOUD_GAMING: "NVIDIA Cloud Gaming",
    nvml.BrandType.BRAND_NVIDIA_VGAMING: "NVIDIA vGaming",
    nvml.BrandType.BRAND_QUADRO_RTX: "Quadro RTX",
    nvml.BrandType.BRAND_NVIDIA_RTX: "NVIDIA RTX",
    nvml.BrandType.BRAND_NVIDIA: "NVIDIA",
    nvml.BrandType.BRAND_GEFORCE_RTX: "GeForce RTX",
    nvml.BrandType.BRAND_TITAN_RTX: "Titan RTX",
}
# Public GpuP2PCapsIndex member -> NVML P2P capability index value.
_GPU_P2P_CAPS_INDEX_MAPPING = {
    GpuP2PCapsIndex.READ: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_READ,
    GpuP2PCapsIndex.WRITE: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_WRITE,
    GpuP2PCapsIndex.NVLINK: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_NVLINK,
    GpuP2PCapsIndex.ATOMICS: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_ATOMICS,
    GpuP2PCapsIndex.PCI: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PCI,
    GpuP2PCapsIndex.PROP: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PROP,
    GpuP2PCapsIndex.UNKNOWN: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_UNKNOWN,
}
# NVML P2P status value -> public GpuP2PStatus member.
_GPU_P2P_STATUS_MAPPING = {
    nvml.GpuP2PStatus.P2P_STATUS_OK: GpuP2PStatus.OK,
    nvml.GpuP2PStatus.P2P_STATUS_CHIPSET_NOT_SUPPORTED: GpuP2PStatus.CHIPSET_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_GPU_NOT_SUPPORTED: GpuP2PStatus.GPU_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED: GpuP2PStatus.IOH_TOPOLOGY_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_DISABLED_BY_REGKEY: GpuP2PStatus.DISABLED_BY_REGKEY,
    nvml.GpuP2PStatus.P2P_STATUS_NOT_SUPPORTED: GpuP2PStatus.NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_UNKNOWN: GpuP2PStatus.UNKNOWN,
}
# Public GpuTopologyLevel member -> NVML topology level value.
_GPU_TOPOLOGY_LEVEL_MAPPING = {
    GpuTopologyLevel.INTERNAL: nvml.GpuTopologyLevel.TOPOLOGY_INTERNAL,
    GpuTopologyLevel.SINGLE: nvml.GpuTopologyLevel.TOPOLOGY_SINGLE,
    GpuTopologyLevel.MULTIPLE: nvml.GpuTopologyLevel.TOPOLOGY_MULTIPLE,
    GpuTopologyLevel.HOSTBRIDGE: nvml.GpuTopologyLevel.TOPOLOGY_HOSTBRIDGE,
    GpuTopologyLevel.NODE: nvml.GpuTopologyLevel.TOPOLOGY_NODE,
    GpuTopologyLevel.SYSTEM: nvml.GpuTopologyLevel.TOPOLOGY_SYSTEM,
}


# Reverse lookup: NVML topology level value -> public GpuTopologyLevel member.
_GPU_TOPOLOGY_LEVEL_INV_MAPPING = {
    nvml_level: public_level
    for public_level, nvml_level in _GPU_TOPOLOGY_LEVEL_MAPPING.items()
}
146cdef class Device:
147 """
148 Representation of a device.
150 :class:`cuda.core.system.Device` provides access to various pieces of metadata
151 about devices and their topology, as provided by the NVIDIA Management
152 Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.
154 Creating a device instance causes NVML to initialize the target GPU.
155 NVML may initialize additional GPUs if the target GPU is an SLI slave.
157 Parameters
158 ----------
159 index: int, optional
160 Integer representing the NVML device index to get a handle to. Valid
161 values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.
163 The order in which devices are enumerated has no guarantees of
164 consistency between reboots. For that reason, it is recommended that
165 devices are looked up by their PCI ids or UUID.
167 uuid: bytes or str, optional
168 UUID of a CUDA device to get a handle to.
170 pci_bus_id: bytes or str, optional
171 PCI bus ID of a CUDA device to get a handle to.
173 Raises
174 ------
175 ValueError
176 If anything other than a single `index`, `uuid` or `pci_bus_id` are specified.
177 """
179 # This is made public for testing purposes only
180 cdef public intptr_t _handle
def __init__(
    self,
    *,
    index: int | None = None,
    uuid: bytes | str | None = None,
    pci_bus_id: bytes | str | None = None,
) -> None:
    # Exactly one of the three selectors must be supplied.
    selectors = (index, uuid, pci_bus_id)
    cdef int arg_count = len([selector for selector in selectors if selector is not None])

    if arg_count == 0:
        raise ValueError("Handle requires either a device `index`, `uuid`, or `pci_bus_id`.")
    if arg_count > 1:
        raise ValueError("Handle requires only one of `index`, `uuid`, or `pci_bus_id`.")

    initialize()

    if index is not None:
        self._handle = nvml.device_get_handle_by_index_v2(index)
        return

    if uuid is not None:
        # The NVML lookup is given a str, so bytes input is decoded first.
        if isinstance(uuid, bytes):
            uuid = uuid.decode("ascii")
        self._handle = nvml.device_get_handle_by_uuid(uuid)
        return

    # Only `pci_bus_id` can be set at this point.
    if isinstance(pci_bus_id, bytes):
        pci_bus_id = pci_bus_id.decode("ascii")
    self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
210 #########################################################################
211 # BASIC PROPERTIES
213 @property
def index(self) -> int:
    """
    NVML index of this device.

    Valid indices follow from the count returned by
    :meth:`Device.get_device_count`: if ``get_device_count()`` returns 2,
    the valid indices are 0 and 1, corresponding to GPU 0 and GPU 1.

    NVML gives no guarantee that devices are enumerated in the same order
    across reboots, so looking devices up by their PCI ids or GPU UUID is
    recommended.

    Note: The NVML index may not correlate with other APIs, such as the CUDA
    device index.
    """
    nvml_index = nvml.device_get_index(self._handle)
    return nvml_index
232 @property
def uuid(self) -> str:
    """
    Globally unique immutable UUID associated with this device, as a 5 part
    hexadecimal string, that augments the immutable, board serial
    identifier.

    In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
    prefix, and this property returns it unchanged. If you need a `uuid`
    without that prefix (for example, to interact with CUDA), use the
    `uuid_without_prefix` property.
    """
    prefixed_uuid = nvml.device_get_uuid(self._handle)
    return prefixed_uuid
245 @property
def uuid_without_prefix(self) -> str:
    """
    Globally unique immutable UUID associated with this device, as a 5 part
    hexadecimal string, that augments the immutable, board serial
    identifier.

    In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
    prefix. This property returns it without the prefix, to match the UUIDs
    used in CUDA. If you need the prefix, use the `uuid` property.
    """
    prefixed_uuid = nvml.device_get_uuid(self._handle)
    # Drop the four-character `gpu-` / `mig-` prefix.
    return prefixed_uuid[4:]
259 @property
def pci_bus_id(self) -> str:
    """
    PCI bus ID of this device, read from :attr:`pci_info`.
    """
    info = self.pci_info
    return info.bus_id
266 @property
def numa_node_id(self) -> int:
    """
    NUMA node of this GPU device.

    This only applies to platforms where the GPUs are NUMA nodes.
    """
    node_id = nvml.device_get_numa_node_id(self._handle)
    return node_id
275 @property
def arch(self) -> DeviceArch:
    """
    :obj:`~DeviceArch` device architecture.

    For example, a Tesla V100 will report an architecture whose ``name`` is
    ``"VOLTA"``, and an RTX A6000 one whose ``name`` is ``"AMPERE"``.

    Values reported by NVML that :obj:`~DeviceArch` does not recognise are
    returned as ``DeviceArch.UNKNOWN``.
    """
    raw_arch = nvml.device_get_architecture(self._handle)
    try:
        known_arch = DeviceArch(raw_arch)
    except ValueError:
        return DeviceArch.UNKNOWN
    return known_arch
290 @property
def name(self) -> str:
    """
    Device name, e.g.: `"Tesla V100-SXM2-32GB"`
    """
    device_name = nvml.device_get_name(self._handle)
    return device_name
297 @property
def brand(self) -> str:
    """
    Brand of the device, as a human-readable string.

    Returns "Unknown" if the brand is unknown.
    """
    brand_type = nvml.device_get_brand(self._handle)
    return _BRAND_TYPE_MAPPING.get(brand_type, "Unknown")
306 @property
def serial(self) -> str:
    """
    Globally unique board serial number associated with this device's
    board.

    For all products with an InfoROM.
    """
    board_serial = nvml.device_get_serial(self._handle)
    return board_serial
316 @property
def module_id(self) -> int:
    """
    Unique identifier for the device module on the baseboard.

    This API retrieves a unique identifier for each GPU module that exists
    on a given baseboard. For non-baseboard products, this ID would always
    be 0.
    """
    identifier = nvml.device_get_module_id(self._handle)
    return identifier
327 @property
def minor_number(self) -> int:
    """
    Minor number of this device.

    For Linux only.

    The Linux device driver uses the minor number to identify the device
    node in ``/dev/nvidiaX``.
    """
    minor = nvml.device_get_minor_number(self._handle)
    return minor
339 @property
def is_c2c_enabled(self) -> bool:
    """
    Whether the C2C (Chip-to-Chip) mode is enabled for this device.
    """
    c2c_info = nvml.device_get_c2c_mode_info_v(self._handle)
    return bool(c2c_info.is_c2c_enabled)
346 @property
def is_persistence_mode_enabled(self) -> bool:
    """
    Whether persistence mode is enabled for this device.

    For Linux only.
    """
    mode = nvml.device_get_persistence_mode(self._handle)
    return mode == nvml.EnableState.FEATURE_ENABLED
355 @is_persistence_mode_enabled.setter
def is_persistence_mode_enabled(self, enabled: bool) -> None:
    # Translate the truthiness of `enabled` into the NVML enable state.
    if enabled:
        requested_state = nvml.EnableState.FEATURE_ENABLED
    else:
        requested_state = nvml.EnableState.FEATURE_DISABLED
    nvml.device_set_persistence_mode(self._handle, requested_state)
362 @property
def cuda_compute_capability(self) -> tuple[int, int]:
    """
    CUDA compute capability of the device, e.g.: `(7, 0)` for a Tesla V100.

    Returns a tuple `(major, minor)`.
    """
    capability = nvml.device_get_cuda_compute_capability(self._handle)
    return capability
def to_cuda_device(self) -> "cuda.core.Device":
    """
    Get the corresponding :class:`cuda.core.Device` (which is used for CUDA
    access) for this :class:`cuda.core.system.Device` (which is used for
    NVIDIA machine library (NVML) access).

    The devices are mapped to one another by their UUID.

    Returns
    -------
    cuda.core.Device
        The corresponding CUDA device.

    Raises
    ------
    RuntimeError
        No corresponding CUDA device is found for this NVML device.

        For example, on a MIG system, the physical GPU will not have an
        available CUDA device, since it can not be used directly, even
        though it can be enumerated from NVML.
    """
    # Imported here rather than at module level (module level only imports
    # cuda.core under TYPE_CHECKING).
    from cuda.core import Device as CudaDevice

    cuda_devices = CudaDevice.get_all_devices()

    # `uuid_without_prefix` queries NVML on every access, so read it once
    # instead of once per candidate device.
    target_uuid = self.uuid_without_prefix

    # CUDA does not have an API to get a device by its UUID, so we just
    # search all the devices for one with a matching UUID.
    for cuda_device in cuda_devices:
        if cuda_device.uuid == target_uuid:
            return cuda_device

    raise RuntimeError("No corresponding CUDA device found for this NVML device.")
404 @classmethod
def get_device_count(cls) -> int:
    """
    Get the number of available devices.

    NVML is initialized first, so this can be called before any
    :obj:`~Device` has been created.

    Returns
    -------
    int
        The number of available devices.
    """
    initialize()
    device_count = nvml.device_get_count_v2()
    return device_count
418 @classmethod
def get_all_devices(cls) -> Iterable[Device]:
    """
    Query the available device instances.

    Devices are created lazily, one per index, as the result is iterated.

    Returns
    -------
    Iterator over :obj:`~Device`
        An iterator over available devices.
    """
    initialize()

    device_count = nvml.device_get_count_v2()
    for device_index in range(device_count):
        yield cls(index=device_index)
433 #########################################################################
434 # ADDRESSING MODE
436 @property
def addressing_mode(self) -> AddressingMode | None:
    """
    Get the :obj:`~AddressingMode` of the device.

    Returns `None` when the value reported by NVML has no entry in the
    addressing-mode mapping.
    """
    nvml_mode = nvml.device_get_addressing_mode(self._handle).value
    return _ADDRESSING_MODE_MAPPING.get(nvml_mode, None)
443 #########################################################################
444 # MIG (MULTI-INSTANCE GPU) DEVICES
446 @property
def mig(self) -> MigInfo:
    """
    Get :obj:`~MigInfo` accessor for MIG (Multi-Instance GPU) information.

    For Ampere™ or newer fully supported devices.
    """
    mig_accessor = MigInfo(self)
    return mig_accessor
455 #########################################################################
456 # AFFINITY
458 @classmethod
def get_all_devices_with_cpu_affinity(cls, cpu_index: int) -> Iterable[Device]:
    """
    Retrieve the set of GPUs that have a CPU affinity with the given CPU number.

    Supported on Linux only.

    Parameters
    ----------
    cpu_index: int
        The CPU index.

    Returns
    -------
    Iterator of :obj:`~Device`
        An iterator over available devices.
    """
    cdef Device device

    # This classmethod can be the first NVML entry point a caller touches,
    # so initialize NVML here like the other classmethods do.
    initialize()

    for handle in nvml.system_get_topology_gpu_set(cpu_index):
        # NVML already returned a handle, so bypass __init__ (which would
        # look the device up again) and attach the handle directly.
        device = Device.__new__(Device)
        device._handle = handle
        yield device
def get_memory_affinity(self, scope: AffinityScope | str = AffinityScope.NODE) -> list[int]:
    """
    Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal
    memory affinity for the device.

    For Kepler™ or newer fully supported devices.

    Supported on Linux only.

    If requested scope is not applicable to the target topology, the API
    will fall back to reporting the memory affinity for the immediate non-I/O
    ancestor of the device.

    Parameters
    ----------
    scope: AffinityScope | str, optional
        The scope of the affinity query. Must be one of the values of
        :class:`AffinityScope`. Default is :attr:`AffinityScope.NODE`.

    Returns
    -------
    list[int]
        A list of indices of NUMA nodes or CPU sockets with the ideal memory
        affinity for the device.

    Raises
    ------
    ValueError
        If `scope` is not a recognised affinity scope.
    """
    try:
        nvml_scope = _AFFINITY_SCOPE_MAPPING[scope]
    except KeyError:
        raise ValueError(
            f"Invalid affinity scope: {scope}. "
            f"Must be one of {list(AffinityScope.__members__.values())}"
        ) from None

    # Ask NVML for ceil(cpu_count() / 64) mask words, then expand the
    # returned bitmask into a list of set-bit indices.
    affinity_mask = nvml.device_get_memory_affinity(
        self._handle,
        <unsigned int>ceil(cpu_count() / 64),
        nvml_scope,
    )
    return _unpack_bitmask(affinity_mask)
def get_cpu_affinity(self, scope: AffinityScope | str = AffinityScope.NODE) -> list[int]:
    """
    Retrieves a list of indices of the CPUs with the ideal CPU affinity for
    the device, within the given NUMA node or CPU socket scope.

    For Kepler™ or newer fully supported devices.

    Supported on Linux only.

    If requested scope is not applicable to the target topology, the API
    will fall back to reporting the CPU affinity for the immediate non-I/O
    ancestor of the device.

    Parameters
    ----------
    scope: AffinityScope | str, optional
        The scope of the affinity query. Must be one of the values of
        :class:`AffinityScope`. Default is :attr:`AffinityScope.NODE`.

    Returns
    -------
    list[int]
        A list of indices of the CPUs with the ideal CPU affinity for the
        device.

    Raises
    ------
    ValueError
        If `scope` is not a recognised affinity scope.
    """
    try:
        nvml_scope = _AFFINITY_SCOPE_MAPPING[scope]
    except KeyError:
        raise ValueError(
            f"Invalid affinity scope: {scope}. "
            f"Must be one of {list(AffinityScope.__members__.values())}"
        ) from None

    # Ask NVML for ceil(cpu_count() / 64) mask words, then expand the
    # returned bitmask into a list of set-bit indices.
    affinity_mask = nvml.device_get_cpu_affinity_within_scope(
        self._handle,
        <unsigned int>ceil(cpu_count() / 64),
        nvml_scope,
    )
    return _unpack_bitmask(affinity_mask)
def set_cpu_affinity(self) -> None:
    """
    Set the ideal affinity for the calling thread and this device.

    For Kepler™ or newer fully supported devices.

    Supported on Linux only.
    """
    handle = self._handle
    nvml.device_set_cpu_affinity(handle)
def clear_cpu_affinity(self) -> None:
    """
    Clear all affinity bindings for the calling thread.

    For Kepler™ or newer fully supported devices.

    Supported on Linux only.
    """
    handle = self._handle
    nvml.device_clear_cpu_affinity(handle)
581 #########################################################################
582 # CLOCK
583 # See external class definitions in _clock.pxi
def get_clock(self, clock_type: ClockType | str) -> ClockInfo:
    """
    :obj:`~_device.ClockInfo` object to get information about and manage a specific clock on a device.
    """
    clock = ClockInfo(self._handle, clock_type)
    return clock
591 @property
def is_auto_boosted_clocks_enabled(self) -> tuple[bool, bool]:
    """
    Retrieve the current state of auto boosted clocks on a device.

    For Kepler™ or newer fully supported devices.

    Auto Boosted clocks are enabled by default on some hardware, allowing
    the GPU to run at higher clock rates to maximize performance as thermal
    limits allow.

    On Pascal™ and newer hardware, Auto Boosted clocks are controlled
    through application clocks. Use :meth:`set_application_clocks` and
    :meth:`reset_application_clocks` to control Auto Boost behavior.

    Returns
    -------
    bool
        The current state of Auto Boosted clocks
    bool
        The default Auto Boosted clocks behavior

    """
    current, default = nvml.device_get_auto_boosted_clocks_enabled(self._handle)
    enabled_state = nvml.EnableState.FEATURE_ENABLED
    is_enabled_now = current == enabled_state
    is_enabled_by_default = default == enabled_state
    return is_enabled_now, is_enabled_by_default
617 @property
def current_clock_event_reasons(self) -> list[ClocksEventReasons]:
    """
    Retrieves the current :obj:`~ClocksEventReasons`.

    For all fully supported products.

    Raises
    ------
    ValueError
        If NVML reports a reason bit that has no entry in the known
        clock event reasons mapping.
    """
    cdef uint64_t[1] reasons
    reasons[0] = nvml.device_get_current_clocks_event_reasons(self._handle)
    output_reasons = []
    # `_unpack_bitmask` yields bit positions; the mapping is keyed by the
    # corresponding single-bit mask value.
    for reason in _unpack_bitmask(reasons):
        try:
            output_reason = _CLOCKS_EVENT_REASONS_MAPPING[1 << reason]
        except KeyError:
            # The failed dict lookup adds nothing to the message, so keep it
            # out of the traceback.
            raise ValueError(f"Unknown clock event reason bit: {1 << reason}") from None
        output_reasons.append(output_reason)
    return output_reasons
635 @property
def supported_clock_event_reasons(self) -> list[ClocksEventReasons]:
    """
    Retrieves supported :obj:`~ClocksEventReasons` that can be returned by
    :attr:`current_clock_event_reasons`.

    For all fully supported products.

    This method is not supported in virtual machines running virtual GPU (vGPU).

    Raises
    ------
    ValueError
        If NVML reports a reason bit that has no entry in the known
        clock event reasons mapping.
    """
    cdef uint64_t[1] reasons
    reasons[0] = nvml.device_get_supported_clocks_event_reasons(self._handle)
    output_reasons = []
    # `_unpack_bitmask` yields bit positions; the mapping is keyed by the
    # corresponding single-bit mask value.
    for reason in _unpack_bitmask(reasons):
        try:
            output_reason = _CLOCKS_EVENT_REASONS_MAPPING[1 << reason]
        except KeyError:
            # The failed dict lookup adds nothing to the message, so keep it
            # out of the traceback.
            raise ValueError(f"Unknown clock event reason bit: {1 << reason}") from None
        output_reasons.append(output_reason)
    return output_reasons
656 ##########################################################################
657 # COOLER
658 # See external class definitions in _cooler.pxi
660 @property
def cooler(self) -> CoolerInfo:
    """
    :obj:`~_device.CoolerInfo` object with cooler information for the device.
    """
    raw_cooler_info = nvml.device_get_cooler_info(self._handle)
    return CoolerInfo(raw_cooler_info)
667 ##########################################################################
668 # DEVICE ATTRIBUTES
669 # See external class definitions in _device_attributes.pxi
671 @property
def attributes(self) -> DeviceAttributes:
    """
    :obj:`~_device.DeviceAttributes` object with various device attributes.

    For Ampere™ or newer fully supported devices. Only available on Linux
    systems.
    """
    raw_attributes = nvml.device_get_attributes_v2(self._handle)
    return DeviceAttributes(raw_attributes)
681 #########################################################################
682 # DISPLAY
684 @property
def is_display_connected(self) -> bool:
    """
    The display mode for this device.

    Indicates whether a physical display (e.g. monitor) is currently connected to
    any of the device's connectors.
    """
    display_mode = nvml.device_get_display_mode(self._handle)
    return display_mode == nvml.EnableState.FEATURE_ENABLED
694 @property
def is_display_active(self) -> bool:
    """
    The display active status for this device.

    Indicates whether a display is initialized on the device. For example,
    whether X Server is attached to this device and has allocated memory for
    the screen.

    Display can be active even when no monitor is physically attached.
    """
    display_active = nvml.device_get_display_active(self._handle)
    return display_active == nvml.EnableState.FEATURE_ENABLED
707 ##########################################################################
708 # EVENTS
709 # See external class definitions in _event.pxi
def register_events(self, events: EventType | str | list[EventType | str]) -> DeviceEvents:
    """
    Starts recording events on this device.

    For Fermi™ or newer fully supported devices. For Linux only.

    ECC events are available only on ECC-enabled devices (see
    :meth:`Device.get_total_ecc_errors`). Power capping events are
    available only on Power Management enabled devices (see
    :meth:`Device.get_power_management_mode`).

    This call starts recording of events on specific device. All events
    that occurred before this call are not recorded. Wait for events using
    the :meth:`DeviceEvents.wait` method on the result.

    Examples
    --------
    >>> device = Device(index=0)
    >>> events = device.register_events([
    ...     EventType.XID_CRITICAL_ERROR,
    ... ])
    >>> while event := events.wait(timeout_ms=10000):
    ...     print(f"Event {event.event_type} occurred on device {event.device.uuid}")

    Parameters
    ----------
    events: EventType, str, or list of EventType or str
        The event type or list of event types to register for this device.

    Returns
    -------
    :obj:`~_device.DeviceEvents`
        An object representing the registered events. Call
        :meth:`~_device.DeviceEvents.wait` on this object to wait for events.

    Raises
    ------
    :class:`cuda.core.system.NotSupportedError`
        None of the requested event types are registered.
    """
    registered_events = DeviceEvents(self._handle, events)
    return registered_events
def get_supported_event_types(self) -> list[EventType]:
    """
    Get the list of event types supported by this device.

    For Fermi™ or newer fully supported devices. For Linux only (returns an
    empty list on Windows).

    Returns
    -------
    list[EventType]
        The list of supported event types.

    Raises
    ------
    ValueError
        If NVML reports an event type bit that has no entry in the known
        event type mapping.
    """
    cdef uint64_t[1] bitmask
    bitmask[0] = nvml.device_get_supported_event_types(self._handle)
    events = []
    # `_unpack_bitmask` yields bit positions; the mapping is keyed by the
    # corresponding single-bit mask value.
    for ev in _unpack_bitmask(bitmask):
        try:
            ev_enum = _EVENT_TYPE_MAPPING[1 << ev]
        except KeyError:
            # The failed dict lookup adds nothing to the message, so keep it
            # out of the traceback.
            raise ValueError(f"Unknown event type bit: {1 << ev}") from None
        events.append(ev_enum)
    return events
776 ##########################################################################
777 # FAN
778 # See external class definitions in _fan.pxi
def get_fan(self, fan: int = 0) -> FanInfo:
    """
    :obj:`~_device.FanInfo` object to get information and manage a specific fan on a device.

    Parameters
    ----------
    fan: int, optional
        Index of the fan. Must satisfy ``0 <= fan < num_fans``. Default is 0.

    Raises
    ------
    ValueError
        If `fan` is outside the range of fans on this device.
    """
    # `num_fans` queries NVML on every access; read it once so the range
    # check and the error message agree.
    fan_count = self.num_fans
    if fan < 0 or fan >= fan_count:
        raise ValueError(f"Fan index {fan} is out of range [0, {fan_count})")
    return FanInfo(self._handle, fan)
788 @property
def num_fans(self) -> int:
    """
    Number of fans on the device.
    """
    fan_count = nvml.device_get_num_fans(self._handle)
    return fan_count
795 ##########################################################################
796 # FIELD VALUES
797 # See external class definitions in _field_values.pxi
def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
    """
    Get multiple field values from the device.

    Each value specified can raise its own exception. That exception will
    be raised when attempting to access the corresponding ``value`` from the
    returned :obj:`~_device.FieldValues` container.

    To confirm that there are no exceptions in the entire container, call
    :meth:`~_device.FieldValues.validate`.

    Parameters
    ----------
    field_ids: list[int | tuple[int, int]]
        List of field IDs to query.

        Each item may be either a single value from the :class:`FieldId`
        enum, or a pair of (:class:`FieldId`, scope ID).

    Returns
    -------
    :obj:`~_device.FieldValues`
        Container of field values corresponding to the requested field IDs.
    """
    if len(field_ids) == 0:
        # Passing a field_ids array of length 0 raises an
        # InvalidArgumentError, so build an empty container instead.
        raw_values = nvml.FieldValue(0)
    else:
        raw_values = nvml.device_get_field_values(self._handle, field_ids)

    return FieldValues(raw_values)
def clear_field_values(self, field_ids: list[int | tuple[int, int]]) -> None:
    """
    Clear multiple field values from the device.

    Parameters
    ----------
    field_ids: list[int | tuple[int, int]]
        List of field IDs to clear.

        Each item may be either a single value from the :class:`FieldId`
        enum, or a pair of (:class:`FieldId`, scope ID).
    """
    # Passing a field_ids array of length 0 raises an InvalidArgumentError,
    # so only call into NVML when there is something to clear.
    if len(field_ids) != 0:
        nvml.device_clear_field_values(self._handle, field_ids)
849 ##########################################################################
850 # INFOROM
851 # See external class definitions in _inforom.pxi
853 @property
def inforom(self) -> InforomInfo:
    """
    :obj:`~_device.InforomInfo` object with InfoROM information.

    For all products with an InfoROM.
    """
    inforom_accessor = InforomInfo(self)
    return inforom_accessor
862 ##########################################################################
863 # MEMORY
864 # See external class definitions in _memory.pxi
866 @property
def bar1_memory_info(self) -> BAR1MemoryInfo:
    """
    :obj:`~_device.BAR1MemoryInfo` object with BAR1 memory information.

    BAR1 is used to map the FB (device memory) so that it can be directly
    accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE
    bus).
    """
    raw_bar1_info = nvml.device_get_bar1_memory_info(self._handle)
    return BAR1MemoryInfo(raw_bar1_info)
877 @property
def memory_info(self) -> MemoryInfo:
    """
    :obj:`~_device.MemoryInfo` object with memory information.
    """
    raw_memory_info = nvml.device_get_memory_info_v2(self._handle)
    return MemoryInfo(raw_memory_info)
884 ##########################################################################
885 # NVLINK
886 # See external class definitions in _nvlink.pxi
888 @versionchanged(
889 version="1.1.0",
890 reason="Any link number not supported by this specific device will raise a `ValueError`."
891 )
def get_nvlink(self, link: int) -> NvlinkInfo:
    """
    Get :obj:`~NvlinkInfo` about this device.

    For devices with NVLink support.

    Raises
    ------
    ValueError
        If `link` is outside the range of links on this device.
    """
    link_count = self.get_nvlink_count()
    index_is_invalid = link < 0 or link >= link_count
    if index_is_invalid:
        raise ValueError(f"Link index {link} is out of range [0, {link_count})")
    return NvlinkInfo(self, link)
903 @versionadded(version="1.1.0")
def get_nvlink_count(self) -> int:
    """
    Get the number of NVLink links on this device.

    For devices with NVLink support.
    """
    # The count is exposed as a field value rather than a dedicated query.
    field_values = self.get_field_values([FieldId.DEV_NVLINK_LINK_COUNT])
    return field_values[0].value
912 @versionadded(version="1.1.0")
def get_nvlinks(self) -> Iterable[NvlinkInfo]:
    """
    Get :obj:`~NvlinkInfo` about all NVLink links on this device.

    For devices with NVLink support.

    Returns
    -------
    Iterator over :obj:`~NvlinkInfo`
        One accessor per link, in ascending link order.
    """
    # Every index produced by `range(link_count)` is in range by
    # construction, so build the accessors directly instead of going
    # through `get_nvlink`, which would re-query the link count per link.
    link_count = self.get_nvlink_count()
    for link in range(link_count):
        yield NvlinkInfo(self, link)
922 ##########################################################################
923 # PCI INFO
924 # See external class definitions in _pci_info.pxi
926 @property
def pci_info(self) -> PciInfo:
    """
    :obj:`~_device.PciInfo` object with the PCI attributes of this device.

    Non-physical devices, such as MIG devices, may not have PCI attributes.
    In that case, this property will raise a `RuntimeError`.
    """
    try:
        raw_pci_info = nvml.device_get_pci_info_ext(self._handle)
    except nvml.InvalidArgumentError:
        raise RuntimeError("This device does not have PCI attributes") from None

    return PciInfo(raw_pci_info, self._handle)
941 ##########################################################################
942 # PERFORMANCE
943 # See external class definitions in _performance.pxi
945 @property
def performance_state(self) -> int | None:
    """
    The current performance state of the device.

    For Fermi™ or newer fully supported devices.

    Returns
    -------
    int | None
        The current performance state of the device, as an integer between 0 and 15,
        where 0 is maximum performance and higher numbers are lower performance.
        Returns `None` if the performance state is unknown.
    """
    nvml_pstate = nvml.device_get_performance_state(self._handle)
    return _pstate_to_int(nvml_pstate)
961 @property
def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo:
    """
    :obj:`~_device.GpuDynamicPstatesInfo` object with performance monitor samples from the associated subdevice.
    """
    raw_pstates_info = nvml.device_get_dynamic_pstates_info(self._handle)
    return GpuDynamicPstatesInfo(raw_pstates_info)
968 @property
def supported_pstates(self) -> list[int]:
    """
    Get all supported Performance States (P-States) for the device.

    The returned list contains a contiguous list of valid P-States supported by
    the device.

    Returns
    -------
    list[int]
        A list of supported performance state of the device, as an integer
        between 0 and 15, where 0 is maximum performance and higher numbers
        are lower performance.
    """
    # From nvml.h:
    # The returned array would contain a contiguous list of valid P-States
    # supported by the device. If the number of supported P-States is fewer
    # than the size of the array supplied missing elements would contain \a
    # NVML_PSTATE_UNKNOWN.
    #
    # `_pstate_to_int` turns those unknown entries into None, which are
    # filtered out below.
    converted = (
        _pstate_to_int(nvml_pstate)
        for nvml_pstate in nvml.device_get_supported_performance_states(self._handle)
    )
    return [pstate_value for pstate_value in converted if pstate_value is not None]
996 ##########################################################################
997 # PROCESS
998 # See external class definitions in _process.pxi
@property
def compute_running_processes(self) -> list[ProcessInfo]:
    """
    Information about the processes that hold a compute context on this device.

    For Fermi™ or newer fully supported devices.

    Only compute processes are reported (e.g. CUDA applications with an
    active context). Graphics applications (e.g. using OpenGL, DirectX)
    are not listed.

    The information is dynamic: the number of elements may change from one
    call to the next.

    In MIG mode, if a device handle is provided, the API returns aggregate
    information, and only if the caller has appropriate privileges.
    Per-instance information can be queried by using specific MIG device
    handles. Querying per-instance information using MIG device handles is
    not supported if the device is in vGPU Host virtualization mode.
    """
    running = nvml.device_get_compute_running_processes_v3(self._handle)
    # Each ProcessInfo is constructed with this device and the raw NVML record.
    return [ProcessInfo(self, entry) for entry in running]
1023 ##########################################################################
1024 # REPAIR STATUS
1025 # See external class definitions in _repair_status.pxi
@property
def repair_status(self) -> RepairStatus:
    """
    TPC/Channel repair status, as a :obj:`~_device.RepairStatus` object.

    For Ampere™ or newer fully supported devices.
    """
    status = RepairStatus(self._handle)
    return status
1036 ##########################################################################
1037 # TEMPERATURE
1038 # See external class definitions in _temperature.pxi
@property
def temperature(self) -> Temperature:
    """
    Temperature information for the device, as a :obj:`~_device.Temperature` object.
    """
    readings = Temperature(self._handle)
    return readings
1047 #######################################################################
1048 # TOPOLOGY
def get_topology_nearest_gpus(self, level: GpuTopologyLevel | str) -> Iterable[Device]:
    """
    Retrieve the GPUs that are nearest to this device at a specific interconnectivity level.

    Supported on Linux only.

    This method is a generator: the ``level`` lookup and the NVML query run
    when iteration starts, not when the method is called.

    Parameters
    ----------
    level: :class:`GpuTopologyLevel` | str
        The topology level.

    Returns
    -------
    Iterable of :class:`Device`
        The nearest devices at the given topology level.

    Raises
    ------
    ValueError
        If ``level`` is not a key of the topology-level mapping.
    """
    cdef Device neighbor
    try:
        nvml_level = _GPU_TOPOLOGY_LEVEL_MAPPING[level]
    except KeyError:
        raise ValueError(
            f"Invalid topology level: {level}. "
            f"Must be one of {list(GpuTopologyLevel.__members__.values())}"
        ) from None
    for neighbor_handle in nvml.device_get_topology_nearest_gpus(self._handle, nvml_level):
        # Create the Device via __new__ and set its handle directly,
        # so __init__ is not run.
        neighbor = Device.__new__(Device)
        neighbor._handle = neighbor_handle
        yield neighbor
1079 #######################################################################
1080 # UTILIZATION
@property
def utilization(self) -> Utilization:
    """
    Current :obj:`~Utilization` rates for the device's major subsystems.

    For Fermi™ or newer fully supported devices.

    Note: During driver initialization when ECC is enabled one can see high
    GPU and Memory Utilization readings. This is caused by the ECC Memory
    Scrubbing mechanism that is performed during driver initialization.

    Note: On MIG-enabled GPUs, querying device utilization rates is not
    currently supported.

    Returns
    -------
    Utilization
        An object containing the current utilization rates for the device.
    """
    rates = nvml.device_get_utilization_rates(self._handle)
    return Utilization(rates)
def get_topology_common_ancestor(device1: Device, device2: Device) -> GpuTopologyLevel:
    """
    Retrieve the common ancestor for two devices.

    For Linux only.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.

    Returns
    -------
    :class:`GpuTopologyLevel`
        The common ancestor level of the two devices.
    """
    ancestor_level = nvml.device_get_topology_common_ancestor(device1._handle, device2._handle)
    # Translate the NVML value into the public GpuTopologyLevel.
    return _GPU_TOPOLOGY_LEVEL_INV_MAPPING[ancestor_level]
def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex | str) -> GpuP2PStatus:
    """
    Retrieve the P2P status between two devices.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.
    index: :class:`GpuP2PCapsIndex` | str
        The P2P capability index being looked for between ``device1`` and ``device2``.

    Returns
    -------
    :class:`GpuP2PStatus`
        The P2P status between the two devices.

    Raises
    ------
    ValueError
        If ``index`` is not a key of the P2P caps index mapping.
    """
    try:
        caps_index = _GPU_P2P_CAPS_INDEX_MAPPING[index]
    except KeyError:
        raise ValueError(
            f"Invalid P2P caps index: {index}. "
            f"Must be one of {list(GpuP2PCapsIndex.__members__.values())}"
        ) from None
    raw_status = nvml.device_get_p2p_status(device1._handle, device2._handle, caps_index)
    # A status with no entry in the mapping is reported as UNKNOWN.
    return _GPU_P2P_STATUS_MAPPING.get(raw_status, GpuP2PStatus.UNKNOWN)
# Names exported by ``from <module> import *``.
__all__ = ["Device", "get_p2p_status", "get_topology_common_ancestor", "NvlinkInfo"]