Coverage for cuda/core/system/_device.pyx: 75.00%
256 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-13 01:38 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from libc.stdint cimport intptr_t, uint64_t
6from libc.math cimport ceil
8from multiprocessing import cpu_count
9from typing import Iterable, TYPE_CHECKING
10import warnings
12from cuda.bindings import nvml
14from ._nvml_context cimport initialize
15from cuda.core.system.typing import (
16 AddressingMode,
17 AffinityScope,
18 DeviceArch,
19 ClockId,
20 ClocksEventReasons,
21 ClockType,
22 CoolerControl,
23 CoolerTarget,
24 DeviceArch,
25 EventType,
26 FanControlPolicy,
27 FieldId,
28 GpuP2PCapsIndex,
29 GpuP2PStatus,
30 GpuTopologyLevel,
31 InforomObject,
32 TemperatureThresholds,
33 ThermalController,
34 ThermalTarget,
35)
37if TYPE_CHECKING:
38 import cuda.core # no-cython-lint
cdef object _pstate_to_int(object pstate):
    """
    Convert an ``nvml.Pstates`` value to its integer offset from ``PSTATE_0``.

    Returns ``None`` when ``pstate`` is ``PSTATE_UNKNOWN``.
    """
    if pstate == nvml.Pstates.PSTATE_UNKNOWN:
        return None

    raw = int(pstate)
    # Sanity check only: an `assert` is skipped when assertions are disabled.
    assert (
        0 <= raw <= 15
    ), f"Invalid P-state: {pstate}. Must be between 0 and 15 inclusive, or PSTATE_UNKNOWN."
    return raw - int(nvml.Pstates.PSTATE_0)
cdef int _pstate_to_enum(int pstate):
    """
    Convert an integer P-state to the numeric value of the matching
    ``nvml.Pstates`` member (offset from ``PSTATE_0``).

    Raises ``ValueError`` when ``pstate`` is outside ``[0, 15]``.
    """
    if not (0 <= pstate <= 15):
        raise ValueError(f"Invalid P-state: {pstate}. Must be between 0 and 15 inclusive.")
    return pstate + int(nvml.Pstates.PSTATE_0)
56include "_clock.pxi"
57include "_cooler.pxi"
58include "_device_attributes.pxi"
59include "_device_utils.pxi"
60include "_event.pxi"
61include "_fan.pxi"
62include "_field_values.pxi"
63include "_inforom.pxi"
64include "_memory.pxi"
65include "_mig.pxi"
66include "_nvlink.pxi"
67include "_pci_info.pxi"
68include "_performance.pxi"
69include "_process.pxi"
70include "_repair_status.pxi"
71include "_temperature.pxi"
72include "_utilization.pxi"
# NVML addressing-mode value -> public AddressingMode.
# Modes without an entry are reported as None by `Device.addressing_mode`.
_ADDRESSING_MODE_MAPPING = {
    nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_HMM: AddressingMode.HMM,
    nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_ATS: AddressingMode.ATS,
}


# Public AffinityScope -> NVML AffinityScope.
# Used to translate the `scope` argument of the affinity queries.
_AFFINITY_SCOPE_MAPPING = {
    AffinityScope.NODE: nvml.AffinityScope.NODE,
    AffinityScope.SOCKET: nvml.AffinityScope.SOCKET,
}


# NVML BrandType -> human-readable brand name returned by `Device.brand`.
_BRAND_TYPE_MAPPING = {
    nvml.BrandType.BRAND_UNKNOWN: "Unknown",
    nvml.BrandType.BRAND_QUADRO: "Quadro",
    nvml.BrandType.BRAND_TESLA: "Tesla",
    nvml.BrandType.BRAND_NVS: "NVS",
    nvml.BrandType.BRAND_GRID: "GRID",
    nvml.BrandType.BRAND_GEFORCE: "GeForce",
    nvml.BrandType.BRAND_TITAN: "Titan",
    nvml.BrandType.BRAND_NVIDIA_VAPPS: "NVIDIA vApps",
    nvml.BrandType.BRAND_NVIDIA_VPC: "NVIDIA VPC",
    nvml.BrandType.BRAND_NVIDIA_VCS: "NVIDIA VCS",
    nvml.BrandType.BRAND_NVIDIA_VWS: "NVIDIA VWS",
    nvml.BrandType.BRAND_NVIDIA_CLOUD_GAMING: "NVIDIA Cloud Gaming",
    nvml.BrandType.BRAND_NVIDIA_VGAMING: "NVIDIA vGaming",
    nvml.BrandType.BRAND_QUADRO_RTX: "Quadro RTX",
    nvml.BrandType.BRAND_NVIDIA_RTX: "NVIDIA RTX",
    nvml.BrandType.BRAND_NVIDIA: "NVIDIA",
    nvml.BrandType.BRAND_GEFORCE_RTX: "GeForce RTX",
    nvml.BrandType.BRAND_TITAN_RTX: "Titan RTX",
}


# Public GpuP2PCapsIndex -> NVML GpuP2PCapsIndex.
_GPU_P2P_CAPS_INDEX_MAPPING = {
    GpuP2PCapsIndex.READ: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_READ,
    GpuP2PCapsIndex.WRITE: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_WRITE,
    GpuP2PCapsIndex.NVLINK: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_NVLINK,
    GpuP2PCapsIndex.ATOMICS: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_ATOMICS,
    GpuP2PCapsIndex.PCI: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PCI,
    GpuP2PCapsIndex.PROP: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PROP,
    GpuP2PCapsIndex.UNKNOWN: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_UNKNOWN,
}


# NVML GpuP2PStatus -> public GpuP2PStatus.
_GPU_P2P_STATUS_MAPPING = {
    nvml.GpuP2PStatus.P2P_STATUS_OK: GpuP2PStatus.OK,
    nvml.GpuP2PStatus.P2P_STATUS_CHIPSET_NOT_SUPPORTED: GpuP2PStatus.CHIPSET_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_GPU_NOT_SUPPORTED: GpuP2PStatus.GPU_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED: GpuP2PStatus.IOH_TOPOLOGY_NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_DISABLED_BY_REGKEY: GpuP2PStatus.DISABLED_BY_REGKEY,
    nvml.GpuP2PStatus.P2P_STATUS_NOT_SUPPORTED: GpuP2PStatus.NOT_SUPPORTED,
    nvml.GpuP2PStatus.P2P_STATUS_UNKNOWN: GpuP2PStatus.UNKNOWN,
}


# Public GpuTopologyLevel -> NVML GpuTopologyLevel.
_GPU_TOPOLOGY_LEVEL_MAPPING = {
    GpuTopologyLevel.INTERNAL: nvml.GpuTopologyLevel.TOPOLOGY_INTERNAL,
    GpuTopologyLevel.SINGLE: nvml.GpuTopologyLevel.TOPOLOGY_SINGLE,
    GpuTopologyLevel.MULTIPLE: nvml.GpuTopologyLevel.TOPOLOGY_MULTIPLE,
    GpuTopologyLevel.HOSTBRIDGE: nvml.GpuTopologyLevel.TOPOLOGY_HOSTBRIDGE,
    GpuTopologyLevel.NODE: nvml.GpuTopologyLevel.TOPOLOGY_NODE,
    GpuTopologyLevel.SYSTEM: nvml.GpuTopologyLevel.TOPOLOGY_SYSTEM,
}


# Reverse lookup: NVML GpuTopologyLevel -> public GpuTopologyLevel.
_GPU_TOPOLOGY_LEVEL_INV_MAPPING = {v: k for k, v in _GPU_TOPOLOGY_LEVEL_MAPPING.items()}
145cdef class Device:
146 """
147 Representation of a device.
149 :class:`cuda.core.system.Device` provides access to various pieces of metadata
150 about devices and their topology, as provided by the NVIDIA Management
151 Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.
153 Creating a device instance causes NVML to initialize the target GPU.
154 NVML may initialize additional GPUs if the target GPU is an SLI slave.
156 Parameters
157 ----------
158 index: int, optional
        Integer representing the NVML device index to get a handle to. Valid
        values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.
162 The order in which devices are enumerated has no guarantees of
163 consistency between reboots. For that reason, it is recommended that
164 devices are looked up by their PCI ids or UUID.
166 uuid: bytes or str, optional
167 UUID of a CUDA device to get a handle to.
169 pci_bus_id: bytes or str, optional
170 PCI bus ID of a CUDA device to get a handle to.
172 Raises
173 ------
174 ValueError
175 If anything other than a single `index`, `uuid` or `pci_bus_id` are specified.
176 """
178 # This is made public for testing purposes only
179 cdef public intptr_t _handle
181 def __init__(
182 self,
183 *,
184 index: int | None = None,
185 uuid: bytes | str | None = None,
186 pci_bus_id: bytes | str | None = None,
187 ) -> None:
188 args = [index, uuid, pci_bus_id] 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
189 cdef int arg_count = sum(arg is not None for arg in args) 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
191 if arg_count > 1: 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
192 raise ValueError("Handle requires only one of `index`, `uuid`, or `pci_bus_id`.")
193 if arg_count == 0: 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
194 raise ValueError("Handle requires either a device `index`, `uuid`, or `pci_bus_id`.")
196 initialize() 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
198 if index is not None: 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQp
199 self._handle = nvml.device_get_handle_by_index_v2(index) 1tbuvdeqwlxyziABCfmDEnrFjGHoIJKLskcgMNhOPQ
200 elif uuid is not None: 1fp
201 if isinstance(uuid, bytes): 1p
202 uuid = uuid.decode("ascii")
203 self._handle = nvml.device_get_handle_by_uuid(uuid) 1p
204 elif pci_bus_id is not None: 1f
205 if isinstance(pci_bus_id, bytes): 1f
206 pci_bus_id = pci_bus_id.decode("ascii")
207 self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id) 1f
209 #########################################################################
210 # BASIC PROPERTIES
    @property
    def index(self) -> int:
        """
        The NVML index of this device.

        Valid indices are derived from the count returned by
        :meth:`Device.get_device_count`. For example, if ``get_device_count()``
        returns 2, the valid indices are 0 and 1, corresponding to GPU 0 and GPU
        1.

        The order in which NVML enumerates devices has no guarantees of
        consistency between reboots. For that reason, it is recommended that
        devices be looked up by their PCI ids or GPU UUID.

        Note: The NVML index may not correlate with other APIs, such as the CUDA
        device index.

        The value is queried from NVML on every access; it is not cached.
        """
        return nvml.device_get_index(self._handle)
    @property
    def uuid(self) -> str:
        """
        Retrieves the globally unique immutable UUID associated with this
        device, as a 5 part hexadecimal string, that augments the immutable,
        board serial identifier.

        In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
        prefix. If you need a `uuid` without that prefix (for example, to
        interact with CUDA), use the `uuid_without_prefix` property.

        The value is returned exactly as ``nvml.device_get_uuid`` reports it.
        """
        return nvml.device_get_uuid(self._handle)
    @property
    def uuid_without_prefix(self) -> str:
        """
        Retrieves the globally unique immutable UUID associated with this
        device, as a 5 part hexadecimal string, that augments the immutable,
        board serial identifier.

        In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
        prefix. This property returns it without the prefix, to match the UUIDs
        used in CUDA. If you need the prefix, use the `uuid` property.
        """
        # NVML UUIDs have a `gpu-` or `mig-` prefix. We remove that here.
        # Both prefixes are four characters long, so a fixed slice is used;
        # the prefix itself is not inspected.
        return nvml.device_get_uuid(self._handle)[4:]
    @property
    def pci_bus_id(self) -> str:
        """
        Retrieves the PCI bus ID of this device.

        Shortcut for ``self.pci_info.bus_id``; any error raised by
        :attr:`pci_info` propagates unchanged.
        """
        return self.pci_info.bus_id
    @property
    def numa_node_id(self) -> int:
        """
        The NUMA node of the given GPU device.

        This only applies to platforms where the GPUs are NUMA nodes.

        Thin wrapper around ``nvml.device_get_numa_node_id``.
        """
        return nvml.device_get_numa_node_id(self._handle)
274 @property
275 def arch(self) -> DeviceArch:
276 """
277 :obj:`~DeviceArch` device architecture.
279 For example, a Tesla V100 will report ``DeviceArchitecture.name ==
280 "VOLTA"``, and RTX A6000 will report ``DeviceArchitecture.name ==
281 "AMPERE"``.
282 """
283 arch = nvml.device_get_architecture(self._handle) 1ln
284 try: 1ln
285 return DeviceArch(arch) 1ln
286 except ValueError:
287 return DeviceArch.UNKNOWN
    @property
    def name(self) -> str:
        """
        Name of the device, e.g.: `"Tesla V100-SXM2-32GB"`

        Thin wrapper around ``nvml.device_get_name``.
        """
        return nvml.device_get_name(self._handle)
    @property
    def brand(self) -> str:
        """
        The brand of the device.

        Returns "Unknown" if the brand is unknown.
        """
        # The "Unknown" default also covers brand values that have no entry in
        # `_BRAND_TYPE_MAPPING`.
        return _BRAND_TYPE_MAPPING.get(nvml.device_get_brand(self._handle), "Unknown")
    @property
    def serial(self) -> str:
        """
        Retrieves the globally unique board serial number associated with this
        device's board.

        For all products with an InfoROM.

        Thin wrapper around ``nvml.device_get_serial``.
        """
        return nvml.device_get_serial(self._handle)
    @property
    def module_id(self) -> int:
        """
        Get a unique identifier for the device module on the baseboard.

        This API retrieves a unique identifier for each GPU module that exists
        on a given baseboard. For non-baseboard products, this ID would always
        be 0.

        Thin wrapper around ``nvml.device_get_module_id``.
        """
        return nvml.device_get_module_id(self._handle)
    @property
    def minor_number(self) -> int:
        """
        The minor number of this device.

        For Linux only.

        The minor number is used by the Linux device driver to identify the
        device node in ``/dev/nvidiaX``.

        Thin wrapper around ``nvml.device_get_minor_number``.
        """
        return nvml.device_get_minor_number(self._handle)
    @property
    def is_c2c_enabled(self) -> bool:
        """
        Whether the C2C (Chip-to-Chip) mode is enabled for this device.
        """
        # The `is_c2c_enabled` field of the NVML C2C info is coerced to a
        # Python bool.
        return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled)
345 @property
346 def is_persistence_mode_enabled(self) -> bool:
347 """
348 Whether persistence mode is enabled for this device.
350 For Linux only.
351 """
352 return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED 1k
354 @is_persistence_mode_enabled.setter
355 def is_persistence_mode_enabled(self, enabled: bool) -> None:
356 nvml.device_set_persistence_mode( 1k
357 self._handle, 1k
358 nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED 1k
359 )
    @property
    def cuda_compute_capability(self) -> tuple[int, int]:
        """
        CUDA compute capability of the device, e.g.: `(7, 0)` for a Tesla V100.

        Returns a tuple `(major, minor)`.

        The result of ``nvml.device_get_cuda_compute_capability`` is returned
        as-is.
        """
        return nvml.device_get_cuda_compute_capability(self._handle)
370 def to_cuda_device(self) -> "cuda.core.Device":
371 """
372 Get the corresponding :class:`cuda.core.Device` (which is used for CUDA
373 access) for this :class:`cuda.core.system.Device` (which is used for
374 NVIDIA machine library (NVML) access).
376 The devices are mapped to one another by their UUID.
378 Returns
379 -------
380 cuda.core.Device
381 The corresponding CUDA device.
383 Raises
384 ------
385 RuntimeError
386 No corresponding CUDA device is found for this NVML device.
388 For example, on a MIG system, the physical GPU will not have an
389 available CUDA device, since it can not be used directly, even
390 though it can be enumerated from NVML.
391 """
392 from cuda.core import Device as CudaDevice 1h
394 # CUDA does not have an API to get a device by its UUID, so we just
395 # search all the devices for one with a matching UUID.
397 for cuda_device in CudaDevice.get_all_devices(): 1h
398 if cuda_device.uuid == self.uuid_without_prefix: 1h
399 return cuda_device 1h
401 raise RuntimeError("No corresponding CUDA device found for this NVML device.")
    @classmethod
    def get_device_count(cls) -> int:
        """
        Get the number of available devices.

        Returns
        -------
        int
            The number of available devices.
        """
        # No Device instance is needed for this call, so NVML is initialized
        # here explicitly.
        initialize()

        return nvml.device_get_count_v2()
    @classmethod
    def get_all_devices(cls) -> Iterable[Device]:
        """
        Query the available device instances.

        This is a generator: NVML initialization and the device-count query
        run when iteration starts, not when the method is called. The count is
        read once, and one device is constructed per index as it is consumed.

        Returns
        -------
        Iterator over :obj:`~Device`
            An iterator over available devices.
        """
        initialize()

        for device_id in range(nvml.device_get_count_v2()):
            yield cls(index=device_id)
432 #########################################################################
433 # ADDRESSING MODE
435 @property
436 def addressing_mode(self) -> AddressingMode | None:
437 """
438 Get the :obj:`~AddressingMode` of the device.
439 """
440 return _ADDRESSING_MODE_MAPPING.get(nvml.device_get_addressing_mode(self._handle).value, None) 1t
442 #########################################################################
443 # MIG (MULTI-INSTANCE GPU) DEVICES
    @property
    def mig(self) -> MigInfo:
        """
        Get :obj:`~MigInfo` accessor for MIG (Multi-Instance GPU) information.

        For Ampere™ or newer fully supported devices.

        A new accessor wrapping this device is created on every access.
        """
        return MigInfo(self)
454 #########################################################################
455 # AFFINITY
457 @classmethod
458 def get_all_devices_with_cpu_affinity(cls, cpu_index: int) -> Iterable[Device]:
459 """
460 Retrieve the set of GPUs that have a CPU affinity with the given CPU number.
462 Supported on Linux only.
464 Parameters
465 ----------
466 cpu_index: int
467 The CPU index.
469 Returns
470 -------
471 Iterator of :obj:`~Device`
472 An iterator over available devices.
473 """
474 cdef Device device
475 for handle in nvml.system_get_topology_gpu_set(cpu_index): 1R
476 device = Device.__new__(Device) 1R
477 device._handle = handle 1R
478 yield device 1R
480 def get_memory_affinity(self, scope: AffinityScope | str=AffinityScope.NODE) -> list[int]:
481 """
482 Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal
483 memory affinity for the device.
485 For Kepler™ or newer fully supported devices.
487 Supported on Linux only.
489 If requested scope is not applicable to the target topology, the API
490 will fall back to reporting the memory affinity for the immediate non-I/O
491 ancestor of the device.
493 Parameters
494 ----------
495 scope: AffinityScope | str, optional
496 The scope of the affinity query. Must be one of the values of
497 :class:`AffinityScope`. Default is :attr:`AffinityScope.NODE`.
499 Returns
500 -------
501 list[int]
502 A list of indices of NUMA nodes or CPU sockets with the ideal memory
503 affinity for the device.
504 """
505 try: 1b
506 scope = _AFFINITY_SCOPE_MAPPING[scope] 1b
507 except KeyError:
508 raise ValueError(
509 f"Invalid affinity scope: {scope}. "
510 f"Must be one of {list(AffinityScope.__members__.values())}"
511 ) from None
512 return _unpack_bitmask( 1b
513 nvml.device_get_memory_affinity( 1b
514 self._handle, 1b
515 <unsigned int>ceil(cpu_count() / 64), 1b
516 scope, 1b
517 )
518 )
520 def get_cpu_affinity(self, scope: AffinityScope | str=AffinityScope.NODE) -> list[int]:
521 """
522 Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal
523 CPU affinity for the device.
525 For Kepler™ or newer fully supported devices.
527 Supported on Linux only.
529 If requested scope is not applicable to the target topology, the API
530 will fall back to reporting the memory affinity for the immediate non-I/O
531 ancestor of the device.
533 Parameters
534 ----------
535 scope: AffinityScope | str, optional
536 The scope of the affinity query. Must be one of the values of
537 :class:`AffinityScope`. Default is :attr:`AffinityScope.NODE`.
539 Returns
540 -------
541 list[int]
542 A list of indices of NUMA nodes or CPU sockets with the ideal memory
543 affinity for the device.
544 """
545 try: 1biR
546 scope = _AFFINITY_SCOPE_MAPPING[scope] 1biR
547 except KeyError:
548 raise ValueError(
549 f"Invalid affinity scope: {scope}. "
550 f"Must be one of {list(AffinityScope.__members__.values())}"
551 ) from None
552 return _unpack_bitmask( 1biR
553 nvml.device_get_cpu_affinity_within_scope( 1biR
554 self._handle, 1biR
555 <unsigned int>ceil(cpu_count() / 64), 1biR
556 scope, 1biR
557 )
558 )
    def set_cpu_affinity(self) -> None:
        """
        Sets the ideal affinity for the calling thread and device.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.

        Thin wrapper around ``nvml.device_set_cpu_affinity``.
        """
        nvml.device_set_cpu_affinity(self._handle)
    def clear_cpu_affinity(self) -> None:
        """
        Clear all affinity bindings for the calling thread.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.

        Thin wrapper around ``nvml.device_clear_cpu_affinity``.
        """
        nvml.device_clear_cpu_affinity(self._handle)
580 #########################################################################
581 # CLOCK
582 # See external class definitions in _clock.pxi
    def get_clock(self, clock_type: ClockType | str) -> ClockInfo:
        """
        :obj:`~_device.ClockInfo` object to get information about and manage a specific clock on a device.

        Parameters
        ----------
        clock_type: ClockType | str
            The clock to inspect; passed through to :obj:`~_device.ClockInfo`
            unchanged.
        """
        return ClockInfo(self._handle, clock_type)
590 @property
591 def is_auto_boosted_clocks_enabled(self) -> tuple[bool, bool]:
592 """
593 Retrieve the current state of auto boosted clocks on a device.
595 For Kepler™ or newer fully supported devices.
597 Auto Boosted clocks are enabled by default on some hardware, allowing
598 the GPU to run at higher clock rates to maximize performance as thermal
599 limits allow.
601 On Pascal™ and newer hardware, Auto Boosted clocks are controlled
602 through application clocks. Use :meth:`set_application_clocks` and
603 :meth:`reset_application_clocks` to control Auto Boost behavior.
605 Returns
606 -------
607 bool
608 The current state of Auto Boosted clocks
609 bool
610 The default Auto Boosted clocks behavior
612 """
613 current, default = nvml.device_get_auto_boosted_clocks_enabled(self._handle) 1u
614 return current == nvml.EnableState.FEATURE_ENABLED, default == nvml.EnableState.FEATURE_ENABLED
616 @property
617 def current_clock_event_reasons(self) -> list[ClocksEventReasons]:
618 """
619 Retrieves the current :obj:`~ClocksEventReasons`.
621 For all fully supported products.
622 """
623 cdef uint64_t[1] reasons
624 reasons[0] = nvml.device_get_current_clocks_event_reasons(self._handle) 1d
625 output_reasons = [] 1d
626 for reason in _unpack_bitmask(reasons): 1d
627 try:
628 output_reason = _CLOCKS_EVENT_REASONS_MAPPING[1 << reason]
629 except KeyError:
630 raise ValueError(f"Unknown clock event reason bit: {1 << reason}")
631 output_reasons.append(output_reason)
632 return output_reasons 1d
634 @property
635 def supported_clock_event_reasons(self) -> list[ClocksEventReasons]:
636 """
637 Retrieves supported :obj:`~ClocksEventReasons` that can be returned by
638 :meth:`get_current_clock_event_reasons`.
640 For all fully supported products.
642 This method is not supported in virtual machines running virtual GPU (vGPU).
643 """
644 cdef uint64_t[1] reasons
645 reasons[0] = nvml.device_get_supported_clocks_event_reasons(self._handle) 1d
646 output_reasons = [] 1d
647 for reason in _unpack_bitmask(reasons): 1d
648 try: 1d
649 output_reason = _CLOCKS_EVENT_REASONS_MAPPING[1 << reason] 1d
650 except KeyError:
651 raise ValueError(f"Unknown clock event reason bit: {1 << reason}")
652 output_reasons.append(output_reason) 1d
653 return output_reasons 1d
655 ##########################################################################
656 # COOLER
657 # See external class definitions in _cooler.pxi
    @property
    def cooler(self) -> CoolerInfo:
        """
        :obj:`~_device.CoolerInfo` object with cooler information for the device.

        The cooler information is fetched from NVML on every access.
        """
        return CoolerInfo(nvml.device_get_cooler_info(self._handle))
666 ##########################################################################
667 # DEVICE ATTRIBUTES
668 # See external class definitions in _device_attributes.pxi
    @property
    def attributes(self) -> DeviceAttributes:
        """
        :obj:`~_device.DeviceAttributes` object with various device attributes.

        For Ampere™ or newer fully supported devices. Only available on Linux
        systems.

        The attributes are fetched from NVML on every access.
        """
        return DeviceAttributes(nvml.device_get_attributes_v2(self._handle))
680 #########################################################################
681 # DISPLAY
    @property
    def is_display_connected(self) -> bool:
        """
        The display mode for this device.

        Indicates whether a physical display (e.g. monitor) is currently connected to
        any of the device's connectors.
        """
        # NVML reports an enable state; expose it as a plain bool.
        return nvml.device_get_display_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED
    @property
    def is_display_active(self) -> bool:
        """
        The display active status for this device.

        Indicates whether a display is initialized on the device. For example,
        whether X Server is attached to this device and has allocated memory for
        the screen.

        Display can be active even when no monitor is physically attached.
        """
        # NVML reports an enable state; expose it as a plain bool.
        return nvml.device_get_display_active(self._handle) == nvml.EnableState.FEATURE_ENABLED
706 ##########################################################################
707 # EVENTS
708 # See external class definitions in _event.pxi
    def register_events(self, events: EventType | str | list[EventType | str]) -> DeviceEvents:
        """
        Starts recording events on this device.

        For Fermi™ or newer fully supported devices. For Linux only.

        ECC events are available only on ECC-enabled devices (see
        :meth:`Device.get_total_ecc_errors`). Power capping events are
        available only on Power Management enabled devices (see
        :meth:`Device.get_power_management_mode`).

        This call starts recording of events on specific device. All events
        that occurred before this call are not recorded. Wait for events using
        the :meth:`DeviceEvents.wait` method on the result.

        Examples
        --------
        >>> device = Device(index=0)
        >>> events = device.register_events([
        ...     EventType.XID_CRITICAL_ERROR,
        ... ])
        >>> while event := events.wait(timeout_ms=10000):
        ...     print(f"Event {event.event_type} occurred on device {event.device.uuid}")

        Parameters
        ----------
        events: EventType, str, or list of EventType or str
            The event type or list of event types to register for this device.

        Returns
        -------
        :obj:`~_device.DeviceEvents`
            An object representing the registered events. Call
            :meth:`~_device.DeviceEvents.wait` on this object to wait for events.

        Raises
        ------
        :class:`cuda.core.system.NotSupportedError`
            None of the requested event types are registered.
        """
        # `events` is handed to DeviceEvents unchanged, together with this
        # device's handle.
        return DeviceEvents(self._handle, events)
752 def get_supported_event_types(self) -> list[EventType]:
753 """
754 Get the list of event types supported by this device.
756 For Fermi™ or newer fully supported devices. For Linux only (returns an
757 empty list on Windows).
759 Returns
760 -------
761 list[EventType]
762 The list of supported event types.
763 """
764 cdef uint64_t[1] bitmask
765 bitmask[0] = nvml.device_get_supported_event_types(self._handle) 1g
766 events = [] 1g
767 for ev in _unpack_bitmask(bitmask): 1g
768 try: 1g
769 ev_enum = _EVENT_TYPE_MAPPING[1 << ev] 1g
770 except KeyError:
771 raise ValueError(f"Unknown event type bit: {1 << ev}")
772 events.append(ev_enum) 1g
773 return events 1g
775 ##########################################################################
776 # FAN
777 # See external class definitions in _fan.pxi
779 def get_fan(self, fan: int = 0) -> FanInfo:
780 """
781 :obj:`~_device.FanInfo` object to get information and manage a specific fan on a device.
782 """
783 if fan < 0 or fan >= self.num_fans:
784 raise ValueError(f"Fan index {fan} is out of range [0, {self.num_fans})")
785 return FanInfo(self._handle, fan)
    @property
    def num_fans(self) -> int:
        """
        The number of fans on the device.

        Queried from NVML on every access; the value is not cached.
        """
        return nvml.device_get_num_fans(self._handle)
794 ##########################################################################
795 # FIELD VALUES
796 # See external class definitions in _field_values.pxi
798 def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
799 """
800 Get multiple field values from the device.
802 Each value specified can raise its own exception. That exception will
803 be raised when attempting to access the corresponding ``value`` from the
804 returned :obj:`~_device.FieldValues` container.
806 To confirm that there are no exceptions in the entire container, call
807 :meth:`~_device.FieldValues.validate`.
809 Parameters
810 ----------
811 field_ids: list[int | tuple[int, int]]
812 List of field IDs to query.
814 Each item may be either a single value from the :class:`FieldId`
815 enum, or a pair of (:class:`FieldId`, scope ID).
817 Returns
818 -------
819 :obj:`~_device.FieldValues`
820 Container of field values corresponding to the requested field IDs.
821 """
822 # Passing a field_ids array of length 0 raises an InvalidArgumentError,
823 # so avoid that.
824 if len(field_ids) == 0: 1j
825 return FieldValues(nvml.FieldValue(0)) 1j
827 return FieldValues(nvml.device_get_field_values(self._handle, field_ids)) 1j
829 def clear_field_values(self, field_ids: list[int | tuple[int, int]]) -> None:
830 """
831 Clear multiple field values from the device.
833 Parameters
834 ----------
835 field_ids: list[int | tuple[int, int]]
836 List of field IDs to clear.
838 Each item may be either a single value from the :class:`FieldId`
839 enum, or a pair of (:class:`FieldId`, scope ID).
840 """
841 # Passing a field_ids array of length 0 raises an InvalidArgumentError,
842 # so avoid that.
843 if len(field_ids) == 0: 1j
844 return
846 nvml.device_clear_field_values(self._handle, field_ids) 1j
848 ##########################################################################
849 # INFOROM
850 # See external class definitions in _inforom.pxi
    @property
    def inforom(self) -> InforomInfo:
        """
        :obj:`~_device.InforomInfo` object with InfoROM information.

        For all products with an InfoROM.

        A new accessor wrapping this device is created on every access.
        """
        return InforomInfo(self)
861 ##########################################################################
862 # MEMORY
863 # See external class definitions in _memory.pxi
    @property
    def bar1_memory_info(self) -> BAR1MemoryInfo:
        """
        :obj:`~_device.BAR1MemoryInfo` object with BAR1 memory information.

        BAR1 is used to map the FB (device memory) so that it can be directly
        accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE
        bus).

        The information is fetched from NVML on every access.
        """
        return BAR1MemoryInfo(nvml.device_get_bar1_memory_info(self._handle))
    @property
    def memory_info(self) -> MemoryInfo:
        """
        :obj:`~_device.MemoryInfo` object with memory information.

        The information is fetched from NVML on every access.
        """
        return MemoryInfo(nvml.device_get_memory_info_v2(self._handle))
883 ##########################################################################
884 # NVLINK
885 # See external class definitions in _nvlink.pxi
887 def get_nvlink(self, link: int) -> NvlinkInfo:
888 """
889 Get :obj:`~NvlinkInfo` about this device.
891 For devices with NVLink support.
892 """
893 if link < 0 or link >= NvlinkInfo.max_links: 1s
894 raise ValueError(f"Link index {link} is out of range [0, {NvlinkInfo.max_links})")
895 return NvlinkInfo(self, link) 1s
897 ##########################################################################
898 # PCI INFO
899 # See external class definitions in _pci_info.pxi
    @property
    def pci_info(self) -> PciInfo:
        """
        :obj:`~_device.PciInfo` object with the PCI attributes of this device.

        Non-physical devices, such as MIG devices, may not have PCI attributes.
        In that case, this property will raise a `RuntimeError`.

        Raises
        ------
        RuntimeError
            If NVML rejects the query with ``InvalidArgumentError``.
        """
        try:
            pci_info = nvml.device_get_pci_info_ext(self._handle)
        except nvml.InvalidArgumentError:
            # `from None` hides the NVML error from the traceback.
            raise RuntimeError("This device does not have PCI attributes") from None
        else:
            # Built outside the `try` so that only the NVML query itself is
            # covered by the `except` clause.
            return PciInfo(pci_info, self._handle)
916 ##########################################################################
917 # PERFORMANCE
918 # See external class definitions in _performance.pxi
    @property
    def performance_state(self) -> int | None:
        """
        The current performance state of the device.

        For Fermi™ or newer fully supported devices.

        Returns
        -------
        int | None
            The current performance state of the device, as an integer between 0 and 15,
            where 0 is maximum performance and higher numbers are lower performance.
            Returns `None` if the performance state is unknown.
        """
        # `_pstate_to_int` maps PSTATE_UNKNOWN to None.
        return _pstate_to_int(nvml.device_get_performance_state(self._handle))
    @property
    def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo:
        """
        :obj:`~_device.GpuDynamicPstatesInfo` object with performance monitor samples from the associated subdevice.

        The samples are fetched from NVML on every access.
        """
        return GpuDynamicPstatesInfo(nvml.device_get_dynamic_pstates_info(self._handle))
943 @property
944 def supported_pstates(self) -> list[int]:
945 """
946 Get all supported Performance States (P-States) for the device.
948 The returned list contains a contiguous list of valid P-States supported by
949 the device.
951 Return
952 ------
953 list[int]
954 A list of supported performance state of the device, as an integer
955 between 0 and 15, where 0 is maximum performance and higher numbers
956 are lower performance.
957 """
958 # From nvml.h:
959 # The returned array would contain a contiguous list of valid P-States
960 # supported by the device. If the number of supported P-States is fewer
961 # than the size of the array supplied missing elements would contain \a
962 # NVML_PSTATE_UNKNOWN.
964 pstates = [] 1c
965 for pstate in nvml.device_get_supported_performance_states(self._handle): 1c
966 pstate_value = _pstate_to_int(pstate) 1c
967 if pstate_value is not None: 1c
968 pstates.append(pstate_value) 1c
969 return pstates 1c
971 ##########################################################################
972 # PROCESS
973 # See external class definitions in _process.pxi
    @property
    def compute_running_processes(self) -> list[ProcessInfo]:
        """
        Get information about processes with a compute context on a device

        For Fermi™ or newer fully supported devices.

        This function returns information only about compute running processes
        (e.g. CUDA application which have active context). Any graphics
        applications (e.g. using OpenGL, DirectX) won't be listed by this
        function.

        Keep in mind that information returned by this call is dynamic and the
        number of elements might change in time.

        In MIG mode, if device handle is provided, the API returns aggregate
        information, only if the caller has appropriate privileges. Per-instance
        information can be queried by using specific MIG device handles.
        Querying per-instance information using MIG device handles is not
        supported if the device is in vGPU Host virtualization mode.
        """
        # Each NVML process record is wrapped together with this device.
        return [ProcessInfo(self, proc) for proc in nvml.device_get_compute_running_processes_v3(self._handle)]
998 ##########################################################################
999 # REPAIR STATUS
1000 # See external class definitions in _repair_status.pxi
@property
def repair_status(self) -> RepairStatus:
    """
    TPC/Channel repair status of the device.

    For Ampere™ or newer fully supported devices.

    Returns
    -------
    :obj:`~_device.RepairStatus`
        Object constructed from this device's handle.
    """
    status = RepairStatus(self._handle)
    return status
1011 ##########################################################################
1012 # TEMPERATURE
1013 # See external class definitions in _temperature.pxi
@property
def temperature(self) -> Temperature:
    """
    Temperature information for the device.

    Returns
    -------
    :obj:`~_device.Temperature`
        Object constructed from this device's handle.
    """
    readings = Temperature(self._handle)
    return readings
1022 #######################################################################
1023 # TOPOLOGY
def get_topology_nearest_gpus(self, level: GpuTopologyLevel | str) -> Iterable[Device]:
    """
    Retrieve the GPUs that are nearest to this device at a specific interconnectivity level.

    Supported on Linux only.

    Parameters
    ----------
    level: :class:`GpuTopologyLevel` | str
        The topology level.

    Returns
    -------
    Iterable of :class:`Device`
        The nearest devices at the given topology level. The result is a
        generator: NVML is queried when iteration starts.

    Raises
    ------
    ValueError
        If ``level`` is not a recognized topology level. The check runs
        when this method is called, not when the result is first iterated.
    """
    # Validate here rather than inside the generator body. A generator
    # function runs none of its code until the first `next()`, which would
    # delay (or, if the result is never iterated, hide) the ValueError.
    try:
        level_enum = _GPU_TOPOLOGY_LEVEL_MAPPING[level]
    except KeyError:
        raise ValueError(
            f"Invalid topology level: {level}. "
            f"Must be one of {list(GpuTopologyLevel.__members__.values())}"
        ) from None
    return self._iter_topology_nearest_gpus(level_enum)

def _iter_topology_nearest_gpus(self, level_enum) -> Iterable[Device]:
    """
    Yield a :class:`Device` for each handle NVML reports as nearest to this
    device at the already-validated topology level ``level_enum``.
    """
    cdef Device device
    for handle in nvml.device_get_topology_nearest_gpus(self._handle, level_enum):
        # Bypass __init__ and attach the NVML handle directly.
        device = Device.__new__(Device)
        device._handle = handle
        yield device
1054 #######################################################################
1055 # UTILIZATION
@property
def utilization(self) -> Utilization:
    """
    Current :obj:`~Utilization` rates for the device's major subsystems.

    For Fermi™ or newer fully supported devices.

    Note: While the driver is initializing with ECC enabled, GPU and
    Memory Utilization readings can be high. The cause is the ECC Memory
    Scrubbing mechanism that runs during driver initialization.

    Note: Querying device utilization rates is not currently supported on
    MIG-enabled GPUs.

    Returns
    -------
    Utilization
        An object containing the current utilization rates for the device.
    """
    rates = nvml.device_get_utilization_rates(self._handle)
    return Utilization(rates)
def get_topology_common_ancestor(device1: Device, device2: Device) -> GpuTopologyLevel:
    """
    Find the common ancestor topology level of two devices.

    For Linux only.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.

    Returns
    -------
    :class:`GpuTopologyLevel`
        The common ancestor level of the two devices.
    """
    # Ask NVML first, then translate its value through the inverse mapping.
    ancestor = nvml.device_get_topology_common_ancestor(device1._handle, device2._handle)
    return _GPU_TOPOLOGY_LEVEL_INV_MAPPING[ancestor]
def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex | str) -> GpuP2PStatus:
    """
    Query the peer-to-peer (P2P) status between two devices.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.
    index: :class:`GpuP2PCapsIndex` | str
        The P2P capability index being looked for between ``device1`` and ``device2``.

    Returns
    -------
    :class:`GpuP2PStatus`
        The P2P status between the two devices, or
        ``GpuP2PStatus.UNKNOWN`` when the value NVML returns has no entry
        in the status mapping.

    Raises
    ------
    ValueError
        If ``index`` is not a recognized P2P capability index.
    """
    try:
        index_enum = _GPU_P2P_CAPS_INDEX_MAPPING[index]
    except KeyError:
        raise ValueError(
            f"Invalid P2P caps index: {index}. "
            f"Must be one of {list(GpuP2PCapsIndex.__members__.values())}"
        ) from None

    raw_status = nvml.device_get_p2p_status(device1._handle, device2._handle, index_enum)
    return _GPU_P2P_STATUS_MAPPING.get(raw_status, GpuP2PStatus.UNKNOWN)
# Names exported by a star-import of this module.
__all__ = ["Device", "get_p2p_status", "get_topology_common_ancestor", "NvlinkInfo"]