Coverage for cuda / core / system / _device.pyx: 81.08%
148 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-29 01:27 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-29 01:27 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5from libc.stdint cimport intptr_t, uint64_t
6from libc.math cimport ceil
8from multiprocessing import cpu_count
9from typing import Iterable
11from cuda.bindings import nvml
13from ._nvml_context cimport initialize
# Re-export NVML enum types at module level so users can refer to them as
# e.g. ``cuda.core.system.BrandType`` without importing ``cuda.bindings.nvml``.
AddressingMode = nvml.DeviceAddressingModeType
AffinityScope = nvml.AffinityScope
BrandType = nvml.BrandType
DeviceArch = nvml.DeviceArch
GpuP2PCapsIndex = nvml.GpuP2PCapsIndex
GpuP2PStatus = nvml.GpuP2PStatus
GpuTopologyLevel = nvml.GpuTopologyLevel
Pstates = nvml.Pstates

# Textually include the helper-class definitions (ClockInfo, CoolerInfo,
# DeviceAttributes, DeviceEvents, FanInfo, FieldValues, InforomInfo,
# memory/MIG/NVLink/PCI/performance/process/repair/temperature/utilization
# accessors, and the _unpack_bitmask utility) into this module's namespace.
include "_clock.pxi"
include "_cooler.pxi"
include "_device_attributes.pxi"
include "_device_utils.pxi"
include "_event.pxi"
include "_fan.pxi"
include "_field_values.pxi"
include "_inforom.pxi"
include "_memory.pxi"
include "_mig.pxi"
include "_nvlink.pxi"
include "_pci_info.pxi"
include "_performance.pxi"
include "_process.pxi"
include "_repair_status.pxi"
include "_temperature.pxi"
include "_utilization.pxi"
cdef class Device:
    """
    Representation of a device.

    :class:`cuda.core.system.Device` provides access to various pieces of metadata
    about devices and their topology, as provided by the NVIDIA Management
    Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.

    Creating a device instance causes NVML to initialize the target GPU.
    NVML may initialize additional GPUs if the target GPU is an SLI slave.

    Parameters
    ----------
    index: int, optional
        Integer representing the CUDA device index to get a handle to. Valid
        values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.

        The order in which devices are enumerated has no guarantees of
        consistency between reboots. For that reason, it is recommended that
        devices are looked up by their PCI ids or UUID.

    uuid: bytes or str, optional
        UUID of a CUDA device to get a handle to.

    pci_bus_id: bytes or str, optional
        PCI bus ID of a CUDA device to get a handle to.

    Raises
    ------
    ValueError
        If anything other than a single `index`, `uuid` or `pci_bus_id` are specified.
    """

    # This is made public for testing purposes only
    cdef public intptr_t _handle

    def __init__(
        self,
        *,
        index: int | None = None,
        uuid: bytes | str | None = None,
        pci_bus_id: bytes | str | None = None,
    ):
        # Exactly one of the three keyword-only lookup keys must be given.
        args = [index, uuid, pci_bus_id]
        cdef int arg_count = sum(arg is not None for arg in args)

        if arg_count > 1:
            raise ValueError("Handle requires only one of `index`, `uuid`, or `pci_bus_id`.")
        if arg_count == 0:
            raise ValueError("Handle requires either a device `index`, `uuid`, or `pci_bus_id`.")

        # Make sure NVML is initialized before any handle lookup.
        initialize()

        if index is not None:
            self._handle = nvml.device_get_handle_by_index_v2(index)
        elif uuid is not None:
            # NVML expects a str; accept bytes for convenience.
            if isinstance(uuid, bytes):
                uuid = uuid.decode("ascii")
            self._handle = nvml.device_get_handle_by_uuid(uuid)
        elif pci_bus_id is not None:
            if isinstance(pci_bus_id, bytes):
                pci_bus_id = pci_bus_id.decode("ascii")
            self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)

    #########################################################################
    # BASIC PROPERTIES

    @property
    def index(self) -> int:
        """
        The NVML index of this device.

        Valid indices are derived from the count returned by
        :meth:`Device.get_device_count`. For example, if ``get_device_count()``
        returns 2, the valid indices are 0 and 1, corresponding to GPU 0 and GPU
        1.

        The order in which NVML enumerates devices has no guarantees of
        consistency between reboots. For that reason, it is recommended that
        devices be looked up by their PCI ids or GPU UUID.

        Note: The NVML index may not correlate with other APIs, such as the CUDA
        device index.
        """
        return nvml.device_get_index(self._handle)

    @property
    def uuid(self) -> str:
        """
        Retrieves the globally unique immutable UUID associated with this
        device, as a 5 part hexadecimal string, that augments the immutable,
        board serial identifier.

        In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
        prefix. If you need a `uuid` without that prefix (for example, to
        interact with CUDA), use the `uuid_without_prefix` property.
        """
        return nvml.device_get_uuid(self._handle)

    @property
    def uuid_without_prefix(self) -> str:
        """
        Retrieves the globally unique immutable UUID associated with this
        device, as a 5 part hexadecimal string, that augments the immutable,
        board serial identifier.

        In the upstream NVML C++ API, the UUID includes a ``gpu-`` or ``mig-``
        prefix. This property returns it without the prefix, to match the UUIDs
        used in CUDA. If you need the prefix, use the `uuid` property.
        """
        # NVML UUIDs have a `gpu-` or `mig-` prefix. We remove that here.
        return nvml.device_get_uuid(self._handle)[4:]

    @property
    def pci_bus_id(self) -> str:
        """
        Retrieves the PCI bus ID of this device.
        """
        return self.pci_info.bus_id

    @property
    def numa_node_id(self) -> int:
        """
        The NUMA node of the given GPU device.

        This only applies to platforms where the GPUs are NUMA nodes.
        """
        return nvml.device_get_numa_node_id(self._handle)

    @property
    def arch(self) -> DeviceArch:
        """
        :obj:`~DeviceArch` device architecture.

        For example, a Tesla V100 will report ``DeviceArchitecture.name ==
        "VOLTA"``, and RTX A6000 will report ``DeviceArchitecture.name ==
        "AMPERE"``.
        """
        arch = nvml.device_get_architecture(self._handle)
        try:
            return DeviceArch(arch)
        except ValueError:
            # The driver may report an architecture newer than this enum
            # knows about; map it to UNKNOWN rather than raising.
            return nvml.DeviceArch.UNKNOWN

    @property
    def name(self) -> str:
        """
        Name of the device, e.g.: `"Tesla V100-SXM2-32GB"`
        """
        return nvml.device_get_name(self._handle)

    @property
    def brand(self) -> BrandType:
        """
        :obj:`~BrandType` brand of the device
        """
        return BrandType(nvml.device_get_brand(self._handle))

    @property
    def serial(self) -> str:
        """
        Retrieves the globally unique board serial number associated with this
        device's board.

        For all products with an InfoROM.
        """
        return nvml.device_get_serial(self._handle)

    @property
    def module_id(self) -> int:
        """
        Get a unique identifier for the device module on the baseboard.

        This API retrieves a unique identifier for each GPU module that exists
        on a given baseboard. For non-baseboard products, this ID would always
        be 0.
        """
        return nvml.device_get_module_id(self._handle)

    @property
    def minor_number(self) -> int:
        """
        The minor number of this device.

        For Linux only.

        The minor number is used by the Linux device driver to identify the
        device node in ``/dev/nvidiaX``.
        """
        return nvml.device_get_minor_number(self._handle)

    @property
    def is_c2c_enabled(self) -> bool:
        """
        Whether the C2C (Chip-to-Chip) mode is enabled for this device.
        """
        return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled)

    @property
    def is_persistence_mode_enabled(self) -> bool:
        """
        Whether persistence mode is enabled for this device.

        For Linux only.
        """
        return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED

    @is_persistence_mode_enabled.setter
    def is_persistence_mode_enabled(self, enabled: bool) -> None:
        # Translate the bool to NVML's EnableState enum.
        nvml.device_set_persistence_mode(
            self._handle,
            nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED
        )

    @property
    def cuda_compute_capability(self) -> tuple[int, int]:
        """
        CUDA compute capability of the device, e.g.: `(7, 0)` for a Tesla V100.

        Returns a tuple `(major, minor)`.
        """
        return nvml.device_get_cuda_compute_capability(self._handle)

    def to_cuda_device(self) -> "cuda.core.Device":
        """
        Get the corresponding :class:`cuda.core.Device` (which is used for CUDA
        access) for this :class:`cuda.core.system.Device` (which is used for
        NVIDIA machine library (NVML) access).

        The devices are mapped to one another by their UUID.

        Returns
        -------
        cuda.core.Device
            The corresponding CUDA device.
        """
        # Imported lazily to avoid a hard dependency on cuda.core at module
        # import time.
        from cuda.core import Device as CudaDevice

        # CUDA does not have an API to get a device by its UUID, so we just
        # search all the devices for one with a matching UUID.

        for cuda_device in CudaDevice.get_all_devices():
            if cuda_device.uuid == self.uuid_without_prefix:
                return cuda_device

        raise RuntimeError("No corresponding CUDA device found for this NVML device.")

    @classmethod
    def get_device_count(cls) -> int:
        """
        Get the number of available devices.

        Returns
        -------
        int
            The number of available devices.
        """
        initialize()

        return nvml.device_get_count_v2()

    @classmethod
    def get_all_devices(cls) -> Iterable[Device]:
        """
        Query the available device instances.

        Returns
        -------
        Iterator over :obj:`~Device`
            An iterator over available devices.
        """
        initialize()

        for device_id in range(nvml.device_get_count_v2()):
            yield cls(index=device_id)

    #########################################################################
    # ADDRESSING MODE

    @property
    def addressing_mode(self) -> AddressingMode:
        """
        Get the :obj:`~AddressingMode` of the device.

        Addressing modes can be one of:

        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_HMM`: System allocated
          memory (``malloc``, ``mmap``) is addressable from the device (GPU), via
          software-based mirroring of the CPU's page tables, on the GPU.
        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_ATS`: System allocated
          memory (``malloc``, ``mmap``) is addressable from the device (GPU), via
          Address Translation Services. This means that there is (effectively) a
          single set of page tables, and the CPU and GPU both use them.
        - :attr:`AddressingMode.DEVICE_ADDRESSING_MODE_NONE`: Neither HMM nor ATS
          is active.
        """
        return AddressingMode(nvml.device_get_addressing_mode(self._handle).value)

    #########################################################################
    # MIG (MULTI-INSTANCE GPU) DEVICES

    @property
    def mig(self) -> MigInfo:
        """
        Get :obj:`~MigInfo` accessor for MIG (Multi-Instance GPU) information.

        For Ampere™ or newer fully supported devices.
        """
        return MigInfo(self)

    #########################################################################
    # AFFINITY

    @classmethod
    def get_all_devices_with_cpu_affinity(cls, cpu_index: int) -> Iterable[Device]:
        """
        Retrieve the set of GPUs that have a CPU affinity with the given CPU number.

        Supported on Linux only.

        Parameters
        ----------
        cpu_index: int
            The CPU index.

        Returns
        -------
        Iterator of :obj:`~Device`
            An iterator over available devices.
        """
        cdef Device device
        for handle in nvml.system_get_topology_gpu_set(cpu_index):
            # Bypass __init__: we already have a raw NVML handle, so there is
            # no need to look the device up again by index/uuid/bus id.
            device = Device.__new__(Device)
            device._handle = handle
            yield device

    def get_memory_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]:
        """
        Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal
        memory affinity for the device.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.

        If requested scope is not applicable to the target topology, the API
        will fall back to reporting the memory affinity for the immediate non-I/O
        ancestor of the device.
        """
        # NVML fills one 64-bit word per 64 CPUs; request enough words to
        # cover every CPU on the system.
        return _unpack_bitmask(
            nvml.device_get_memory_affinity(
                self._handle,
                <unsigned int>ceil(cpu_count() / 64),
                scope
            )
        )

    def get_cpu_affinity(self, scope: AffinityScope=AffinityScope.NODE) -> list[int]:
        """
        Retrieves a list of indices of NUMA nodes or CPU sockets with the ideal
        CPU affinity for the device.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.

        If requested scope is not applicable to the target topology, the API
        will fall back to reporting the memory affinity for the immediate non-I/O
        ancestor of the device.
        """
        # One 64-bit bitmask word covers 64 CPUs; see get_memory_affinity.
        return _unpack_bitmask(
            nvml.device_get_cpu_affinity_within_scope(
                self._handle,
                <unsigned int>ceil(cpu_count() / 64),
                scope,
            )
        )

    def set_cpu_affinity(self):
        """
        Sets the ideal affinity for the calling thread and device.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.
        """
        nvml.device_set_cpu_affinity(self._handle)

    def clear_cpu_affinity(self):
        """
        Clear all affinity bindings for the calling thread.

        For Kepler™ or newer fully supported devices.

        Supported on Linux only.
        """
        nvml.device_clear_cpu_affinity(self._handle)

    #########################################################################
    # CLOCK
    # See external class definitions in _clock.pxi

    def get_clock(self, clock_type: ClockType) -> ClockInfo:
        """
        :obj:`~_device.ClockInfo` object to get information about and manage a specific clock on a device.
        """
        return ClockInfo(self._handle, clock_type)

    @property
    def is_auto_boosted_clocks_enabled(self) -> tuple[bool, bool]:
        """
        Retrieve the current state of auto boosted clocks on a device.

        For Kepler™ or newer fully supported devices.

        Auto Boosted clocks are enabled by default on some hardware, allowing
        the GPU to run at higher clock rates to maximize performance as thermal
        limits allow.

        On Pascal™ and newer hardware, Auto Boosted clocks are controlled
        through application clocks. Use :meth:`set_application_clocks` and
        :meth:`reset_application_clocks` to control Auto Boost behavior.

        Returns
        -------
        bool
            The current state of Auto Boosted clocks
        bool
            The default Auto Boosted clocks behavior

        """
        current, default = nvml.device_get_auto_boosted_clocks_enabled(self._handle)
        return current == nvml.EnableState.FEATURE_ENABLED, default == nvml.EnableState.FEATURE_ENABLED

    @property
    def current_clock_event_reasons(self) -> list[ClocksEventReasons]:
        """
        Retrieves the current :obj:`~ClocksEventReasons`.

        For all fully supported products.
        """
        # NVML returns a 64-bit bitmask; wrap it in a one-element stack array
        # so _unpack_bitmask can iterate its set bits.
        cdef uint64_t[1] reasons
        reasons[0] = nvml.device_get_current_clocks_event_reasons(self._handle)
        return [ClocksEventReasons(1 << reason) for reason in _unpack_bitmask(reasons)]

    @property
    def supported_clock_event_reasons(self) -> list[ClocksEventReasons]:
        """
        Retrieves supported :obj:`~ClocksEventReasons` that can be returned by
        :meth:`get_current_clock_event_reasons`.

        For all fully supported products.

        This method is not supported in virtual machines running virtual GPU (vGPU).
        """
        cdef uint64_t[1] reasons
        reasons[0] = nvml.device_get_supported_clocks_event_reasons(self._handle)
        return [ClocksEventReasons(1 << reason) for reason in _unpack_bitmask(reasons)]

    ##########################################################################
    # COOLER
    # See external class definitions in _cooler.pxi

    @property
    def cooler(self) -> CoolerInfo:
        """
        :obj:`~_device.CoolerInfo` object with cooler information for the device.
        """
        return CoolerInfo(nvml.device_get_cooler_info(self._handle))

    ##########################################################################
    # DEVICE ATTRIBUTES
    # See external class definitions in _device_attributes.pxi

    @property
    def attributes(self) -> DeviceAttributes:
        """
        :obj:`~_device.DeviceAttributes` object with various device attributes.

        For Ampere™ or newer fully supported devices. Only available on Linux
        systems.
        """
        return DeviceAttributes(nvml.device_get_attributes_v2(self._handle))

    #########################################################################
    # DISPLAY

    @property
    def is_display_connected(self) -> bool:
        """
        The display mode for this device.

        Indicates whether a physical display (e.g. monitor) is currently connected to
        any of the device's connectors.
        """
        return nvml.device_get_display_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED

    @property
    def is_display_active(self) -> bool:
        """
        The display active status for this device.

        Indicates whether a display is initialized on the device. For example,
        whether X Server is attached to this device and has allocated memory for
        the screen.

        Display can be active even when no monitor is physically attached.
        """
        return nvml.device_get_display_active(self._handle) == nvml.EnableState.FEATURE_ENABLED

    ##########################################################################
    # EVENTS
    # See external class definitions in _event.pxi

    def register_events(self, events: EventType | int | list[EventType | int]) -> DeviceEvents:
        """
        Starts recording events on this device.

        For Fermi™ or newer fully supported devices. For Linux only.

        ECC events are available only on ECC-enabled devices (see
        :meth:`Device.get_total_ecc_errors`). Power capping events are
        available only on Power Management enabled devices (see
        :meth:`Device.get_power_management_mode`).

        This call starts recording of events on specific device. All events
        that occurred before this call are not recorded. Wait for events using
        the :meth:`DeviceEvents.wait` method on the result.

        Examples
        --------
        >>> device = Device(index=0)
        >>> events = device.register_events([
        ...     EventType.EVENT_TYPE_XID_CRITICAL_ERROR,
        ... ])
        >>> while event := events.wait(timeout_ms=10000):
        ...     print(f"Event {event.event_type} occurred on device {event.device.uuid}")

        Parameters
        ----------
        events: EventType, int, or list of EventType or int
            The event type or list of event types to register for this device.

        Returns
        -------
        :obj:`~_device.DeviceEvents`
            An object representing the registered events. Call
            :meth:`~_device.DeviceEvents.wait` on this object to wait for events.

        Raises
        ------
        :class:`cuda.core.system.NotSupportedError`
            None of the requested event types are registered.
        """
        return DeviceEvents(self._handle, events)

    def get_supported_event_types(self) -> list[EventType]:
        """
        Get the list of event types supported by this device.

        For Fermi™ or newer fully supported devices. For Linux only (returns an
        empty list on Windows).

        Returns
        -------
        list[EventType]
            The list of supported event types.
        """
        cdef uint64_t[1] bitmask
        bitmask[0] = nvml.device_get_supported_event_types(self._handle)
        return [EventType(1 << ev) for ev in _unpack_bitmask(bitmask)]

    ##########################################################################
    # FAN
    # See external class definitions in _fan.pxi

    def get_fan(self, fan: int = 0) -> FanInfo:
        """
        :obj:`~_device.FanInfo` object to get information and manage a specific fan on a device.
        """
        # Validate eagerly so the error points at the bad index rather than
        # surfacing later from an NVML call.
        if fan < 0 or fan >= self.num_fans:
            raise ValueError(f"Fan index {fan} is out of range [0, {self.num_fans})")
        return FanInfo(self._handle, fan)

    @property
    def num_fans(self) -> int:
        """
        The number of fans on the device.
        """
        return nvml.device_get_num_fans(self._handle)

    ##########################################################################
    # FIELD VALUES
    # See external class definitions in _field_values.pxi

    def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
        """
        Get multiple field values from the device.

        Each value specified can raise its own exception. That exception will
        be raised when attempting to access the corresponding ``value`` from the
        returned :obj:`~_device.FieldValues` container.

        To confirm that there are no exceptions in the entire container, call
        :meth:`~_device.FieldValues.validate`.

        Parameters
        ----------
        field_ids: list[int | tuple[int, int]]
            List of field IDs to query.

            Each item may be either a single value from the :class:`FieldId`
            enum, or a pair of (:class:`FieldId`, scope ID).

        Returns
        -------
        :obj:`~_device.FieldValues`
            Container of field values corresponding to the requested field IDs.
        """
        # Passing a field_ids array of length 0 raises an InvalidArgumentError,
        # so avoid that.
        if len(field_ids) == 0:
            return FieldValues(nvml.FieldValue(0))

        return FieldValues(nvml.device_get_field_values(self._handle, field_ids))

    def clear_field_values(self, field_ids: list[int | tuple[int, int]]) -> None:
        """
        Clear multiple field values from the device.

        Parameters
        ----------
        field_ids: list[int | tuple[int, int]]
            List of field IDs to clear.

            Each item may be either a single value from the :class:`FieldId`
            enum, or a pair of (:class:`FieldId`, scope ID).
        """
        # Passing a field_ids array of length 0 raises an InvalidArgumentError,
        # so avoid that.
        if len(field_ids) == 0:
            return

        nvml.device_clear_field_values(self._handle, field_ids)

    ##########################################################################
    # INFOROM
    # See external class definitions in _inforom.pxi

    @property
    def inforom(self) -> InforomInfo:
        """
        :obj:`~_device.InforomInfo` object with InfoROM information.

        For all products with an InfoROM.
        """
        return InforomInfo(self)

    ##########################################################################
    # MEMORY
    # See external class definitions in _memory.pxi

    @property
    def bar1_memory_info(self) -> BAR1MemoryInfo:
        """
        :obj:`~_device.BAR1MemoryInfo` object with BAR1 memory information.

        BAR1 is used to map the FB (device memory) so that it can be directly
        accessed by the CPU or by 3rd party devices (peer-to-peer on the PCIE
        bus).
        """
        return BAR1MemoryInfo(nvml.device_get_bar1_memory_info(self._handle))

    @property
    def memory_info(self) -> MemoryInfo:
        """
        :obj:`~_device.MemoryInfo` object with memory information.
        """
        return MemoryInfo(nvml.device_get_memory_info_v2(self._handle))

    ##########################################################################
    # NVLINK
    # See external class definitions in _nvlink.pxi

    def get_nvlink(self, link: int) -> NvlinkInfo:
        """
        Get :obj:`~NvlinkInfo` about this device.

        For devices with NVLink support.
        """
        if link < 0 or link >= NvlinkInfo.max_links:
            raise ValueError(f"Link index {link} is out of range [0, {NvlinkInfo.max_links})")
        return NvlinkInfo(self, link)

    ##########################################################################
    # PCI INFO
    # See external class definitions in _pci_info.pxi

    @property
    def pci_info(self) -> PciInfo:
        """
        :obj:`~_device.PciInfo` object with the PCI attributes of this device.
        """
        return PciInfo(nvml.device_get_pci_info_ext(self._handle), self._handle)

    ##########################################################################
    # PERFORMANCE
    # See external class definitions in _performance.pxi

    @property
    def performance_state(self) -> Pstates:
        """
        The current performance state of the device.

        For Fermi™ or newer fully supported devices.

        See :class:`Pstates` for possible performance states.
        """
        return Pstates(nvml.device_get_performance_state(self._handle))

    @property
    def dynamic_pstates_info(self) -> GpuDynamicPstatesInfo:
        """
        :obj:`~_device.GpuDynamicPstatesInfo` object with performance monitor samples from the associated subdevice.
        """
        return GpuDynamicPstatesInfo(nvml.device_get_dynamic_pstates_info(self._handle))

    @property
    def supported_pstates(self) -> list[Pstates]:
        """
        Get all supported Performance States (P-States) for the device.

        The returned list contains a contiguous list of valid P-States supported by
        the device.

        Return
        ------
        list[Pstates]
            A list of supported P-States for the device.
        """
        return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)]

    ##########################################################################
    # PROCESS
    # See external class definitions in _process.pxi

    @property
    def compute_running_processes(self) -> list[ProcessInfo]:
        """
        Get information about processes with a compute context on a device

        For Fermi™ or newer fully supported devices.

        This function returns information only about compute running processes
        (e.g. CUDA application which have active context). Any graphics
        applications (e.g. using OpenGL, DirectX) won't be listed by this
        function.

        Keep in mind that information returned by this call is dynamic and the
        number of elements might change in time.

        In MIG mode, if device handle is provided, the API returns aggregate
        information, only if the caller has appropriate privileges. Per-instance
        information can be queried by using specific MIG device handles.
        Querying per-instance information using MIG device handles is not
        supported if the device is in vGPU Host virtualization mode.
        """
        return [ProcessInfo(self, proc) for proc in nvml.device_get_compute_running_processes_v3(self._handle)]

    ##########################################################################
    # REPAIR STATUS
    # See external class definitions in _repair_status.pxi

    @property
    def repair_status(self) -> RepairStatus:
        """
        :obj:`~_device.RepairStatus` object with TPC/Channel repair status.

        For Ampere™ or newer fully supported devices.
        """
        return RepairStatus(self._handle)

    ##########################################################################
    # TEMPERATURE
    # See external class definitions in _temperature.pxi

    @property
    def temperature(self) -> Temperature:
        """
        :obj:`~_device.Temperature` object with temperature information for the device.
        """
        return Temperature(self._handle)

    #######################################################################
    # TOPOLOGY

    def get_topology_nearest_gpus(self, level: GpuTopologyLevel) -> Iterable[Device]:
        """
        Retrieve the GPUs that are nearest to this device at a specific interconnectivity level.

        Supported on Linux only.

        Parameters
        ----------
        level: :class:`GpuTopologyLevel`
            The topology level.

        Returns
        -------
        Iterable of :class:`Device`
            The nearest devices at the given topology level.
        """
        cdef Device device
        for handle in nvml.device_get_topology_nearest_gpus(self._handle, level):
            # Bypass __init__; we already hold a raw NVML handle.
            device = Device.__new__(Device)
            device._handle = handle
            yield device

    #######################################################################
    # UTILIZATION

    @property
    def utilization(self) -> Utilization:
        """
        Retrieves the current :obj:`~Utilization` rates for the device's major
        subsystems.

        For Fermi™ or newer fully supported devices.

        Note: During driver initialization when ECC is enabled one can see high
        GPU and Memory Utilization readings. This is caused by ECC Memory
        Scrubbing mechanism that is performed during driver initialization.

        Note: On MIG-enabled GPUs, querying device utilization rates is not
        currently supported.

        Returns
        -------
        Utilization
            An object containing the current utilization rates for the device.
        """
        return Utilization(nvml.device_get_utilization_rates(self._handle))
def get_topology_common_ancestor(device1: Device, device2: Device) -> GpuTopologyLevel:
    """
    Retrieve the common ancestor for two devices.

    For Linux only.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.

    Returns
    -------
    :class:`GpuTopologyLevel`
        The common ancestor level of the two devices.
    """
    # Query NVML with the raw handles, then wrap the result in the enum.
    ancestor_level = nvml.device_get_topology_common_ancestor(device1._handle, device2._handle)
    return GpuTopologyLevel(ancestor_level)
def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex) -> GpuP2PStatus:
    """
    Retrieve the P2P status between two devices.

    Parameters
    ----------
    device1: :class:`Device`
        The first device.
    device2: :class:`Device`
        The second device.
    index: :class:`GpuP2PCapsIndex`
        The P2P capability index being looked for between ``device1`` and ``device2``.

    Returns
    -------
    :class:`GpuP2PStatus`
        The P2P status between the two devices.
    """
    # Query NVML with the raw handles, then wrap the result in the enum.
    raw_status = nvml.device_get_p2p_status(device1._handle, device2._handle, index)
    return GpuP2PStatus(raw_status)
# Public API of this module. Most names are defined in the included .pxi
# files or are module-level re-exports of NVML enums.
__all__ = [
    "AddressingMode",
    "AffinityScope",
    "BrandType",
    "ClockId",
    "ClocksEventReasons",
    "ClockType",
    "CoolerControl",
    "CoolerTarget",
    "Device",
    "DeviceArch",
    "EventType",
    "FanControlPolicy",
    "FieldId",
    "get_p2p_status",
    "get_topology_common_ancestor",
    "GpuP2PCapsIndex",
    "GpuP2PStatus",
    "GpuTopologyLevel",
    "InforomObject",
    "NvlinkVersion",
    "PcieUtilCounter",
    "Pstates",
    "TemperatureSensors",
    "TemperatureThresholds",
    "ThermalController",
    "ThermalTarget",
    "Utilization",
]