Coverage for cuda / core / system / _system_events.pyx: 64.29%
28 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
6from libc.stdint cimport intptr_t
8from cuda.bindings import nvml
10from ._nvml_context cimport initialize
12from . import _device
# Re-export the NVML system event type enumeration under this module's
# namespace so callers do not need to import ``cuda.bindings.nvml`` directly.
SystemEventType = nvml.SystemEventType
cdef class SystemEvent:
    """
    Data about a single system event.

    Parameters
    ----------
    event_data: nvml.SystemEventData_v1
        Event data holding exactly one entry.

    Raises
    ------
    ValueError
        If ``event_data`` does not contain exactly one entry.
    """
    # NOTE(review): ``_event_data`` is not declared in this class body. A cdef
    # class needs the attribute declared (here or in a matching .pxd) for the
    # assignment in ``__init__`` to succeed -- confirm a declaration exists.

    def __init__(self, event_data: nvml.SystemEventData_v1):
        # Validate with an explicit exception instead of ``assert``:
        # assertions can be disabled, which would silently let data holding
        # several entries through.
        if len(event_data) != 1:
            raise ValueError(
                f"event_data must contain exactly one event, got {len(event_data)}"
            )
        self._event_data = event_data

    @property
    def event_type(self) -> SystemEventType:
        """
        The type of event that was triggered.
        """
        return SystemEventType(self._event_data.event_type)

    @property
    def gpu_id(self) -> int:
        """
        The GPU ID in PCI ID format.
        """
        return self._event_data.gpu_id

    @property
    def device(self) -> _device.Device:
        """
        The device associated with this event.

        Constructed from :attr:`gpu_id`, which is passed as the
        ``pci_bus_id`` of the device.
        """
        return _device.Device(pci_bus_id=self.gpu_id)
cdef class SystemEvents:
    """
    A sequence-like container of system events.

    ``len()`` reports the number of entries in the wrapped event data, and
    indexing returns the selected entry wrapped in a :class:`SystemEvent`.
    """
    # NOTE(review): ``_event_data`` is not declared in this class body. A cdef
    # class needs the attribute declared (here or in a matching .pxd) for the
    # assignment in ``__init__`` to succeed -- confirm a declaration exists.

    def __init__(self, event_data: nvml.SystemEventData_v1):
        self._event_data = event_data

    def __len__(self):
        event_count = len(self._event_data)
        return event_count

    def __getitem__(self, idx: int) -> SystemEvent:
        selected = self._event_data[idx]
        return SystemEvent(selected)
cdef class RegisteredSystemEvents:
    """
    Represents a set of registered system events that can be waited on.

    Parameters
    ----------
    events: SystemEventType, int, or list of SystemEventType or int
        The event type, or list of event types, to register. A list is
        combined into a single bitmask with bitwise OR.

    Raises
    ------
    TypeError
        If ``events`` is not a SystemEventType, an int, or a list.
    """
    # Handle of the underlying NVML system event set. C attributes of a cdef
    # class start out as 0, so 0 means "no event set has been created".
    cdef intptr_t _event_set

    def __init__(self, events: SystemEventType | int | list[SystemEventType | int]):
        cdef unsigned long long event_bitmask
        if isinstance(events, (int, SystemEventType)):
            event_bitmask = <unsigned long long>int(events)
        elif isinstance(events, list):
            event_bitmask = 0
            for ev in events:
                event_bitmask |= <unsigned long long>int(ev)
        else:
            raise TypeError("events must be an SystemEventType, int, or list of SystemEventType or int")

        initialize()

        self._event_set = nvml.system_event_set_create()
        # If this raises, the event needs to be freed and this is handled by
        # this class's __dealloc__ method.
        nvml.system_register_events(event_bitmask, self._event_set)

    def __dealloc__(self):
        # __init__ can raise before the event set is created (bad ``events``
        # type, or a failure in initialize()/system_event_set_create()). In
        # that case the handle is still 0 and there is nothing to free.
        if self._event_set != 0:
            nvml.system_event_set_free(self._event_set)
            self._event_set = 0

    def wait(self, timeout_ms: int = 0, buffer_size: int = 1) -> SystemEvents:
        """
        Wait for events in the system event set.

        For Fermi™ or newer fully supported devices.

        If some events are ready to be delivered at the time of the call,
        function returns immediately. If there are no events ready to be
        delivered, function sleeps till event arrives but not longer than
        specified timeout. If timeout passes, a
        :class:`cuda.core.system.TimeoutError` is raised. This function in
        certain conditions can return before specified timeout passes (e.g. when
        interrupt arrives)

        Parameters
        ----------
        timeout_ms: int
            The timeout in milliseconds. A value of 0 means to wait indefinitely.
        buffer_size: int
            The maximum number of events to retrieve. Must be at least 1.

        Returns
        -------
        :class:`SystemEvents`
            The events returned by the wait, wrapped in a container.

        Raises
        ------
        :class:`cuda.core.system.TimeoutError`
            If the timeout expires before an event is received.
        :class:`cuda.core.system.GpuIsLostError`
            If the GPU has fallen off the bus or is otherwise inaccessible.
        """
        return SystemEvents(nvml.system_event_set_wait(self._event_set, timeout_ms, buffer_size))
def register_events(events: SystemEventType | int | list[SystemEventType | int]) -> RegisteredSystemEvents:
    """
    Starts recording of events on the system.

    For Linux only.

    All events that occurred before this call are not recorded. Wait for events
    using the :meth:`RegisteredSystemEvents.wait` method on the result.

    Examples
    --------
    >>> from cuda.core import system
    >>> registered = system.register_events([
    ...     system.SystemEventType.SYSTEM_EVENT_TYPE_GPU_DRIVER_UNBIND,
    ... ])
    >>> while batch := registered.wait(timeout_ms=10000):
    ...     for i in range(len(batch)):
    ...         print(f"Event {batch[i].event_type} occurred.")

    Parameters
    ----------
    events: SystemEventType, int, or list of SystemEventType or int
        The event type or list of event types to register.

    Returns
    -------
    :class:`RegisteredSystemEvents`
        An object representing the registered events. Call
        :meth:`RegisteredSystemEvents.wait` on this object to wait for events.

    Raises
    ------
    TypeError
        If ``events`` is not a SystemEventType, an int, or a list.
    :class:`cuda.core.system.NotSupportedError`
        None of the requested event types are registered.
    """
    return RegisteredSystemEvents(events)
# Public names exported by this module.
__all__ = [
    "register_events",
    "RegisteredSystemEvents",
    "SystemEvent",
    "SystemEvents",
    "SystemEventType",
]