Coverage for cuda / core / system / _temperature.pxi: 14.63%
82 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
# Lookup table used by ``Temperature.get_threshold``: keys are the public
# ``TemperatureThresholds`` members, values are the matching
# ``nvml.TemperatureThresholds`` members (same name with a
# ``TEMPERATURE_THRESHOLD_`` prefix).
_TEMPERATURE_THRESHOLD_MAPPING = {
    TemperatureThresholds.SHUTDOWN: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_SHUTDOWN,
    TemperatureThresholds.SLOWDOWN: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_SLOWDOWN,
    TemperatureThresholds.MEM_MAX: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_MEM_MAX,
    TemperatureThresholds.GPU_MAX: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_GPU_MAX,
    TemperatureThresholds.ACOUSTIC_MIN: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_ACOUSTIC_MIN,
    TemperatureThresholds.ACOUSTIC_CURR: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_ACOUSTIC_CURR,
    TemperatureThresholds.ACOUSTIC_MAX: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_ACOUSTIC_MAX,
    TemperatureThresholds.GPS_CURR: nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_GPS_CURR,
}
# Lookup table used by ``ThermalSensor.controller``: keys are
# ``nvml.ThermalController`` members, values are the identically named public
# ``ThermalController`` members.  Values missing from this table are reported
# as ``ThermalController.UNKNOWN`` by the caller.
_THERMAL_CONTROLLER_MAPPING = {
    nvml.ThermalController.GPU_INTERNAL: ThermalController.GPU_INTERNAL,
    nvml.ThermalController.ADM1032: ThermalController.ADM1032,
    nvml.ThermalController.ADT7461: ThermalController.ADT7461,
    nvml.ThermalController.MAX6649: ThermalController.MAX6649,
    nvml.ThermalController.MAX1617: ThermalController.MAX1617,
    nvml.ThermalController.LM99: ThermalController.LM99,
    nvml.ThermalController.LM89: ThermalController.LM89,
    nvml.ThermalController.LM64: ThermalController.LM64,
    nvml.ThermalController.G781: ThermalController.G781,
    nvml.ThermalController.ADT7473: ThermalController.ADT7473,
    nvml.ThermalController.SBMAX6649: ThermalController.SBMAX6649,
    nvml.ThermalController.VBIOSEVT: ThermalController.VBIOSEVT,
    nvml.ThermalController.OS: ThermalController.OS,
    nvml.ThermalController.NVSYSCON_CANOAS: ThermalController.NVSYSCON_CANOAS,
    nvml.ThermalController.NVSYSCON_E551: ThermalController.NVSYSCON_E551,
    nvml.ThermalController.MAX6649R: ThermalController.MAX6649R,
    nvml.ThermalController.ADT7473S: ThermalController.ADT7473S,
    nvml.ThermalController.UNKNOWN: ThermalController.UNKNOWN,
}
# Lookup table used by ``ThermalSensor.target``: keys are ``nvml.ThermalTarget``
# members, values are the identically named public ``ThermalTarget`` members.
_THERMAL_TARGET_MAPPING = {
    nvml.ThermalTarget.NONE: ThermalTarget.NONE,
    nvml.ThermalTarget.GPU: ThermalTarget.GPU,
    nvml.ThermalTarget.MEMORY: ThermalTarget.MEMORY,
    nvml.ThermalTarget.POWER_SUPPLY: ThermalTarget.POWER_SUPPLY,
    nvml.ThermalTarget.BOARD: ThermalTarget.BOARD,
    nvml.ThermalTarget.VCD_BOARD: ThermalTarget.VCD_BOARD,
    nvml.ThermalTarget.VCD_INLET: ThermalTarget.VCD_INLET,
    nvml.ThermalTarget.VCD_OUTLET: ThermalTarget.VCD_OUTLET,
    nvml.ThermalTarget.ALL: ThermalTarget.ALL,
}


# Reverse direction (public ``ThermalTarget`` -> ``nvml.ThermalTarget``), used
# by ``Temperature.get_thermal_settings`` to translate its argument.
_THERMAL_TARGET_INV_MAPPING = {
    public_target: nvml_target
    for nvml_target, public_target in _THERMAL_TARGET_MAPPING.items()
}
# One thermal sensor record.  In cuda.bindings.nvml, this is an anonymous
# struct inside nvmlThermalSettings_t; it is re-declared here so that
# `ThermalSensor` can read individual records through a typed pointer.
# NOTE(review): the field order and types must match the NVML definition
# exactly -- confirm against the NVML header whenever NVML is updated.
ctypedef struct _ThermalSensor:
    # Raw controller value; translated through _THERMAL_CONTROLLER_MAPPING.
    int controller
    int defaultMinTemp
    int defaultMaxTemp
    int currentTemp
    # Raw target value; translated through _THERMAL_TARGET_MAPPING.
    int target
cdef class ThermalSensor:
    """
    Read-only view of a single ``_ThermalSensor`` record.

    The record is not copied.  The instance stores a raw pointer to it
    together with a reference to the object that owns the underlying memory.
    """
    cdef:
        _ThermalSensor *_ptr
        object _owner

    def __init__(self, ptr: int, owner: object):
        """
        Parameters
        ----------
        ptr: int
            Address of the ``_ThermalSensor`` record to expose.
        owner: object
            Object holding the buffer that ``ptr`` points into.
        """
        # `ptr` refers to a region of the numpy buffer held by `owner`; keeping
        # a reference to `owner` stops that buffer from being freed while this
        # view is still in use.
        self._owner = owner
        self._ptr = <_ThermalSensor *><intptr_t>ptr

    @property
    def controller(self) -> ThermalController:
        """
        The sensor's controller, or ``ThermalController.UNKNOWN`` when the raw
        value has no entry in the controller mapping.
        """
        raw_controller = self._ptr.controller
        return _THERMAL_CONTROLLER_MAPPING.get(raw_controller, ThermalController.UNKNOWN)

    @property
    def default_min_temp(self) -> int:
        """The ``defaultMinTemp`` field of the sensor record."""
        return self._ptr.defaultMinTemp

    @property
    def default_max_temp(self) -> int:
        """The ``defaultMaxTemp`` field of the sensor record."""
        return self._ptr.defaultMaxTemp

    @property
    def current_temp(self) -> int:
        """The ``currentTemp`` field of the sensor record."""
        return self._ptr.currentTemp

    @property
    def target(self) -> ThermalTarget:
        """
        The sensor's target, or ``ThermalTarget.NONE`` when the raw value has
        no entry in the target mapping.
        """
        raw_target = self._ptr.target
        return _THERMAL_TARGET_MAPPING.get(raw_target, ThermalTarget.NONE)
cdef class ThermalSettings:
    """
    Sequence-like wrapper around an ``nvml.ThermalSettings`` object.

    ``len()`` gives the number of available sensor records and integer
    indexing returns a :class:`ThermalSensor` view of one record.
    """
    cdef object _thermal_settings

    def __init__(self, thermal_settings: nvml.ThermalSettings):
        """
        Parameters
        ----------
        thermal_settings: nvml.ThermalSettings
            The settings object to wrap.  It is kept alive for as long as this
            wrapper, or any sensor view created from it, exists.
        """
        self._thermal_settings = thermal_settings

    def __len__(self):
        """Return the reported sensor count, capped at 3."""
        # MAX_THERMAL_SENSORS_PER_GPU is 3
        return min(self._thermal_settings.count, 3)

    def __getitem__(self, idx: int) -> ThermalSensor:
        """
        Return a view of the sensor record at position ``idx``.

        Parameters
        ----------
        idx: int
            Zero-based sensor position.  Negative indices are not supported.

        Returns
        -------
        ThermalSensor
            A view that reads directly from the wrapped settings object.

        Raises
        ------
        IndexError
            If ``idx`` is negative or not smaller than ``len(self)``.
        """
        if idx < 0 or idx >= len(self):
            raise IndexError("Thermal sensor index out of range")
        # The records are laid out back to back, so record `idx` starts
        # `idx * sizeof(_ThermalSensor)` bytes after the first one.  The
        # settings object is passed as the owner so the buffer outlives the view.
        return ThermalSensor(
            self._thermal_settings.sensor.ptr + idx * sizeof(_ThermalSensor),
            self._thermal_settings
        )
cdef class Temperature:
    """
    Temperature-related NVML queries for the device identified by a handle.
    """
    cdef intptr_t _handle

    def __init__(self, handle: int):
        """
        Parameters
        ----------
        handle: int
            The device handle passed to every NVML call made by this object.
        """
        self._handle = handle

    def get_sensor(self) -> int:
        """
        Read the device temperature, in degrees Celsius.

        The GPU temperature sensor is the only sensor currently supported.

        Returns
        -------
        int
            The temperature in degrees Celsius.
        """
        # NOTE: nvml.device_get_temperature_v expects a member of the
        # TemperatureSensors enum, which currently has a single value. This is
        # kept as a method rather than a property so that a sensor argument
        # can be introduced later if that enum grows.
        gpu_sensor = nvml.TemperatureSensors.TEMPERATURE_GPU
        return nvml.device_get_temperature_v(self._handle, gpu_sensor)

    def get_threshold(self, threshold_type: TemperatureThresholds | str) -> int:
        """
        Retrieve the temperature threshold of the given type for this GPU, in
        degrees Celsius.

        For Kepler™ or newer fully supported devices.

        See :class:`TemperatureThresholds` for possible threshold types.

        Note: This API is no longer the preferred interface for retrieving the
        following temperature thresholds on Ada and later architectures:
        ``NVML_TEMPERATURE_THRESHOLD_SHUTDOWN``,
        ``NVML_TEMPERATURE_THRESHOLD_SLOWDOWN``,
        ``NVML_TEMPERATURE_THRESHOLD_MEM_MAX`` and
        ``NVML_TEMPERATURE_THRESHOLD_GPU_MAX``.

        Support for reading these temperature thresholds on Ada and later
        architectures will be removed from this API in a future release.
        Please use :meth:`get_field_values` with ``NVML_FI_DEV_TEMPERATURE_*``
        fields to retrieve temperature thresholds on these architectures.

        Parameters
        ----------
        threshold_type: TemperatureThresholds
            The threshold to query.

        Returns
        -------
        int
            The threshold in degrees Celsius.

        Raises
        ------
        ValueError
            If ``threshold_type`` is not a known threshold type.

        Warns
        -----
        DeprecationWarning
            If one of the four thresholds listed above is requested on an Ada
            or later device.
        """
        try:
            nvml_threshold = _TEMPERATURE_THRESHOLD_MAPPING[threshold_type]
        except KeyError:
            valid_types = list(TemperatureThresholds.__members__.values())
            raise ValueError(
                f"Invalid temperature threshold type: {threshold_type}. "
                f"Must be one of {valid_types}"
            ) from None

        discouraged_on_ada = (
            nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_SHUTDOWN,
            nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_SLOWDOWN,
            nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_MEM_MAX,
            nvml.TemperatureThresholds.TEMPERATURE_THRESHOLD_GPU_MAX,
        )
        # `and` short-circuits, so the architecture is only queried when one of
        # the discouraged thresholds was actually requested.
        if (
            nvml_threshold in discouraged_on_ada
            and nvml.DeviceArch(nvml.device_get_architecture(self._handle)) >= nvml.DeviceArch.ADA
        ):
            warnings.warn(
                f"{threshold_type} is no longer recommended for Ada and later architectures. "
                "Use get_field_values with NVML_FI_DEV_TEMPERATURE_* fields to retrieve this "
                "threshold on these architectures.",
                DeprecationWarning,
                stacklevel=2
            )

        return nvml.device_get_temperature_threshold(self._handle, nvml_threshold)

    @property
    def margin(self) -> int:
        """
        The thermal margin temperature (distance to nearest slowdown threshold) for the device.
        """
        return nvml.device_get_margin_temperature(self._handle)

    def get_thermal_settings(self, sensor_index: ThermalTarget | str) -> ThermalSettings:
        """
        Retrieve the thermal settings that NVML reports for the given target.

        Parameters
        ----------
        sensor_index: ThermalTarget
            The index of the thermal sensor.

        Returns
        -------
        :obj:`~_device.ThermalSettings`
            The thermal settings for the specified sensor.

        Raises
        ------
        ValueError
            If ``sensor_index`` is not a known :class:`ThermalTarget`.
        """
        # TODO: The NVML header describes this call as "Used to execute a list
        # of thermal system instructions", which doesn't seem to make sense.
        try:
            nvml_target = _THERMAL_TARGET_INV_MAPPING[sensor_index]
        except KeyError:
            valid_targets = list(ThermalTarget.__members__.values())
            raise ValueError(
                f"Invalid thermal sensor index: {sensor_index}. "
                f"Must be one of {valid_targets}"
            ) from None

        raw_settings = nvml.device_get_thermal_settings(self._handle, nvml_target)
        return ThermalSettings(raw_settings)