Coverage for cuda / core / system / _nvml_context.pyx: 60.00%
35 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5import threading
7from cuda.bindings import nvml
9from . import exceptions
# Current NVML initialization state for this module. Starts out as
# UNINITIALIZED and is updated by _initialize() while holding _lock.
_NVML_STATE = _NVMLState.UNINITIALIZED

# PID recorded by _initialize() right after a successful nvml.init_v2() call;
# 0 until that has happened.
_NVML_OWNER_PID = 0

# Held by _initialize() while it inspects and updates the two globals above.
_lock = threading.Lock()
21# For testing
def _get_nvml_state():
    """Return the module-level NVML initialization state (test helper)."""
    current_state = _NVML_STATE
    return current_state
cpdef _initialize():
    """
    Initialize the NVIDIA Management Library (NVML) at most once per process.

    While holding the module lock, the current state decides what happens:

    * ``DISABLED_LIBRARY_NOT_FOUND``: return without retrying.
    * ``INITIALIZED`` by the current PID: return, nothing to do.
    * ``INITIALIZED`` by a different PID, or ``UNINITIALIZED``: call
      ``nvml.init_v2()`` and record the new state and owner PID.

    If ``nvml.init_v2()`` raises ``LibraryNotFoundError``,
    ``DriverNotLoadedError`` or ``UnknownError``, the state becomes
    ``DISABLED_LIBRARY_NOT_FOUND`` and the function returns without raising.

    Raises
    ------
    RuntimeError
        If the state is none of the three values handled above.
    """
    global _NVML_STATE, _NVML_OWNER_PID

    with _lock:
        # A previous attempt already failed; do not try again.
        if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
            return

        if _NVML_STATE == _NVMLState.INITIALIZED:
            # Already initialized by this very process: nothing left to do.
            if getpid() == _NVML_OWNER_PID:
                return
            # Otherwise the recorded owner is another PID (presumably this
            # is a forked child), so fall through and initialize again.
        elif _NVML_STATE != _NVMLState.UNINITIALIZED:
            raise RuntimeError(f"Unhandled initialisation state ({_NVML_STATE=}, {_NVML_OWNER_PID=})")

        try:
            nvml.init_v2()
        except (
            exceptions.LibraryNotFoundError,
            exceptions.DriverNotLoadedError,
            exceptions.UnknownError,
        ):
            _NVML_STATE = _NVMLState.DISABLED_LIBRARY_NOT_FOUND
            return

        # Initialization succeeded: remember which process owns it.
        _NVML_STATE = _NVMLState.INITIALIZED
        _NVML_OWNER_PID = getpid()
cpdef validate():
    """
    Check that NVML is usable and that at least one GPU is present.

    The checks run in order: library availability, initialization, and
    finally the device count reported by NVML.

    Raises
    ------
    exceptions.LibraryNotFoundError
        If the NVML state is ``DISABLED_LIBRARY_NOT_FOUND``.
    exceptions.UninitializedError
        If ``is_initialized()`` reports that NVML is not initialized.
    exceptions.GpuNotFoundError
        If ``nvml.device_get_count_v2()`` returns zero.
    """
    if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
        raise exceptions.LibraryNotFoundError()

    if not is_initialized():
        raise exceptions.UninitializedError()

    # Only query the device count once NVML is known to be initialized.
    gpu_count = nvml.device_get_count_v2()
    if gpu_count == 0:
        raise exceptions.GpuNotFoundError()