Coverage for cuda / core / system / _nvml_context.pyx: 60.00%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5import threading 

6  

7from cuda.bindings import nvml 

8  

9from . import exceptions 

10  

11  

# Module-wide NVML initialization state. Starts as UNINITIALIZED and is
# rewritten by _initialize() according to the outcome of nvml.init_v2().
_NVML_STATE = _NVMLState.UNINITIALIZED


# PID stored by _initialize() right after nvml.init_v2() succeeds; stays 0
# until the first successful initialization.
_NVML_OWNER_PID = 0


# Held by _initialize() for the whole time it inspects and updates the two
# globals above.
_lock = threading.Lock()

19  

20  

21# For testing 

def _get_nvml_state():
    """
    Return the current value of the module-level ``_NVML_STATE``.

    Test-only helper: it exposes the internal state without taking the lock
    and without modifying anything.
    """
    return _NVML_STATE

24  

25  

cpdef _initialize():
    """
    Initialize the NVIDIA Management Library (NVML) for the calling process.

    All state inspection and mutation happens while holding ``_lock``.

    - If NVML was previously marked as disabled, or it is already initialized
      and the recorded owner PID matches the current PID, nothing is done.
    - If NVML is uninitialized, or it is initialized but the recorded owner
      PID differs from the current PID, ``nvml.init_v2()`` is called.

    A ``LibraryNotFoundError``, ``DriverNotLoadedError`` or ``UnknownError``
    raised by ``nvml.init_v2()`` is not propagated; instead the state is set
    to ``DISABLED_LIBRARY_NOT_FOUND``.

    Raises
    ------
    RuntimeError
        If ``_NVML_STATE`` holds a value that is none of the handled states.
    """
    global _NVML_STATE, _NVML_OWNER_PID

    with _lock:
        # A previous attempt already established that NVML is unusable.
        if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
            return

        if _NVML_STATE == _NVMLState.INITIALIZED:
            # Already initialized by this very process: nothing left to do.
            if getpid() == _NVML_OWNER_PID:
                return
            # Otherwise the recorded owner is a different PID, so fall
            # through and initialize again for the current process.
        elif _NVML_STATE != _NVMLState.UNINITIALIZED:
            raise RuntimeError(f"Unhandled initialisation state ({_NVML_STATE=}, {_NVML_OWNER_PID=})")

        try:
            nvml.init_v2()
        except (
            exceptions.LibraryNotFoundError,
            exceptions.DriverNotLoadedError,
            exceptions.UnknownError,
        ):
            # Remember the failure so later calls return early.
            _NVML_STATE = _NVMLState.DISABLED_LIBRARY_NOT_FOUND
            return

        # Initialization succeeded: record the state and the owning PID.
        _NVML_STATE = _NVMLState.INITIALIZED
        _NVML_OWNER_PID = getpid()

56  

57  

cpdef validate():
    """
    Check that NVML is usable and that at least one GPU is present.

    The checks run in order and the first one that fails raises; when all of
    them pass the function returns without a value.

    Raises
    ------
    exceptions.LibraryNotFoundError
        If the module state is ``DISABLED_LIBRARY_NOT_FOUND``.
    exceptions.UninitializedError
        If ``is_initialized()`` reports that NVML is not initialized.
    exceptions.GpuNotFoundError
        If ``nvml.device_get_count_v2()`` reports zero devices.
    """
    if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
        raise exceptions.LibraryNotFoundError()

    if not is_initialized():
        raise exceptions.UninitializedError()

    # Only query the device count once NVML is known to be initialized.
    gpu_count = nvml.device_get_count_v2()
    if gpu_count == 0:
        raise exceptions.GpuNotFoundError()

79 raise exceptions.GpuNotFoundError()