Coverage for cuda / pathfinder / _dynamic_libs / load_nvidia_dynamic_lib.py: 98.82%

85 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# SPDX-License-Identifier: Apache-2.0 

3 

4from __future__ import annotations 

5 

6import functools 

7import json 

8import struct 

9import sys 

10from typing import TYPE_CHECKING 

11 

12from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json 

13from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS 

14from cuda.pathfinder._dynamic_libs.load_dl_common import ( 

15 DynamicLibNotAvailableError, 

16 DynamicLibNotFoundError, 

17 DynamicLibUnknownError, 

18 LoadedDL, 

19 load_dependencies, 

20) 

21from cuda.pathfinder._dynamic_libs.platform_loader import LOADER 

22from cuda.pathfinder._dynamic_libs.search_steps import ( 

23 EARLY_FIND_STEPS, 

24 LATE_FIND_STEPS, 

25 SearchContext, 

26 derive_ctk_root, 

27 find_via_ctk_root, 

28 run_find_steps, 

29) 

30from cuda.pathfinder._utils.platform_aware import IS_WINDOWS 

31from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process 

32 

33if TYPE_CHECKING: 

34 from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor 

35 

# Every libname that load_nvidia_dynamic_lib recognizes, regardless of
# category (CTK, third-party, driver).
_ALL_KNOWN_LIBNAMES: frozenset[str] = frozenset(LIB_DESCRIPTORS)
# The known libnames whose descriptor has a truthy entry for the current
# platform: ``windows_dlls`` on Windows, ``linux_sonames`` otherwise.
_ALL_SUPPORTED_LIBNAMES: frozenset[str] = frozenset(
    libname
    for libname, descriptor in LIB_DESCRIPTORS.items()
    if (descriptor.windows_dlls if IS_WINDOWS else descriptor.linux_sonames)
)
# Human-readable platform label used in error messages.
_PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux"

# Driver libraries ship with the NVIDIA display driver and are always on the
# system linker path. They bypass every CTK search step (site-packages,
# conda, CUDA_HOME, canary) and go straight to system search.
_DRIVER_ONLY_LIBNAMES = frozenset(
    libname for libname, descriptor in LIB_DESCRIPTORS.items() if descriptor.packaged_with == "driver"
)

48 

49 

def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
    """Load an NVIDIA driver library using system search only.

    Driver libs (libcuda, libnvidia-ml) belong to the display driver rather
    than the CUDA Toolkit and are always on the system linker path, so the
    full CTK search cascade (site-packages, conda, CUDA_HOME, canary) is
    not needed here.

    Args:
        desc: Descriptor of the driver library to load.

    Returns:
        The result of the already-loaded check if it is not ``None``,
        otherwise the result of the loader's system search.

    Raises:
        DynamicLibNotFoundError: If neither the already-loaded check nor
            the system search yields a library.
    """
    # Reuse a copy that some other component already loaded, if any.
    already_loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, False)
    if already_loaded is not None:
        return already_loaded

    via_system_search = LOADER.load_with_system_search(desc)
    if via_system_search is None:
        raise DynamicLibNotFoundError(
            f'"{desc.name}" is an NVIDIA driver library and can only be found via'
            f" system search. Ensure the NVIDIA display driver is installed."
        )
    return via_system_search

68 

69 

@functools.cache
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
    """Resolve a canary library's absolute path in a spawned child process.

    The child runs ``probe_canary_abs_path_and_print_json`` for ``libname``
    and its last non-blank stdout line is decoded as JSON.

    Args:
        libname: Name of the canary library to probe.

    Returns:
        The decoded payload when it is a string, or ``None`` when the
        payload is JSON ``null``. Return values are memoized per
        ``libname`` by ``functools.cache``.

    Raises:
        RuntimeError: If the child produced no non-blank stdout line, if
            the last such line is not valid JSON, or if the decoded payload
            is neither a string nor ``None``.
    """
    child_result = run_in_spawned_child_process(
        probe_canary_abs_path_and_print_json,
        args=(libname,),
        timeout=10.0,
        rethrow=True,
    )

    # Only the last non-blank line is treated as the payload, in case
    # earlier output lines are emitted.
    lines = [line for line in child_result.stdout.splitlines() if line.strip()]
    if not lines:
        message = f"Canary probe child process produced no stdout payload for {libname!r}"
        raise RuntimeError(message)

    try:
        payload = json.loads(lines[-1])
    except json.JSONDecodeError:
        # The decode error is deliberately not chained onto the RuntimeError.
        raise RuntimeError(
            f"Canary probe child process emitted invalid JSON payload for {libname!r}: {lines[-1]!r}"
        ) from None

    if payload is None or isinstance(payload, str):
        return payload
    message = f"Canary probe child process emitted unexpected payload for {libname!r}: {payload!r}"
    raise RuntimeError(message)

95 

96 

def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
    """Try the CTK-root canary fallback for descriptor-configured libraries.

    Each libname in ``ctx.desc.ctk_root_canary_anchor_libnames`` is tried in
    order: its absolute path is resolved in a subprocess, a CTK root is
    derived from that path, and the target library is searched for under
    that root.

    Args:
        ctx: Search context of the library being located.

    Returns:
        The absolute path (as ``str``) of the first hit, or ``None`` when no
        canary leads to the library.
    """
    for anchor_libname in ctx.desc.ctk_root_canary_anchor_libnames:
        anchor_abs_path = _resolve_system_loaded_abs_path_in_subprocess(anchor_libname)
        # A canary that could not be resolved has no CTK root to derive.
        ctk_root = None if anchor_abs_path is None else derive_ctk_root(anchor_abs_path)
        if ctk_root is None:
            continue
        found = find_via_ctk_root(ctx, ctk_root)
        if found is not None:
            return str(found.abs_path)
    return None

110 

111 

def _load_lib_no_cache(libname: str) -> LoadedDL:
    """Locate and load ``libname`` without consulting the result cache.

    Driver-only libnames are handed to ``_load_driver_lib_no_cache``. All
    other libraries go through the early find steps, the already-loaded
    check plus dependency loading, system search, the late find steps and,
    for descriptors that configure canary anchors, the CTK-root canary
    probe.

    Args:
        libname: Key into ``LIB_DESCRIPTORS``.

    Returns:
        The ``LoadedDL`` from the first stage that succeeds.

    Raises:
        Whatever ``ctx.raise_not_found()`` raises when every stage fails
        (presumably ``DynamicLibNotFoundError``; confirm in ``SearchContext``).
    """
    desc = LIB_DESCRIPTORS[libname]
    if libname in _DRIVER_ONLY_LIBNAMES:
        return _load_driver_lib_no_cache(desc)

    ctx = SearchContext(desc)

    # Stage 1: look for the library file on disk (pip wheels, conda).
    early_hit = run_find_steps(ctx, EARLY_FIND_STEPS)

    # Stage 2: cross-cutting work. On Windows the already-loaded check uses
    # the "have we found a path?" flag to decide whether to apply
    # AddDllDirectory side-effects. Dependencies are loaded before the
    # early return, so they are loaded even when the library itself was
    # already present.
    already_loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, early_hit is not None)
    load_dependencies(desc, load_nvidia_dynamic_lib)
    if already_loaded is not None:
        return already_loaded

    # Stage 3: load from the path found in stage 1, if there was one.
    if early_hit is not None:
        return LOADER.load_with_abs_path(desc, early_hit.abs_path, early_hit.found_via)

    # Otherwise fall back to system search, then the late find steps.
    if (via_system_search := LOADER.load_with_system_search(desc)) is not None:
        return via_system_search

    if (late_hit := run_find_steps(ctx, LATE_FIND_STEPS)) is not None:
        return LOADER.load_with_abs_path(desc, late_hit.abs_path, late_hit.found_via)

    # Last resort, only for descriptors that name canary anchor libraries.
    if desc.ctk_root_canary_anchor_libnames:
        canary_abs_path = _try_ctk_root_canary(ctx)
        if canary_abs_path is not None:
            return LOADER.load_with_abs_path(desc, canary_abs_path, "system-ctk-root")

    ctx.raise_not_found()

149 

150 

@functools.cache
def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
    """Load an NVIDIA dynamic library by its short name.

    Args:
        libname (str): Short name of the library to load (e.g., ``"cudart"``,
            ``"nvvm"``, etc.).

    Returns:
        LoadedDL: Object holding the OS library handle and the absolute path.

    **Important:**

    **Never close the returned handle.** Do **not** call ``dlclose`` (Linux) or
    ``FreeLibrary`` (Windows) on the ``LoadedDL._handle_uint``.

    **Why:** the return value is cached (``functools.cache``) and shared across the
    process. Closing the handle can unload the module while other code is still
    using it, which leads to crashes or subtle failures.

    This applies to both Linux and Windows. For context, see issue #1011:
    https://github.com/NVIDIA/cuda-python/issues/1011

    Raises:
        DynamicLibUnknownError: If ``libname`` is not a recognized library name.
        DynamicLibNotAvailableError: If ``libname`` is recognized but not
            supported on this platform.
        DynamicLibNotFoundError: If the library cannot be found or loaded.
        RuntimeError: If Python is not 64-bit.

    Search order:
        0. **Already loaded in the current process**

           - If a matching library has already been loaded by some other
             component, return its absolute path and handle and skip the
             rest of the search.

        1. **NVIDIA Python wheels**

           - Scan installed distributions (``site-packages``) for libraries
             shipped in NVIDIA wheels.

        2. **Conda environment**

           - Conda installations are discovered via ``CONDA_PREFIX``, which is
             defined automatically in activated conda environments (see
             https://docs.conda.io/projects/conda-build/en/stable/user-guide/environment-variables.html).

        3. **OS default mechanisms**

           - Fall back to the native loader:

             - Linux: ``dlopen()``

             - Windows: ``LoadLibraryW()``

           - CUDA Toolkit (CTK) system installs with system config updates are
             often discovered via:

             - Linux: ``/etc/ld.so.conf.d/*cuda*.conf``

             - Windows: ``C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\vX.Y\\bin``
               on the system ``PATH``.

        4. **Environment variables**

           - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).

        5. **CTK root canary probe (discoverable libs only)**

           - For selected libraries whose shared object does not reside on
             the standard linker path (currently ``nvvm``), try to derive the
             CTK root by system-loading a well-known CTK canary library in a
             subprocess and then searching relative to that root.

    **Driver libraries** (``"cuda"``, ``"nvml"``):

        These belong to the NVIDIA display driver (not the CUDA Toolkit) and
        are always on the system linker path. For these libraries the search
        is reduced to:

        0. Already loaded in the current process
        1. OS default mechanisms (``dlopen`` / ``LoadLibraryW``)

        The CTK-specific steps (site-packages, conda, ``CUDA_HOME``, canary
        probe) are skipped entirely.

    Notes:
        The search is performed **per library**. There is currently no mechanism to
        guarantee that multiple libraries are all resolved from the same location.

    """
    # struct.calcsize("P") is the size of a C pointer in bytes.
    pointer_size_bits = 8 * struct.calcsize("P")
    if pointer_size_bits != 64:
        message = (
            f"cuda.pathfinder.load_nvidia_dynamic_lib() requires 64-bit Python."
            f" Currently running: {pointer_size_bits}-bit Python"
            f" {sys.version_info.major}.{sys.version_info.minor}"
        )
        raise RuntimeError(message)

    # Reject names that are not known at all before names that are known
    # but unavailable on this platform.
    if libname not in _ALL_KNOWN_LIBNAMES:
        message = f"Unknown library name: {libname!r}. Known names: {sorted(_ALL_KNOWN_LIBNAMES)}"
        raise DynamicLibUnknownError(message)

    if libname not in _ALL_SUPPORTED_LIBNAMES:
        message = (
            f"Library name {libname!r} is known but not available on {_PLATFORM_NAME}. "
            f"Supported names on {_PLATFORM_NAME}: {sorted(_ALL_SUPPORTED_LIBNAMES)}"
        )
        raise DynamicLibNotAvailableError(message)

    return _load_lib_no_cache(libname)